diff options
Diffstat (limited to 'cvs2svn_lib/symbol_statistics.py')
-rw-r--r-- | cvs2svn_lib/symbol_statistics.py | 521 |
1 files changed, 521 insertions, 0 deletions
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module gathers and processes statistics about lines of development."""

import sys
import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.common import error_prefix
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.log import Log
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import IncludedSymbol
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.symbol import ExcludedSymbol


class SymbolPlanError(FatalException):
    """Base class for the symbol-plan exceptions defined in this module."""

    pass


class SymbolPlanException(SymbolPlanError):
    """A planned conversion of a symbol was found to be unacceptable.

    Members:

      stats -- the _Stats instance describing the symbol in question

      symbol -- the planned conversion that was rejected"""

    def __init__(self, stats, symbol, msg):
        """Build the error message from SYMBOL, MSG and STATS.

        MSG is a short description of why SYMBOL is not an acceptable
        conversion; the string form of STATS is appended to it."""

        self.stats = stats
        self.symbol = symbol
        SymbolPlanError.__init__(
            self,
            'Cannot convert the following symbol to %s: %s\n %s'
            % (symbol, msg, self.stats,)
            )


class IndeterminateSymbolException(SymbolPlanException):
    """The planned conversion of a symbol is not of a definite type.

    Raised by _Stats.check_valid() when the planned conversion is not
    a Trunk, Branch, Tag, or ExcludedSymbol instance."""

    def __init__(self, stats, symbol):
        SymbolPlanException.__init__(
            self, stats, symbol, 'Indeterminate type'
            )


class _Stats:
    """A summary of information about a symbol (tag or branch).

    Members:

      lod -- the LineOfDevelopment instance of the lod being described

      tag_create_count -- the number of files in which this lod appears
          as a tag

      branch_create_count -- the number of files in which this lod
          appears as a branch

      branch_commit_count -- the number of files in which there were
          commits on this lod

      trivial_import_count -- the number of files in which this branch
          was purely a non-trunk default branch containing exactly one
          revision.

      pure_ntdb_count -- the number of files in which this branch was
          purely a non-trunk default branch (consisting only of
          non-trunk default branch revisions).

      branch_blockers -- a set of Symbol instances for any symbols that
          sprout from a branch with this name.

      possible_parents -- a map {LineOfDevelopment : count} indicating
          in how many files each LOD could have served as the parent of
          self.lod."""

    def __init__(self, lod):
        """Create an empty statistics record for LOD (all counts zero)."""

        self.lod = lod
        self.tag_create_count = 0
        self.branch_create_count = 0
        self.branch_commit_count = 0
        self.branch_blockers = set()
        self.trivial_import_count = 0
        self.pure_ntdb_count = 0
        self.possible_parents = { }

    def register_tag_creation(self):
        """Register the creation of this lod as a tag."""

        self.tag_create_count += 1

    def register_branch_creation(self):
        """Register the creation of this lod as a branch."""

        self.branch_create_count += 1

    def register_branch_commit(self):
        """Register that there were commit(s) on this branch in one file."""

        self.branch_commit_count += 1

    def register_branch_blocker(self, blocker):
        """Register BLOCKER as preventing this symbol from being deleted.

        BLOCKER is a tag or a branch that springs from a revision on this
        symbol."""

        self.branch_blockers.add(blocker)

    def register_trivial_import(self):
        """Register that this branch is a trivial import branch in one file."""

        self.trivial_import_count += 1

    def register_pure_ntdb(self):
        """Register that this branch is a pure import branch in one file."""

        self.pure_ntdb_count += 1

    def register_possible_parent(self, lod):
        """Register that LOD was a possible parent for SELF.lod in a file."""

        self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1

    def register_branch_possible_parents(self, cvs_branch, cvs_file_items):
        """Register any possible parents of this symbol from CVS_BRANCH.

        CVS_FILE_ITEMS is indexed by item id; it is used to look up the
        revision that CVS_BRANCH sprouts from (CVS_BRANCH.source_id) and
        the branches rooted at that revision."""

        # This routine is a bottleneck. So we define some local variables
        # to speed up access to frequently-needed variables.
        register = self.register_possible_parent
        parent_cvs_rev = cvs_file_items[cvs_branch.source_id]

        # The "obvious" parent of a branch is the branch holding the
        # revision where the branch is rooted:
        register(parent_cvs_rev.lod)

        # Any other branches that are rooted at the same revision and
        # were committed earlier than the branch are also possible
        # parents:
        symbol = cvs_branch.symbol
        for branch_id in parent_cvs_rev.branch_ids:
            parent_symbol = cvs_file_items[branch_id].symbol
            # A branch cannot be its own parent, nor can a branch's
            # parent be a branch that was created after it. So we stop
            # iterating when we reached the branch whose parents we are
            # collecting:
            if parent_symbol == symbol:
                break
            register(parent_symbol)

    def register_tag_possible_parents(self, cvs_tag, cvs_file_items):
        """Register any possible parents of this symbol from CVS_TAG.

        CVS_FILE_ITEMS is indexed by item id; it is used to look up the
        revision that CVS_TAG is attached to (CVS_TAG.source_id) and the
        branches rooted at that revision."""

        # This routine is a bottleneck. So use local variables to speed
        # up access to frequently-needed objects.
        register = self.register_possible_parent
        parent_cvs_rev = cvs_file_items[cvs_tag.source_id]

        # The "obvious" parent of a tag is the branch holding the
        # revision where the tag is rooted:
        register(parent_cvs_rev.lod)

        # Branches that are rooted at the same revision are also
        # possible parents.  Unlike the branch case there is no early
        # exit: every branch rooted at the revision is registered.
        for branch_id in parent_cvs_rev.branch_ids:
            register(cvs_file_items[branch_id].symbol)

    def is_ghost(self):
        """Return True iff this lod never really existed.

        A lod is a ghost if it is not trunk and has no commits, no
        blockers, and no possible parents recorded."""

        return (
            not isinstance(self.lod, Trunk)
            and self.branch_commit_count == 0
            and not self.branch_blockers
            and not self.possible_parents
            )

    def check_valid(self, symbol):
        """Check whether SYMBOL is a valid conversion of SELF.lod.

        It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL
        is a TypedSymbol and that the information that it contains is
        consistent with that stored in SELF.lod. (This routine does not
        do higher-level tests of whether the chosen conversion is actually
        sensible.) If there are any problems, raise a
        SymbolPlanException (an IndeterminateSymbolException if SYMBOL
        is not a Trunk, Branch, Tag, or ExcludedSymbol)."""

        if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)):
            raise IndeterminateSymbolException(self, symbol)

        if symbol.id != self.lod.id:
            raise SymbolPlanException(self, symbol, 'IDs must match')

        if symbol.project != self.lod.project:
            raise SymbolPlanException(self, symbol, 'Projects must match')

        # Names are only compared for symbols that will be included in
        # the conversion:
        if isinstance(symbol, IncludedSymbol) \
               and symbol.name != self.lod.name:
            raise SymbolPlanException(self, symbol, 'Names must match')

    def check_preferred_parent_allowed(self, symbol):
        """Check that SYMBOL's preferred_parent_id is an allowed parent.

        SYMBOL is the planned conversion of SELF.lod. Verify that its
        preferred_parent_id is a possible parent of SELF.lod. If not,
        raise a SymbolPlanException describing the problem. Symbols
        that are not IncludedSymbols, or that have no preferred parent
        (preferred_parent_id is None), always pass."""

        if not isinstance(symbol, IncludedSymbol):
            return

        preferred_parent_id = symbol.preferred_parent_id
        if preferred_parent_id is None:
            return

        # possible_parents is keyed by LOD object, so the id has to be
        # matched by scanning the keys:
        for possible_parent in self.possible_parents:
            if possible_parent.id == preferred_parent_id:
                return

        raise SymbolPlanException(
            self, symbol,
            'The selected parent is not among the symbol\'s '
            'possible parents.'
            )

    def __str__(self):
        """Return a one-line summary of the counts stored for this lod."""

        return (
            '\'%s\' is '
            'a tag in %d files, '
            'a branch in %d files, '
            'a trivial import in %d files, '
            'a pure import in %d files, '
            'and has commits in %d files'
            % (self.lod, self.tag_create_count, self.branch_create_count,
               self.trivial_import_count, self.pure_ntdb_count,
               self.branch_commit_count)
            )

    def __repr__(self):
        """Return the __str__ summary plus possible parents and blockers.

        Possible parents are listed in order of decreasing file count;
        blockers (if any) are listed in sorted order."""

        retval = ['%s\n possible parents:\n' % (self,)]

        # A stable sort on the count with reverse=True keeps entries
        # with equal counts in their original relative order.
        parent_counts = sorted(
            self.possible_parents.items(),
            key=lambda item: item[1],
            reverse=True,
            )
        for (symbol, count) in parent_counts:
            if isinstance(symbol, Trunk):
                retval.append(' trunk : %d\n' % count)
            else:
                retval.append(' \'%s\' : %d\n' % (symbol.name, count))

        if self.branch_blockers:
            retval.append(' blockers:\n')
            for blocker in sorted(self.branch_blockers):
                retval.append(' \'%s\'\n' % (blocker,))

        return ''.join(retval)


class SymbolStatisticsCollector:
    """Collect statistics about lines of development.

    Record a summary of information about each line of development in
    the RCS files for later storage into a database. The database is
    created in CollectRevsPass and it is used in CollateSymbolsPass (via
    the SymbolStatistics class).

    collect_data._SymbolDataCollector inserts information into instances
    of this class by calling its register_*() methods.

    Its main purpose is to assist in the decisions about which symbols
    can be treated as branches and tags and which may be excluded.

    The data collected by this class can be written to the file
    config.SYMBOL_STATISTICS."""

    def __init__(self):
        # A map { lod -> _Stats } for all lines of development:
        self._stats = { }

    def __getitem__(self, lod):
        """Return the _Stats record for line of development LOD.

        Create and register a new one if necessary."""

        try:
            return self._stats[lod]
        except KeyError:
            stats = _Stats(lod)
            self._stats[lod] = stats
            return stats

    def register(self, cvs_file_items):
        """Register the statistics for each symbol in CVS_FILE_ITEMS."""

        # NOTE(review): the nesting of the tag loop relative to the
        # "lod is not None" test was reconstructed from a listing whose
        # indentation had been lost -- confirm against upstream.
        for lod_items in cvs_file_items.iter_lods():
            if lod_items.lod is not None:
                branch_stats = self[lod_items.lod]

                branch_stats.register_branch_creation()

                if lod_items.cvs_revisions:
                    branch_stats.register_branch_commit()

                if lod_items.is_trivial_import():
                    branch_stats.register_trivial_import()

                if lod_items.is_pure_ntdb():
                    branch_stats.register_pure_ntdb()

                for cvs_symbol in lod_items.iter_blockers():
                    branch_stats.register_branch_blocker(cvs_symbol.symbol)

                if lod_items.cvs_branch is not None:
                    branch_stats.register_branch_possible_parents(
                        lod_items.cvs_branch, cvs_file_items
                        )

            for cvs_tag in lod_items.cvs_tags:
                tag_stats = self[cvs_tag.symbol]

                tag_stats.register_tag_creation()

                tag_stats.register_tag_possible_parents(
                    cvs_tag, cvs_file_items
                    )

    def purge_ghost_symbols(self):
        """Purge any symbols that don't have any activity.

        Such ghost symbols can arise if a symbol was defined in an RCS
        file but pointed at a non-existent revision."""

        # Entries are deleted while walking the records, so iterate over
        # an explicit snapshot rather than over the live dictionary:
        for stats in list(self._stats.values()):
            if stats.is_ghost():
                Log().warn('Deleting ghost symbol: %s' % (stats.lod,))
                del self._stats[stats.lod]

    def close(self):
        """Store the stats database to the SYMBOL_STATISTICS file.

        The _Stats records are pickled as a single list. After this
        call the collector holds no data and must not be used again."""

        filename = artifact_manager.get_temp_file(config.SYMBOL_STATISTICS)
        f = open(filename, 'wb')
        try:
            # Protocol -1 selects the highest pickle protocol available.
            cPickle.dump(list(self._stats.values()), f, -1)
        finally:
            # Release the file handle even if pickling fails:
            f.close()
        self._stats = None


class SymbolStatistics:
    """Read and handle line of development statistics.

    The statistics are read from a database created by
    SymbolStatisticsCollector. This class has methods to process the
    statistics information and help with decisions about:

    1. What tags and branches should be processed/excluded

    2. What tags should be forced to be branches and vice versa (this
       class maintains some statistics to help the user decide)

    3. Are there inconsistencies?

       - A symbol that is sometimes a branch and sometimes a tag

       - A forced branch with commit(s) on it

       - A non-excluded branch depends on an excluded branch

    The data in this class is read from a pickle file."""

    def __init__(self, filename):
        """Read the stats database from FILENAME.

        FILENAME must be a pickle file holding a list of _Stats
        instances, as written by SymbolStatisticsCollector.close()."""

        # A map { LineOfDevelopment -> _Stats } for all lines of
        # development:
        self._stats = { }

        # A map { LineOfDevelopment.id -> _Stats } for all lines of
        # development:
        self._stats_by_id = { }

        # NOTE: unpickling executes arbitrary code embedded in the file,
        # so FILENAME must only ever be a file produced by this program.
        f = open(filename, 'rb')
        try:
            stats_list = cPickle.load(f)
        finally:
            # Close the handle explicitly instead of leaving it to the
            # garbage collector:
            f.close()

        for stats in stats_list:
            self._stats[stats.lod] = stats
            self._stats_by_id[stats.lod.id] = stats

    def __len__(self):
        """Return the number of lines of development with statistics."""

        return len(self._stats)

    def __getitem__(self, lod_id):
        """Return the _Stats object for the lod whose id is LOD_ID.

        Raise KeyError if no such lod exists."""

        return self._stats_by_id[lod_id]

    def get_stats(self, lod):
        """Return the _Stats object for LineOfDevelopment instance LOD.

        Raise KeyError if no such lod exists."""

        return self._stats[lod]

    def __iter__(self):
        """Iterate over the _Stats objects, in no particular order."""

        return self._stats.itervalues()

    def _check_blocked_excludes(self, symbol_map):
        """Check for any excluded LODs that are blocked by non-excluded symbols.

        SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
        indicating how each AbstractSymbol is to be converted. If any
        blocked excludes are found, describe the problem to Log().error()
        and raise a FatalException."""

        # A list of (lod,[blocker,...]) tuples for excludes that are
        # blocked by the specified non-excluded blockers:
        problems = []

        for lod in symbol_map.itervalues():
            if isinstance(lod, ExcludedSymbol):
                # Symbol is excluded; make sure that its blockers are also
                # excluded:
                lod_blockers = [
                    blocker
                    for blocker in self.get_stats(lod).branch_blockers
                    if isinstance(
                        symbol_map.get(blocker, None), IncludedSymbol
                        )
                    ]
                if lod_blockers:
                    problems.append((lod, lod_blockers))

        if problems:
            s = []
            for (lod, lod_blockers) in problems:
                s.append(
                    '%s: %s cannot be excluded because the following symbols '
                    'depend on it:\n'
                    % (error_prefix, lod,)
                    )
                for blocker in lod_blockers:
                    s.append(' %s\n' % (blocker,))
                s.append('\n')
            Log().error(''.join(s))

            raise FatalException()

    def _check_invalid_tags(self, symbol_map):
        """Check for commits on any symbols that are to be converted as tags.

        SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
        indicating how each AbstractSymbol is to be converted. If there
        is a commit on a symbol, then it cannot be converted as a tag. If
        any tags with commits are found, output error messages describing
        the problems then raise a FatalException."""

        Log().quiet("Checking for forced tags with commits...")

        invalid_tags = [
            symbol
            for symbol in symbol_map.itervalues()
            if isinstance(symbol, Tag)
            and self.get_stats(symbol).branch_commit_count > 0
            ]

        if not invalid_tags:
            # No problems found:
            return

        s = []
        s.append(
            '%s: The following branches cannot be forced to be tags '
            'because they have commits:\n'
            % (error_prefix,)
            )
        for tag in invalid_tags:
            # A real one-element tuple keeps the formatting correct
            # whatever the type of tag.name:
            s.append(' %s\n' % (tag.name,))
        s.append('\n')
        Log().error(''.join(s))

        raise FatalException()

    def check_consistency(self, symbol_map):
        """Check the plan for how to convert symbols for consistency.

        SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
        indicating how each AbstractSymbol is to be converted. If any
        problems are detected, describe the problem to Log().error() and
        raise a FatalException."""

        # We want to do all of the consistency checks even if one of them
        # fails, so that the user gets as much feedback as possible. Set
        # this variable to True if any errors are found.
        error_found = False

        # Check that the planned preferred parents are OK for all
        # IncludedSymbols:
        for lod in symbol_map.itervalues():
            if isinstance(lod, IncludedSymbol):
                stats = self.get_stats(lod)
                try:
                    stats.check_preferred_parent_allowed(lod)
                except SymbolPlanException:
                    # The exception is fetched via sys.exc_info() so that
                    # this statement parses on every Python version (the
                    # "except X, e" spelling is specific to Python 2).
                    e = sys.exc_info()[1]
                    Log().error('%s\n' % (e,))
                    error_found = True

        # The helpers below have already logged their own diagnostics by
        # the time they raise, so only the fact of failure is recorded:
        try:
            self._check_blocked_excludes(symbol_map)
        except FatalException:
            error_found = True

        try:
            self._check_invalid_tags(symbol_map)
        except FatalException:
            error_found = True

        if error_found:
            raise FatalException(
                'Please fix the above errors and restart CollateSymbolsPass'
                )

    def exclude_symbol(self, symbol):
        """SYMBOL has been excluded; remove it from our statistics.

        Raise KeyError if there are no statistics for SYMBOL."""

        del self._stats[symbol]
        del self._stats_by_id[symbol.id]

        # Remove references to this symbol from other statistics objects:
        for stats in self._stats.itervalues():
            stats.branch_blockers.discard(symbol)
            stats.possible_parents.pop(symbol, None)