# -*- coding: utf-8 -*-
#
# Copyright (C) 2005 Edgewall Software
# Copyright (C) 2006 K.S.Sreeram
# Copyright (C) 2007-2010 Lele Gaifax
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
#
# Author: K.S.Sreeram

'''
This module implements the trac versioncontrol backend API.
The API consists of 3 classes: DarcsRepository, DarcsNode and DarcsChangeset.
Please see the docs in trac.versioncontrol.api for the interface which
is implemented by this module.
'''

import os
import StringIO
import mimetypes

from trac.util import TracError
from trac.versioncontrol import (Repository, Node, Changeset,
                                 NoSuchChangeset, NoSuchNode)

from command import DarcsCommand
from updatedb import update_darcsdb
from dbutil import (CHANGE_ADDED, CHANGE_EDITED, CHANGE_MOVED,
                    CHANGE_MOVED_EDITED, CHANGE_REMOVED, IS_TRAC_0_10_X,
                    IS_TRAC_0_11_X, TimedDB, IS_TRAC_0_12_OR_BETTER,
                    NODE_DIR_TYPE, NODE_FILE_TYPE, get_node_type,
                    get_prev_path_rev, get_repository_id,
                    query_nodes_for_revision)

# mapping from node types used by the darcs backend and the types
# used by the trac api
_node_type_map = {
    NODE_FILE_TYPE: Node.FILE,
    NODE_DIR_TYPE: Node.DIRECTORY,
}

# mapping from change types used by the darcs backend and the types
# used by the trac api
_change_map = {
    CHANGE_ADDED: Changeset.ADD,
    CHANGE_REMOVED: Changeset.DELETE,
    CHANGE_MOVED: Changeset.MOVE,
    CHANGE_EDITED: Changeset.EDIT,
    #FIXME: treat moved&edited as just moved?
    CHANGE_MOVED_EDITED: Changeset.MOVE,
}


class DarcsRepository(Repository):
    """Implement the IRepository interface for a darcs repository."""

    def __init__(self, db, path, log, darcscmd, possible_encodings, params,
                 eager_annotations):
        """Wrap the darcs repository located at `path`.

        `db` is wrapped in a ``TimedDB`` and stored as ``self.db``;
        `darcscmd` and `possible_encodings` are passed on to
        ``DarcsCommand``.  When `eager_annotations` is true, ``sync()``
        precomputes content and annotations of the files touched by newly
        imported changesets.
        """
        # The base-class constructor signature differs between Trac versions.
        if IS_TRAC_0_12_OR_BETTER:
            Repository.__init__(self, 'darcs:%s' % path, params, log)
        else:
            Repository.__init__(self, 'darcs:%s' % path, params, None, log)
        self.db = TimedDB(db, log)
        self.path = path
        self.__cmd = DarcsCommand(darcscmd, path, log, possible_encodings)
        if not IS_TRAC_0_12_OR_BETTER:
            self.log = log
        # Fall back to id 0 when no repository id is found for this path.
        self.id = get_repository_id(db, path) or 0
        self.eager_annotations = eager_annotations
        if IS_TRAC_0_10_X:
            self.sync()

    def close(self):
        """Do nothing: there is no per-repository resource to release."""
        pass

    def get_changeset(self, rev):
        """Return the ``DarcsChangeset`` for the (normalized) `rev`."""
        rev = self.normalize_rev(rev)
        return DarcsChangeset(self, rev)

    def get_node(self, path, rev=None):
        """Return the ``DarcsNode`` found at `path` in revision `rev`.

        Both arguments are normalized first.  Raises ``NoSuchNode`` when
        the lookup for a non-root path returns no row.
        """
        path = self.normalize_path(path)
        rev = self.normalize_rev(rev)

        # compute node_id, node_type and last_rev and then
        # create a DarcsNode object.
        # 'last_rev' is the last revision <= rev where this
        # node was modified.
        if path == '/':
            # The root directory has no node id of its own.
            node_id = None
            node_type = NODE_DIR_TYPE
            last_rev = rev
        else:
            c = self.db.cursor()
            q, args = query_nodes_for_revision(self.id, rev, 'dnc.path = %s')
            args.append(path)
            c.execute(q, args)
            row = c.fetchone()
            if row is None:
                raise NoSuchNode(path, rev)
            node_id, last_rev = row[:2]
            node_type = get_node_type(self.db, self.id, node_id)
        return DarcsNode(node_id, node_type, path, last_rev, self,
                         self.__cmd, self.log)

    def get_oldest_rev(self):
        """Return 1, or None when the repository has no changeset yet."""
        if self.get_youngest_rev() is None:
            return None
        return 1

    def get_youngest_rev(self):
        """Return the highest revision stored for this repository.

        Returns None when there is no row or the stored maximum is falsy.
        """
        c = self.db.cursor()
        c.execute('SELECT max(rev) FROM darcs_changesets '
                  'WHERE repo_id = %s', (self.id,))
        row = c.fetchone()
        if row and row[0]:
            return row[0]
        return None

    def previous_rev(self, rev, path=''):
        """Return the revision preceding `rev`, or None for the first one.

        `path` is accepted but not used.
        """
        rev = self.normalize_rev(rev)
        if rev > 1:
            return rev - 1
        return None

    def next_rev(self, rev, path=''):
        """Return the revision following `rev`, or None for the youngest.

        `path` is accepted but not used.
        """
        rev = self.normalize_rev(rev)
        if rev < self.get_youngest_rev():
            return rev + 1
        return None

    def rev_older_than(self, rev1, rev2):
        """Tell whether `rev1` precedes `rev2` once both are normalized."""
        return self.normalize_rev(rev1) < self.normalize_rev(rev2)

    def get_path_history(self, path, rev=None, limit=None):
        """Yield the history of the node found at `path` in `rev`."""
        # FIXME: this is not correct
        return self.get_node(path, rev).get_history(limit)

    def normalize_path(self, path):
        """Return `path` without leading/trailing slashes, or '/' for root.

        Empty, None and slash-only paths all normalize to '/'.
        """
        # The and/or chain is deliberate: stripping '/' yields an empty
        # string, which must fall through to the '/' default as well.
        return path and path.strip('/') or '/'

    def normalize_rev(self, rev):
        """Return `rev` as a revision number.

        A string of length 61 or 64 is treated as a patch hash (with an
        optional trailing '.gz', which is not stored in the db) and is
        translated through ``darcs_changesets``; ``NoSuchChangeset`` is
        raised when it is unknown.  None and the empty string map to the
        youngest revision.  Anything else is converted with ``int()``
        (``TracError`` on failure) and clamped to the youngest revision.
        """
        if isinstance(rev, basestring) and len(rev) in (61, 64):
            if rev.endswith('.gz'):
                # We don't store ending .gz in the db
                rev = rev[:-3]
            c = self.db.cursor()
            c.execute('SELECT rev FROM darcs_changesets '
                      'WHERE repo_id = %s AND hash = %s', (self.id, rev))
            row = c.fetchone()
            if row is None:
                raise NoSuchChangeset(rev)
            rev = int(row[0])
        else:
            youngest = self.get_youngest_rev()
            if rev is None or rev == "":
                return youngest
            try:
                rev = int(rev)
            except ValueError as le:
                # "except X as e" replaces the "except X, e" spelling, which
                # Python 3 rejects at compile time; it needs Python 2.6+.
                raise TracError('Ill-formed revision: %s, error: %s'
                                % (rev, le))
            if rev > youngest:
                rev = youngest
        return rev

    def get_changes(self, old_path, old_rev, new_path, new_rev,
                    ignore_ancestry=1):
        """Yield ``(old_node, new_node, kind, change)`` between two states.

        Both locations must resolve to the same node id, otherwise a
        ``TracError`` is raised.  For a file a single EDIT tuple is
        produced when its last-changed revision differs.  For a directory
        one tuple is produced for every node recorded below a path that
        changed in the ``old_rev..new_rev`` range.  `ignore_ancestry` is
        accepted but not used.
        """
        old_path = self.normalize_path(old_path)
        old_rev = self.normalize_rev(old_rev)
        new_path = self.normalize_path(new_path)
        new_rev = self.normalize_rev(new_rev)

        old_node = self.get_node(old_path, old_rev)
        new_node = self.get_node(new_path, new_rev)

        node_id = old_node._get_node_id()
        if node_id != new_node._get_node_id():
            raise TracError('Node mismatch: base is %s in rev %d '
                            'and target is %s in rev %d'
                            % (old_path, old_rev, new_path, new_rev))

        if old_node.kind == Node.FILE:
            if old_node.rev != new_node.rev:
                yield (old_node, new_node, Node.FILE, Changeset.EDIT)
            return

        c = self.db.cursor()
        if node_id is not None:
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND node_id = %s '
                      'AND rev >= %s AND rev <= %s',
                      (self.id, node_id, old_rev, new_rev))
        else:
            # Root directory: consider every change in the range.
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND rev >= %s AND rev <= %s',
                      (self.id, old_rev, new_rev))

        # Collect the affected node ids once each, in first-seen order.
        seen = set()
        node_list = []
        c1 = self.db.cursor()
        for rev, path in c:
            c1.execute('SELECT node_id FROM darcs_node_changes '
                       'WHERE repo_id = %s AND rev = %s AND path LIKE %s',
                       (self.id, rev, path + '/%'))
            for nid, in c1:
                if nid not in seen:
                    seen.add(nid)
                    node_list.append(nid)

        for nid in node_list:
            old_node = new_node = None

            # Latest state of the node strictly before old_rev, if any.
            # NOTE(review): the bound is "rev < old_rev" while the range
            # below starts at old_rev -- confirm this is the intended split.
            c1.execute('SELECT rev,path FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev < %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.id, nid, old_rev))
            row = c1.fetchone()
            if row is not None:
                rev, path = row
                old_node = self.get_node(path, rev)

            # Latest state of the node inside the requested range.
            c1.execute('SELECT rev,path,change_kind FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s '
                       'AND rev >= %s AND rev <= %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.id, nid, old_rev, new_rev))
            rev, path, change = c1.fetchone()
            if change != CHANGE_REMOVED:
                new_node = self.get_node(path, rev)

            assert (old_node is not None) or (new_node is not None)
            if old_node is not None:
                kind = old_node.kind
            else:
                kind = new_node.kind

            if old_node is None:
                change = Changeset.ADD
            elif new_node is None:
                change = Changeset.DELETE
            elif old_node.path != new_node.path:
                change = Changeset.MOVE
            else:
                change = Changeset.EDIT
            yield (old_node, new_node, kind, change)

    def sync(self, rev_callback=None, clean=False):
        """Import new changesets into the database.

        `rev_callback` and `clean` are forwarded to ``update_darcsdb``.
        With eager annotations enabled, the content and the annotations of
        every file added or edited by the new changesets are computed right
        away, and the time spent on each changeset is logged.
        """
        from time import time
        from dbutil import format_elapsed_time as trepr

        # Import any new changesets, if any
        newrevs = update_darcsdb(self.db, self.__cmd, self.log, self.id,
                                 rev_callback=rev_callback, clean=clean)

        # In eager mode, precompute the content and the annotations for
        # each file modified or added by latest changesets
        if self.eager_annotations and newrevs:
            count = len(newrevs)
            for index, rev in enumerate(newrevs):
                t0 = time()
                cset = self.get_changeset(rev)
                for path, kind, change, prev_path, prev_rev \
                        in cset.get_changes():
                    if kind == Node.FILE and change in (Changeset.EDIT,
                                                        Changeset.ADD):
                        node = self.get_node(path, rev)
                        node.get_content()
                        node.get_annotations()
                t1 = time()
                usec = (t1 - t0) * 1e6
                self.log.info('Preannotated changeset %d/%d: %s',
                              index + 1, count, trepr(usec))


class DarcsNode(Node):
    """Implement darcs specific logic around a ``Node``."""

    def __init__(self, node_id, node_type, path, rev, repos, cmd, log=None):
        """Build the node for `path` at `rev` inside `repos`.

        `node_type` is the darcs backend type and is translated to the
        trac kind; `node_id` is None for the root directory.  `cmd` is the
        ``DarcsCommand`` used to read file content.  `log` defaults to
        None, but ``get_content()`` calls it unconditionally.
        """
        kind = _node_type_map[node_type]
        Node.__init__(self, repos, path, rev, kind)
        self.__node_id = node_id
        self.__node_type = node_type
        self.__cmd = cmd
        self.__log = log
        self.created_path = path
        self.created_rev = rev

    def _get_node_id(self):
        """Return the backend id of this node (None for the root)."""
        return self.__node_id

    def _get_cached_rev(self):
        """Return the revision whose cache entry serves this node.

        Returns None when the node has no later change, meaning the
        pristine (HEAD) content applies.  Otherwise the result is the most
        recent revision, between this node's revision and the one just
        before its next change, that already has cached content; without
        any cached entry it is the revision just before the next change.
        """
        # if there are no future versions, use the HEAD
        nrev = self._get_next()
        if nrev is None:
            return None

        maxrange = nrev[1] - 1

        # if it's just one hop from node's revision, we're done
        if maxrange == self.rev:
            return maxrange

        # ok, let's see if there is already a cache in the range
        c = self.repos.db.cursor()
        c.execute('SELECT max(rev) FROM darcs_cache '
                  'WHERE repo_id = %s AND node_id = %s '
                  'AND rev >= %s AND rev <= %s AND content IS NOT NULL',
                  (self.repos.id, self.__node_id, self.rev, maxrange))
        row = c.fetchone()
        if row[0] is not None:
            return row[0]

        # No luck, return the most recent revision before the next
        return maxrange

    def get_content(self):
        """Return a file-like object with the node content.

        Directories yield None.  The content comes from ``darcs_cache``
        when available; otherwise it is read through the darcs command and
        stored in the cache.  When the node has no later change the
        pristine file is served and nothing is cached.
        """
        if self.__node_type == NODE_DIR_TYPE:
            return None

        c = self.repos.db.cursor()

        # Since darcs is faster and faster in building the content
        # of a file for more and more recent changes, compute the
        # optimal revision to build the cache of
        crev = self._get_cached_rev()

        if crev is not None:
            # check if the file content is there in the cache
            c.execute('SELECT content FROM darcs_cache '
                      'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                      (self.repos.id, self.__node_id, crev))
            row = c.fetchone()
            if row is not None:
                self.__log.debug('Cache hit %s at rev %s', self.path, crev)
                # if present just return it
                data = str(buffer(row[0]))
            else:
                self.__log.debug('Building content cache for %s at rev %s',
                                 self.path, crev)
                # load the file content from the repo
                c.execute('SELECT hash FROM darcs_changesets '
                          'WHERE repo_id = %s AND rev = %s',
                          (self.repos.id, crev,))
                cset_hash = c.fetchone()[0]
                data = self.__cmd.cat(cset_hash, self.path)

                # save the file content in the cache
                c = self.repos.db.cursor()
                try:
                    c.execute('INSERT INTO darcs_cache '
                              '(repo_id,node_id,rev,content,size) '
                              'VALUES (%s,%s,%s,%s,%s)',
                              (self.repos.id, self.__node_id, crev,
                               buffer(data), len(data)))
                except Exception:
                    # Narrower than a bare "except:" so that interpreter
                    # exit and keyboard interrupts are not intercepted.
                    self.repos.db.rollback()
                    c = self.repos.db.cursor()
                    # Maybe some other thread computed the same content
                    c.execute('SELECT content FROM darcs_cache '
                              'WHERE repo_id = %s AND node_id = %s '
                              'AND rev = %s',
                              (self.repos.id, self.__node_id, crev))
                    row = c.fetchone()
                    if row is not None:
                        self.__log.debug('Late cache hit %s at rev %s',
                                         self.path, crev)
                        data = str(buffer(row[0]))
                    else:
                        # Nothing cached by anyone else: the INSERT
                        # failure is a real error, let it propagate.
                        raise
                else:
                    self.repos.db.commit()
        else:
            # Use the HEAD
            self.__log.debug('Serving pristine file %s, '
                             'no changes since rev %s',
                             self.path, self.rev)
            data = self.__cmd.cat(None, self.path)

        return StringIO.StringIO(data)

    def get_entries(self):
        """Yield a ``DarcsNode`` for each direct child of this directory.

        Nothing is yielded for a file node.
        """
        if self.__node_type == NODE_FILE_TYPE:
            return

        if self.__node_id is None:
            # Children of the root have no parent id.
            cond = 'dnc.parent_id IS NULL'
        else:
            cond = 'dnc.parent_id = %d' % self.__node_id
        q, args = query_nodes_for_revision(self.repos.id, self.rev, cond)
        c = self.repos.db.cursor()
        c.execute(q, args)
        for node_id, rev, path, _ in c:
            node_type = get_node_type(self.repos.db, self.repos.id, node_id)
            yield DarcsNode(node_id, node_type, path, rev, self.repos,
                            self.__cmd, self.__log)

    def get_history(self, limit=None):
        """Yield ``(path, rev, change)`` going back from this revision.

        For the root every revision from this one down to 1 is reported as
        an EDIT; for any other node the recorded changes are read from
        ``darcs_node_changes``, newest first.  At most `limit` entries are
        produced when `limit` is given.
        """
        if self.path == '/':
            revs = range(self.rev, 0, -1)
            if limit is not None:
                # Honour the limit for the root too, as the SQL branch does.
                revs = revs[:limit]
            for i in revs:
                yield (self.path, i, Changeset.EDIT)
            return

        c = self.repos.db.cursor()
        q = ('SELECT path,rev,change_kind FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev <= %s '
             'ORDER BY rev DESC')
        if limit is not None:
            q += ' LIMIT %d' % limit
        c.execute(q, (self.repos.id, self.__node_id, self.rev))
        for path, rev, change in c:
            yield (path, rev, _change_map[change])

    def _get_next(self):
        """Return the first entry of ``_get_future(1)``, or None if empty."""
        # A plain loop avoids the Python-2-only ".next()" generator method.
        for entry in self._get_future(1):
            return entry
        return None

    def _get_future(self, limit=None):
        """Yield ``(path, rev, change)`` for changes after this revision.

        For the root the revisions from the youngest one down to the one
        after this node's are reported as EDITs (`limit` is not applied
        there).  For any other node the recorded changes are read from
        ``darcs_node_changes`` in ascending revision order, at most
        `limit` of them when given.
        """
        if self.path == '/':
            # The youngest revision is known by the repository, not by the
            # node: the node itself has no get_youngest_rev() method.
            youngest = self.repos.get_youngest_rev()
            # NOTE(review): this branch iterates newest-first while the SQL
            # branch below is oldest-first -- confirm which one is intended.
            for i in range(youngest, self.rev, -1):
                yield (self.path, i, Changeset.EDIT)
            return

        c = self.repos.db.cursor()
        q = ('SELECT path,rev,change_kind FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev > %s '
             'ORDER BY rev')
        if limit is not None:
            q += ' LIMIT %d' % limit
        c.execute(q, (self.repos.id, self.__node_id, self.rev))
        for path, rev, change in c:
            yield (path, rev, _change_map[change])

    # NOTE(review): ``def get_annotations(self):`` begins at this point in
    # the original text.  Its docstring reads:
    #
    #     Provide detailed backward history for the content of this Node.
    #
    #     Retrieve an array of revisions parsing `darcs annotate`. Since
    #     that is (still) not fast enough for some repository, we write a
    #     cache of the information: a future annotate on the same file at
    #     the same revision won't reexecute `darcs annotate`.
    #
    # The method body continues past the end of this block and is truncated
    # in the text under review, so it is not reproduced here rather than
    # being guessed at.
""" from xml.sax import make_parser from xml.sax.handler import ContentHandler, ErrorHandler c = self.repos.db.cursor() # Since darcs is faster and faster in building the content # of a file for more and more recent changes, compute the # optimal revision to build the cache of crev = self._get_cached_rev() if crev is None: crev = self.rev # Check if the annotate cache is already present c.execute('SELECT up_to_line,blame_rev FROM darcs_annotate_cache ' 'WHERE repo_id = %s AND node_id = %s AND rev = %s ' 'ORDER BY up_to_line', (self.repos.id, self.__node_id, crev)) row = c.fetchone() if row is not None: self.__log.debug('Annotate cache hit for %s at rev %s', self.path, crev) revs = [] line = 0 # Expand the cache, producing a list of revisions, one per line while row is not None: while linerep.id" " AND rep2.name='identity'" " AND rep2.value=rep.value", (self.repos.id,)) other_rids = [r[0] for r in c.fetchall()] if not other_rids: return props # Compute a list of "equivalent changesets", when the same # changeset is present in other related repositories. other_rids = ','.join(str(rid) for rid in other_rids), c.execute("SELECT rep.value, dcs.rev " "FROM darcs_changesets dcs, repository rep " "WHERE dcs.hash = %%s" " AND dcs.repo_id IN (%s)" " AND rep.id = dcs.repo_id AND rep.name = 'name'" "ORDER BY rep.value" % other_rids, (self.__hash,)) eqcsets = [(repo, rev) for repo,rev in c.fetchall()] props['PresentIn'] = eqcsets # Compute the opposite list, that is the repositories where # the changeset is missing. c.execute("SELECT rep.value " "FROM repository rep " "WHERE rep.name='name'" " AND rep.id IN (%s)" " AND NOT EXISTS (" "SELECT dcs.rev " "FROM darcs_changesets dcs " "WHERE dcs.repo_id=rep.id" " AND dcs.hash=%%s) " "ORDER BY rep.value" % other_rids, (self.__hash,)) mir = [r[0] for r in c.fetchall()] props['MissingIn'] = mir return props