# -*- mode: python; coding: utf-8 -*- # :Progetto: Tailor -- Darcs peculiarities when used as a source # :Creato: lun 10 lug 2006 00:04:59 CEST # :Autore: Lele Gaifax # :Licenza: GNU General Public License # """ This module contains the source specific bits of the darcs backend. """ __docformat__ = 'reStructuredText' import re from vcpx.shwrap import ExternalCommand, PIPE, STDOUT from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \ GetUpstreamChangesetsFailure from vcpx.target import TargetInitializationFailure from vcpx.tzinfo import UTC def changesets_from_darcschanges(changes, unidiff=False, repodir=None, chunksize=2**15): """ Parse XML output of ``darcs changes``. Return a list of ``Changeset`` instances. Filters out the (currently incorrect) tag info from changesets_from_darcschanges_unsafe. """ csets = changesets_from_darcschanges_unsafe(changes, unidiff, repodir, chunksize) for cs in csets: cs.tags = None yield cs def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None, chunksize=2**15): """ Do the real work of parsing the change log, including tags. Warning: the tag information in the changsets returned by this function are only correct if each darcs tag in the repo depends on all of the patches that precede it. This is not a valid assumption in general--a tag that does not depend on patch P can be pulled in from another darcs repo after P. We collect the tag info anyway because DarcsWorkingDir._currentTags() can use it safely despite this problem. Hopefully the problem will eventually be fixed and this function can be renamed changesets_from_darcschanges. """ from xml.sax import make_parser from xml.sax.handler import ContentHandler, ErrorHandler from datetime import datetime from vcpx.changes import ChangesetEntry, Changeset class DarcsXMLChangesHandler(ContentHandler): def __init__(self): self.changesets = [] self.current = None self.current_field = [] if unidiff and repodir: cmd = ["darcs", "diff", "--unified", "--repodir", repodir, "--patch", "%(patchname)s"] self.darcsdiff = ExternalCommand(command=cmd) else: self.darcsdiff = None def startElement(self, name, attributes): if name == 'patch': self.current = {} self.current['author'] = attributes['author'] date = attributes['date'] from time import strptime try: # 20040619130027 timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6]) except ValueError: # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002 timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6]) timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well self.current['date'] = timestamp self.current['comment'] = '' self.current['hash'] = attributes['hash'] self.current['entries'] = [] elif name in ['name', 'comment', 'add_file', 'add_directory', 'modify_file', 'remove_file', 'remove_directory']: self.current_field = [] elif name == 'move': self.old_name = attributes['from'] self.new_name = attributes['to'] def endElement(self, name): if name == 'patch': entries = [] todo = self.current['entries'] # Darcs allows "rename A B; remove B": collapse those # into "remove A" while todo: e = todo.pop(0) if e.action_kind == e.RENAMED: lookfor = e.name forget = [] for i,n in enumerate(todo): if n.action_kind == n.DELETED and n.name == lookfor: e.action_kind = e.DELETED e.name = e.old_name e.old_name = None entries.append(e) forget.append(i) forget.reverse() for i in forget: del todo[i] break elif n.action_kind == n.RENAMED and n.old_name == lookfor: forget.append(i) lookfor = n.name if not forget: entries.append(e) else: entries.append(e) # Darcs changes --xml (as of 1.0.7) emits bad ordered hunks: it # begins with file moves, apparently for no good reason. Do as # little reordering as needed to adjust the meaning, ie moving all # add_dirs before add_file and ren_file that have that dir as # target sorted = False while not sorted: sorted = True for i,e in enumerate(entries): if e.action_kind == e.RENAMED: for j,n in enumerate(entries[i+1:]): if ((e.name.startswith(n.name+'/') or e.old_name==n.name) and (n.action_kind == n.ADDED or n.action_kind == n.RENAMED)): m = entries.pop(i+1+j) entries.insert(i, m) sorted = False cset = Changeset(self.current['name'], self.current['date'], self.current['author'], self.current['comment'], entries, tags=self.current.get('tags',[])) cset.darcs_hash = self.current['hash'] if self.darcsdiff: cset.unidiff = self.darcsdiff.execute( stdout=PIPE, patchname=cset.revision)[0].read() self.changesets.append(cset) self.current = None elif name in ['name', 'comment']: val = ''.join(self.current_field) if val[:4] == 'TAG ': self.current.setdefault('tags',[]).append(val[4:]) self.current[name] = val elif name == 'move': entry = ChangesetEntry(self.new_name) entry.action_kind = entry.RENAMED entry.old_name = self.old_name self.current['entries'].append(entry) elif name in ['add_file', 'add_directory', 'modify_file', 'remove_file', 'remove_directory']: entry = ChangesetEntry(''.join(self.current_field).strip()) entry.action_kind = { 'add_file': entry.ADDED, 'add_directory': entry.ADDED, 'modify_file': entry.UPDATED, 'remove_file': entry.DELETED, 'remove_directory': entry.DELETED }[name] self.current['entries'].append(entry) def characters(self, data): self.current_field.append(data) parser = make_parser() handler = DarcsXMLChangesHandler() parser.setContentHandler(handler) parser.setErrorHandler(ErrorHandler()) chunk = changes.read(chunksize) while chunk: parser.feed(chunk) for cs in handler.changesets: yield cs handler.changesets = [] chunk = changes.read(chunksize) parser.close() for cs in handler.changesets: yield cs class DarcsSourceWorkingDir(UpdatableSourceWorkingDir): """ A source working directory under ``darcs``. """ is_hash_rx = re.compile('[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz') def _getUpstreamChangesets(self, sincerev): """ Do the actual work of fetching the upstream changeset. """ from datetime import datetime from time import strptime from sha import new from vcpx.changes import Changeset cmd = self.repository.command("pull", "--dry-run") pull = ExternalCommand(cwd=self.repository.basedir, command=cmd) output = pull.execute(self.repository.repository, stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0] if pull.exit_status: raise GetUpstreamChangesetsFailure( "%s returned status %d saying\n%s" % (str(pull), pull.exit_status, output.read())) l = output.readline() while l and not (l.startswith('Would pull the following changes:') or l == 'No remote changes to pull in!\n'): l = output.readline() if l <> 'No remote changes to pull in!\n': ## Sat Jul 17 01:22:08 CEST 2004 lele@nautilus ## * Refix _getUpstreamChangesets for darcs l = output.readline() while not l.startswith('Making no changes: this is a dry run.'): # Assume it's a line like # Sun Jan 2 00:24:04 UTC 2005 lele@nautilus.homeip.net # we used to split on the double space before the email, # but in this case this is wrong. Waiting for xml output, # is it really sane asserting date's length to 28 chars? date = l[:28] author = l[30:-1] y,m,d,hh,mm,ss,d1,d2,d3 = strptime(date, "%a %b %d %H:%M:%S %Z %Y") date = datetime(y,m,d,hh,mm,ss,0,UTC) l = output.readline() assert (l.startswith(' * ') or l.startswith(' UNDO:') or l.startswith(' tagged')) if l.startswith(' *'): name = l[4:-1] else: name = l[2:-1] changelog = [] l = output.readline() while l.startswith(' '): changelog.append(l[2:-1]) l = output.readline() cset = Changeset(name, date, author, '\n'.join(changelog)) compactdate = date.strftime("%Y%m%d%H%M%S") if name.startswith('UNDO: '): name = name[6:] inverted = 't' else: inverted = 'f' phash = new() phash.update(name) phash.update(author) phash.update(compactdate) phash.update(''.join(changelog)) phash.update(inverted) cset.darcs_hash = '%s-%s-%s.gz' % (compactdate, new(author).hexdigest()[:5], phash.hexdigest()) if name.startswith('tagged'): self.log.warning("Skipping tag %s because I don't " "propagate tags from darcs.", name) else: yield cset while not l.strip(): l = output.readline() def _applyChangeset(self, changeset): """ Do the actual work of applying the changeset to the working copy. """ needspatchesopt = False if hasattr(changeset, 'darcs_hash'): selector = '--match' revtag = 'hash ' + changeset.darcs_hash elif changeset.revision.startswith('tagged '): selector = '--tag' revtag = changeset.revision[7:] else: selector = '--match' revtag = 'date "%s" && author "%s"' % ( changeset.date.strftime("%Y%m%d%H%M%S"), changeset.author) # The 'exact' matcher doesn't groke double quotes: # """currently there is no provision for escaping a double # quote, so you have to choose between matching double # quotes and matching spaces""" if not '"' in changeset.revision: revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%') else: needspatchesopt = True cmd = self.repository.command("pull", "--all", "--quiet", selector, revtag) if needspatchesopt: cmd.extend(['--patches', re.escape(changeset.revision)]) pull = ExternalCommand(cwd=self.repository.basedir, command=cmd) output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0] if pull.exit_status: raise ChangesetApplicationFailure( "%s returned status %d saying\n%s" % (str(pull), pull.exit_status, output.read())) conflicts = [] line = output.readline() while line: if line.startswith('We have conflicts in the following files:'): files = output.readline()[:-1].split('./')[1:] self.log.warning("Conflict after 'darcs pull': %s", ' '.join(files)) conflicts.extend(['./' + f for f in files]) line = output.readline() cmd = self.repository.command("changes", selector, revtag, "--xml-output", "--summ") changes = ExternalCommand(cwd=self.repository.basedir, command=cmd) last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0]) try: changeset.entries.extend(last.next().entries) except StopIteration: pass return conflicts def _handleConflict(self, changeset, conflicts, conflict): """ Handle the conflict raised by the application of the upstream changeset. Override parent behaviour: with darcs, we need to execute a revert on the conflicted files, **trashing** local changes, but there should be none of them in tailor context. """ self.log.info("Reverting changes to %s, to solve the conflict", ' '.join(conflict)) cmd = self.repository.command("revert", "--all") revert = ExternalCommand(cwd=self.repository.basedir, command=cmd) revert.execute(conflict) def _checkoutUpstreamRevision(self, revision): """ Concretely do the checkout of the upstream revision and return the last applied changeset. """ from os.path import join, exists from os import mkdir from vcpx.source import InvocationError if not self.repository.repository: raise InvocationError("Must specify a the darcs source repository") if revision == 'INITIAL' or self.is_hash_rx.match(revision): initial = True if revision == 'INITIAL': cmd = self.repository.command("changes", "--xml-output", "--repo", self.repository.repository, "--reverse") changes = ExternalCommand(command=cmd) output = changes.execute(stdout=PIPE, stderr=STDOUT)[0] if changes.exit_status: raise ChangesetApplicationFailure( "%s returned status %d saying\n%s" % (str(changes), changes.exit_status, output and output.read() or '')) csets = changesets_from_darcschanges(output) changeset = csets.next() revision = 'hash %s' % changeset.darcs_hash else: revision = 'hash %s' % revision else: initial = False if self.repository.subdir == '.' or exists(self.repository.basedir): # This is currently *very* slow, compared to the darcs get # below! if not exists(join(self.repository.basedir, '_darcs')): if not exists(self.repository.basedir): mkdir(self.repository.basedir) cmd = self.repository.command("initialize") init = ExternalCommand(cwd=self.repository.basedir, command=cmd) init.execute() if init.exit_status: raise TargetInitializationFailure( "%s returned status %s" % (str(init), init.exit_status)) cmd = self.repository.command("pull", "--all", "--quiet") if revision and revision<>'HEAD': cmd.extend([initial and "--match" or "--tag", revision]) dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd) output = dpull.execute(self.repository.repository, stdout=PIPE, stderr=STDOUT)[0] if dpull.exit_status: raise TargetInitializationFailure( "%s returned status %d saying\n%s" % (str(dpull), dpull.exit_status, output.read())) else: # Use much faster 'darcs get' cmd = self.repository.command("get", "--quiet") if revision and revision<>'HEAD': cmd.extend([initial and "--to-match" or "--tag", revision]) else: cmd.append("--partial") dget = ExternalCommand(command=cmd) output = dget.execute(self.repository.repository, self.repository.basedir, stdout=PIPE, stderr=STDOUT)[0] if dget.exit_status: raise TargetInitializationFailure( "%s returned status %d saying\n%s" % (str(dget), dget.exit_status, output.read())) cmd = self.repository.command("changes", "--last", "1", "--xml-output") changes = ExternalCommand(cwd=self.repository.basedir, command=cmd) output = changes.execute(stdout=PIPE, stderr=STDOUT)[0] if changes.exit_status: raise ChangesetApplicationFailure( "%s returned status %d saying\n%s" % (str(changes), changes.exit_status, output.read())) last = changesets_from_darcschanges(output) return last.next()