source: tailor/vcpx/repository/darcs/source.py @ 1330

Revision 1330, 21.8 KB checked in by John Goerzen <jgoerzen@…>, 6 years ago (diff)

Revert earlier change to darcs output
The problem was in some instructions on the hg wiki

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    Changeset flavour that normalizes the entries reported by darcs:

    - "add A; rename A B" is collapsed into "add B"
    - "add A; remove A" cancels out
    - "rename A B; remove B" is collapsed into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize the parent with ``entries=None``, then feed each of
        the given `entries` through ``addEntry()`` so that the darcs
        specific fixups are applied to every one of them.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return

        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Register `entry` in this changeset, reconciling it with the
        entries already known:

        - collapse "add A; rename A B" into "add B"
        - annihilate "add A; remove A"
        - collapse "rename A B; remove B" into "remove A"

        Return the entry that now represents the change (either `entry`
        itself or the already known entry that got rewritten), or
        ``None`` when an addition and a removal cancelled each other.
        """

        # The parser normally hands over ready-made ChangesetEntries;
        # anything else is left to the parent implementation.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        if entry.action_kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) emits the changes in
            # the wrong order: renames always come first, even when they
            # obviously follow the addition of the same entry.  So an
            # already known rename of the entry being added is really
            # the addition itself, under its final name.

            # darcs hopefully uses forward slashes also under win
            prefix = entry.name + '/'

            for position, known in enumerate(self.entries):
                renamed = known.action_kind == known.RENAMED

                if renamed and known.old_name == entry.name:
                    # Since removes are the first entries in the list,
                    # anticipating the add here also cures the case of
                    # an addition following an edit.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # An add_dir must precede any add_file and ren_file
                # targeting that directory, and an add_file must precede
                # any edit of the same file: insert right before the
                # first entry that depends on this one.
                if (known.name == entry.name
                    or known.name.startswith(prefix)
                    or (renamed and known.old_name.startswith(prefix))):
                    self.entries.insert(position, entry)
                    return entry

        elif entry.action_kind == entry.DELETED:
            # "rename A B; remove B" is turned into "remove A" by
            # rewriting the known rename.
            #
            # If instead the removed entry was added by the same patch,
            # the two annihilate: a bug in darcs (possibly fixed in
            # recent versions) created patches with ADD+EDIT+REMOVE of a
            # single file (see tailor ticket #71, or darcs issue185).
            # Another bug (still present in 1.0.8) hides that and makes
            # a tailor-side workaround practically impossible; a darcs
            # fix is attached to ticket #71, and without it tailor does
            # not have enough information to do the right thing.

            for position, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue

                if known.action_kind == known.RENAMED:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known

                if known.action_kind == known.ADDED:
                    del self.entries[position]
                    return None

        # Either an edit or a rename (or an add/remove with nothing to
        # reconcile against): edits belong at the end, and since renames
        # come in very early, appending is fine for them too.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse the XML output of ``darcs changes``.

    This is a generator: it yields the changesets produced by
    ``changesets_from_darcschanges_unsafe()`` one at a time, after
    resetting their ``tags`` attribute to ``None`` because the tag
    information collected there is (currently) not reliable.

    All the arguments are handed over unchanged to
    ``changesets_from_darcschanges_unsafe()``.
    """

    for changeset in changesets_from_darcschanges_unsafe(changes, unidiff,
                                                         repodir, chunksize):
        changeset.tags = None
        yield changeset
142
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object carrying the XML output of ``darcs
    changes``; it is read in pieces of `chunksize` and a
    ``DarcsChangeset`` is yielded for each completed ``<patch>``
    element.  When both `unidiff` and `repodir` are given, the output of
    ``darcs diff --unified`` for each patch is stored in the ``unidiff``
    attribute of its changeset.

    Warning: the tag information in the changesets yielded by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid assumption
    in general: a tag that does not depend on patch P can be pulled in
    from another darcs repo after P.  The tag info is collected anyway
    because DarcsWorkingDir._currentTags() can use it safely despite
    this problem.  Hopefully the problem will eventually be fixed and
    this function can be renamed changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    text_elements = ('name', 'comment')
    path_elements = ('add_file', 'add_directory', 'modify_file',
                     'remove_file', 'remove_directory')

    def parse_patch_date(text):
        # Current darcs emits compact dates such as 20040619130027,
        # while old patches use the form Sun Oct 20 20:01:05 EDT 2002.
        try:
            stamp = datetime(*strptime(text, '%Y%m%d%H%M%S')[:6])
        except ValueError:
            stamp = datetime(*strptime(text[:19] + text[-5:],
                                       '%a %b %d %H:%M:%S %Y')[:6])
        # Labelling as UTC is not true for the old format, but oh well
        return stamp.replace(tzinfo=UTC)

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            self.changesets = []
            self.current = None
            self.current_field = []
            self.darcsdiff = None
            if unidiff and repodir:
                self.darcsdiff = ExternalCommand(
                    command=["darcs", "diff", "--unified",
                             "--repodir", repodir,
                             "--patch", "%(patchname)s"])

        def startElement(self, name, attributes):
            if name == 'patch':
                patch = self.current = {}
                patch['author'] = attributes['author']
                patch['date'] = parse_patch_date(attributes['date'])
                patch['comment'] = ''
                patch['hash'] = attributes['hash']
                patch['entries'] = []
            elif name in text_elements or name in path_elements:
                # Start collecting the character data of this element
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                patch = self.current
                cset = DarcsChangeset(patch['name'],
                                      patch['date'],
                                      patch['author'],
                                      patch['comment'],
                                      patch['entries'],
                                      tags=patch.get('tags', []))
                cset.darcs_hash = patch['hash']
                if self.darcsdiff:
                    diff = self.darcsdiff.execute(TZ='UTC', stdout=PIPE,
                                                  patchname=cset.revision)[0]
                    cset.unidiff = diff.read()

                self.changesets.append(cset)
                self.current = None
            elif name in text_elements:
                text = ''.join(self.current_field)
                if text.startswith('TAG '):
                    self.current.setdefault('tags', []).append(text[4:])
                self.current[name] = text
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in path_elements:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                if name in ('add_file', 'add_directory'):
                    entry.action_kind = entry.ADDED
                elif name == 'modify_file':
                    entry.action_kind = entry.UPDATED
                else:
                    entry.action_kind = entry.DELETED

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    handler = DarcsXMLChangesHandler()
    parser = make_parser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the parser piecewise, handing out the changesets completed by
    # each chunk before reading the next one.
    while True:
        chunk = changes.read(chunksize)
        if not chunk:
            break
        parser.feed(chunk)
        completed, handler.changesets = handler.changesets, []
        for cset in completed:
            yield cset

    parser.close()
    for cset in handler.changesets:
        yield cset
254
255
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Recognizes revisions spelled as a darcs patch hash, that is
    # <14 hex digits>-<5 hex digits>-<40 hex digits>.gz
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Run ``darcs pull --dry-run`` against the upstream repository and
        return the changesets parsed from its output by
        ``_parseDarcsPull()``.  `sincerev` is not used here.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non-zero status.
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        `output` is a file-like object with the textual output of
        ``darcs pull --dry-run``.  This is a generator yielding one
        ``Changeset`` (with a computed ``darcs_hash`` attribute) for
        each listed patch; tags are skipped with a warning.

        The listing ends at the "Making no changes" line or, should the
        output be cut short after a patch, at end of file.
        """
        from datetime import datetime
        from time import strptime
        try:
            # ``sha`` is deprecated and absent from newer Pythons
            from hashlib import sha1 as new
        except ImportError:
            from sha import new
        from vcpx.changes import Changeset

        line = output.readline()
        while line and not (line.startswith('Would pull the following changes:') or
                            line == 'No remote changes to pull in!\n'):
            line = output.readline()

        if line != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            line = output.readline()
            while not line.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # we used to split on the double space before the email,
                # but in this case this is wrong. Then we assumed the date
                # part to be exactly 28 chars long, but what about timezone
                # names like 'CEST'? Waiting for xml output...
                # We still assume there are *two* spaces before the email.
                # The alternative is using some sort of a regex: Aaron Kaplan
                # kindly suggested his own perl snippet
                #    /^(... ... .\d .\d:\d\d:\d\d ...?. \d\d\d\d)  (.*)/ || die;
                #    my ($date, $author) = ($1, $2);
                # but that assumes the two spaces as separator, so I find the
                # following solution easier and by any chance faster too.
                pieces = line.rstrip().split('  ')
                assert len(pieces)>1, "Cannot parse %r as a patch timestamp" % line
                author = pieces.pop()
                date = ' '.join(pieces)
                y, m, d, hh, mm, ss = strptime(date, "%a %b %d %H:%M:%S %Z %Y")[:6]
                date = datetime(y, m, d, hh, mm, ss, 0, UTC)
                line = output.readline().rstrip()
                assert (line.startswith('  * ') or
                        line.startswith('  UNDO:') or
                        line.startswith('  tagged')), \
                        "Got %r but expected the start of the log" % line

                if line.startswith('  *'):
                    name = line[4:]
                else:
                    name = line[2:]

                # The remaining indented lines are the long comment
                changelog = []
                line = output.readline()
                while line.startswith('  '):
                    changelog.append(line[2:-1])
                    line = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))

                # Build the patch identifier out of date, author, name,
                # comment and inversion flag
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())

                if name.startswith('tagged'):
                    self.log.warning("Skipping tag %s because I don't "
                                     "propagate tags from darcs.", name)
                else:
                    yield cset

                # Skip the blank lines separating the patches.  At end
                # of file readline() keeps returning '', so stop there
                # instead of spinning forever.
                while line and not line.strip():
                    line = output.readline()
                if not line:
                    break

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch identified by `changeset` (by hash when
        it carries a ``darcs_hash``, by tag when its revision starts
        with ``tagged ``, otherwise by date, author and name), then
        extend ``changeset.entries`` with the entries reported by
        ``darcs changes --xml-output --summ`` for that patch.

        Return the list of conflicting paths reported by darcs, and
        raise ``ChangesetApplicationFailure`` when the pull exits with
        a non-zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The line following the conflicts banner lists the involved
        # files, each one starting with './'
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split('./')[1:]
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(['./' + f for f in files])
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs reported no matching patch: nothing to add
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (meaning the first patch listed
        by ``darcs changes --reverse`` on the source repository), a
        patch hash as recognized by ``is_hash_rx``, ``'HEAD'``, or
        anything else, which is passed to darcs as a tag.

        Raise ``InvocationError`` when no source repository is
        configured, ``TargetInitializationFailure`` when ``darcs
        initialize``, ``pull`` or ``get`` fails, and
        ``ChangesetApplicationFailure`` when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever way the working directory was populated, ask darcs
        # for the latest patch now present in it
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output)

        return last.next()
Note: See TracBrowser for help on using the repository browser.