source: tailor/vcpx/repository/darcs/source.py @ 1362

Revision 1362, 21.9 KB checked in by zooko@…, 6 years ago (diff)

fix parsing of darcs conflict output

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that normalizes the entry list reported by darcs.

    While entries are being added, redundant sequences are rewritten:

    - "add A; rename A B" becomes "add B"
    - "add A; remove A" cancels out
    - "rename A B; remove B" becomes "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Build the changeset, feeding `entries` one at a time through
        ``addEntry()`` so that the darcs fixups get applied.
        """

        # The base class is always given ``entries=None``: the entries
        # are inserted below, after normalization.
        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Record `entry`, merging it with an already known entry when
        darcs reported a redundant sequence.

        Return the entry that ends up representing the change (it may
        be an existing one, modified in place), or ``None`` when an
        addition and a removal of the same name annihilate each other.
        Anything that is not a ``ChangesetEntry`` is handed over to the
        base class implementation, whose result is returned.
        """

        # Not expected to happen, since the parser builds the
        # ChangesetEntries itself, but stay on the safe side.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        if entry.action_kind == entry.ADDED:
            # ``darcs changes --xml`` (as of 1.0.7) lists renames first,
            # even when they obviously follow the addition of the same
            # entry. So when a rename *from* this name is already
            # known, that rename is really the addition of the new
            # name: convert it and do not insert anything else.

            # darcs hopefully uses forward slashes under win too
            prefix = entry.name + '/'

            for position, known in enumerate(self.entries):
                is_rename = known.action_kind == known.RENAMED

                if is_rename and known.old_name == entry.name:
                    # Removes come first in the list, so anticipating
                    # the add here also cures the case of an addition
                    # that follows an edit.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # The addition of a directory must precede any entry
                # living inside it (or renamed away from it), and the
                # addition of a file must precede its edits: insert
                # right before the first such entry.
                if known.name == entry.name or known.name.startswith(prefix):
                    must_precede = True
                else:
                    must_precede = (is_rename
                                    and known.old_name.startswith(prefix))

                if must_precede:
                    self.entries.insert(position, entry)
                    return entry

        elif entry.action_kind == entry.DELETED:
            # A known rename *to* this name ("rename A B; remove B") is
            # turned into the removal of the old name.
            #
            # If instead the name was added by this same patch, the two
            # entries annihilate: a darcs bug produced patches with
            # ADD+EDIT+REMOVE of a single file (tailor ticket #71,
            # darcs issue185). Another darcs bug (still there in 1.0.8)
            # hides the needed information, so this only works with
            # the fix attached to ticket #71.
            for position, known in enumerate(self.entries):
                if known.name == entry.name:
                    if known.action_kind == known.RENAMED:
                        known.action_kind = known.DELETED
                        known.name = known.old_name
                        known.old_name = None
                        return known
                    if known.action_kind == known.ADDED:
                        del self.entries[position]
                        return None

        # Either an edit or a rename (or an add/remove that matched
        # nothing above): edits belong at the end, and since renames
        # show up very early, appending is fine for them too.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields, one at a time and in the same
    order, the ``Changeset`` instances produced by
    ``changesets_from_darcschanges_unsafe()``, to which every argument
    is forwarded untouched.

    NOTE(review): this wrapper is meant to filter out the (currently
    incorrect) tag info collected by the unsafe variant, but as written
    it passes each changeset through unmodified.
    """

    for changeset in changesets_from_darcschanges_unsafe(
            changes, unidiff, repodir, chunksize):
        yield changeset
141
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object holding the XML output of ``darcs
    changes``: it is read `chunksize` characters at a time and fed
    incrementally to a SAX parser. This is a generator yielding a
    ``DarcsChangeset`` for each completed ``<patch>`` element.

    When both `unidiff` and `repodir` are true, ``darcs diff
    --unified`` is executed for each patch and its output stored in
    the ``unidiff`` attribute of the changeset.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from datetime import datetime
    from time import strptime
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler

    # Elements whose text content is the path of the touched entry
    path_elements = ('add_file', 'add_directory', 'modify_file',
                     'remove_file', 'remove_directory')

    class DarcsXMLChangesHandler(ContentHandler):
        """SAX handler that assembles a changeset per ``<patch>``."""

        def __init__(self):
            ContentHandler.__init__(self)
            # Changesets completed so far and not yet consumed
            self.changesets = []
            # Attributes of the patch being parsed, None outside of one
            self.current = None
            # Pieces of text collected since the last reset
            self.current_field = []
            if unidiff and repodir:
                # ``%(patchname)s`` is filled in at execution time
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: drop the timezone
                    # name, keeping the first 19 and last 5 characters
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current = {}
                self.current['author'] = attributes['author']
                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
            elif name in ('name', 'comment') or name in path_elements:
                # Start collecting the text of this element afresh
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ('name', 'comment'):
                val = ''.join(self.current_field)
                # A tag is a patch whose *name* starts with "TAG ": a
                # long comment that happens to begin with the same
                # words must not be recorded as a tag.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in path_elements:
                # The collected text may carry surrounding whitespace
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the parser chunk by chunk, handing out the changesets
    # completed by each chunk before reading the next one.
    chunk = changes.read(chunksize)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = changes.read(chunksize)
    parser.close()
    for cs in handler.changesets:
        yield cs
253
254
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Matches strings starting with 14 hex digits, 5 hex digits and 40
    # hex digits, dash separated and followed by ".gz": the same layout
    # _parseDarcsPull() gives to ``darcs_hash``.
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Run ``darcs pull --dry-run`` against the upstream repository and
        return the generator built by ``_parseDarcsPull()`` on its
        output. `sincerev` is not used here.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non-zero status.
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        `output` is a file-like object, read line by line. This is a
        generator yielding a ``Changeset`` (with a computed
        ``darcs_hash`` attribute) for each patch listed between the
        "Would pull the following changes:" header and either the
        "Making no changes:  this is a dry run." trailer or the end of
        the output. Nothing is yielded when darcs reports that there
        are no remote changes.

        An ``AssertionError`` is raised on a line that does not look
        like the expected patch header or log start.
        """
        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1 as new
        except ImportError:
            # Interpreters predating hashlib
            from sha import new
        from vcpx.changes import Changeset

        line = output.readline()
        while line and not (line.startswith('Would pull the following changes:') or
                            line == 'No remote changes to pull in!\n'):
            line = output.readline()

        if line == 'No remote changes to pull in!\n':
            return

        ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
        ##   * Refix _getUpstreamChangesets for darcs

        line = output.readline()
        while not line.startswith('Making no changes:  this is a dry run.'):
            # Assume it's a line like
            #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
            # Splitting on the first double space is wrong (see the day
            # of month above), and the date part has no fixed length
            # because of timezone names like 'CEST'. We still assume
            # there are *two* spaces before the email, so the author is
            # the last piece and everything else, joined back with a
            # single space, is the date.
            pieces = line.rstrip().split('  ')
            assert len(pieces)>1, "Cannot parse %r as a patch timestamp" % line
            author = pieces.pop()
            date = ' '.join(pieces)
            y,m,d,hh,mm,ss = strptime(date, "%a %b %d %H:%M:%S %Z %Y")[:6]
            date = datetime(y,m,d,hh,mm,ss,0,UTC)
            line = output.readline().rstrip()
            assert (line.startswith('  * ') or
                    line.startswith('  UNDO:') or
                    line.startswith('  tagged')), \
                    "Got %r but expected the start of the log" % line

            if line.startswith('  *'):
                name = line[4:]
            else:
                name = line[2:]

            # The long comment: every following line indented by two
            # spaces, stripped of the indent and of the last character
            changelog = []
            line = output.readline()
            while line.startswith('  '):
                changelog.append(line[2:-1])
                line = output.readline()

            cset = Changeset(name, date, author, '\n'.join(changelog))
            compactdate = date.strftime("%Y%m%d%H%M%S")
            if name.startswith('UNDO: '):
                name = name[6:]
                inverted = 't'
            else:
                inverted = 'f'

            if name.startswith('tagged '):
                name = name[7:]
                if cset.tags is None:
                    cset.tags = [name]
                else:
                    cset.tags.append(name)
                name = "TAG " + name

            # Build the hash from name, author, date, comment and the
            # inversion flag, prefixed by the date and by the first
            # five hex digits of the author's digest
            phash = new()
            phash.update(name)
            phash.update(author)
            phash.update(compactdate)
            phash.update(''.join(changelog))
            phash.update(inverted)
            cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                               new(author).hexdigest()[:5],
                                               phash.hexdigest())

            yield cset

            # Skip the blank lines separating the patches. At end of
            # file readline() keeps returning '', which is blank too:
            # stop there instead of spinning forever.
            while line and not line.strip():
                line = output.readline()
            if not line:
                break

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch identified by `changeset` (by hash when
        it carries a ``darcs_hash``, by tag when its revision starts
        with 'tagged ', otherwise by date, author and name), then ask
        ``darcs changes --summ`` about it and append the reported
        entries to ``changeset.entries``.

        Return the list of conflicting files reported by darcs, and
        raise ``ChangesetApplicationFailure`` when the pull exits with
        a non-zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't grok double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                # Fall back on a regexp match of the patch name
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The conflicting files are listed, space separated, on the
        # line following the announcement
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split(' ')
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(files)
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs reported no patch: leave the entries alone
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be 'INITIAL' (the first patch listed by ``darcs
        changes --reverse`` on the upstream repository), a string
        matching ``is_hash_rx`` (that very patch), 'HEAD' or a false
        value (everything), or anything else, that is taken as a tag
        name.

        The working directory is populated with ``darcs initialize``
        followed by ``darcs pull`` when it already exists or when the
        repository subdir is '.', with ``darcs get`` otherwise.

        Raise ``InvocationError`` when no source repository is
        configured, ``TargetInitializationFailure`` when the
        initialize, pull or get step fails, and
        ``ChangesetApplicationFailure`` when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                              "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                # With --reverse the first changeset comes out first
                csets = changesets_from_darcschanges(output)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        # 'HEAD' and false values select the whole history
        wants_revision = revision and revision != 'HEAD'

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if wants_revision:
                    if initial:
                        cmd.extend(["--match", revision])
                    else:
                        cmd.extend(["--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if wants_revision:
                if initial:
                    cmd.extend(["--to-match", revision])
                else:
                    cmd.extend(["--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Ask the working directory for its most recent patch
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output)

        return last.next()
Note: See TracBrowser for help on using the repository browser.