source: tailor/vcpx/repository/darcs/source.py @ 1332

Revision 1332, 21.8 KB checked in by John Goerzen <jgoerzen@…>, 6 years ago (diff)

Added initial support for not-quite-right tags from Darcs

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A changeset that irons out darcs idiosyncrasies as entries are added:

    - "add A; rename A B" is collapsed into "add B"
    - "add A; remove A" is annihilated
    - "rename A B; remove B" is collapsed into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize a new DarcsChangeset.

        The `entries`, if any, are deliberately not handed to the base
        constructor: each one goes through ``addEntry()`` instead, so
        that the fixups described on that method get applied.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return

        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Register `entry`, merging it with the entries already known:

        - "add A; rename A B" is collapsed into "add B"
        - "add A; remove A" is annihilated
        - "rename A B; remove B" is collapsed into "remove A"

        Return the entry that now represents the change (possibly an
        already known one, modified in place), or None when an addition
        and a removal cancelled each other out.
        """

        # The parser always feeds ready-made ChangesetEntry instances, so
        # this is not expected to happen; if it does, let the base class
        # deal with it.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) emits the changes in
            # the wrong order: renames always come first, even when
            # they obviously follow the addition of the very same
            # entry. So look for a rename of what is being added here.

            # darcs hopefully uses forward slashes also under win
            subtree = entry.name + '/'

            for position, known in enumerate(self.entries):
                is_rename = known.action_kind == known.RENAMED

                if is_rename and known.old_name == entry.name:
                    # Turn the rename into the addition of the final
                    # name, and do not insert anything else. Since
                    # removes are the first entries in the list,
                    # anticipating the add this way also cures the
                    # case of an addition following an edit.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # The add of a directory must precede any add or
                # rename that has that directory as target, and the
                # add of a file must precede any edit of it: insert
                # the addition right before the first such entry.
                if (known.name == entry.name
                    or known.name.startswith(subtree)
                    or (is_rename and known.old_name.startswith(subtree))):
                    self.entries.insert(position, entry)
                    return entry

        elif kind == entry.DELETED:
            # A bug in darcs (possibly fixed in recent versions)
            # created patches with ADD+EDIT+REMOVE of a single file
            # (see tailor ticket #71, or darcs issue185). Another bug
            # (still present in 1.0.8) hides that, and without the
            # darcs patch attached to ticket #71 tailor does not have
            # enough information to do the right thing.

            for position, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue

                if known.action_kind == known.RENAMED:
                    # "rename A B; remove B": what really goes away
                    # is the original name.
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known

                if known.action_kind == known.ADDED:
                    # Added and removed by the same patch: drop both.
                    del self.entries[position]
                    return None

        # Either an edit or a rename (or nothing above matched): edits
        # obviously go to the end and renames come in very early, so
        # appending is just good.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields, one at a time, the changesets
    produced by ``changesets_from_darcschanges_unsafe`` called with
    the very same arguments.

    Note that the changesets are passed through untouched, so they
    still carry the tag information collected by the parser, which
    is not reliable in general (see the warning on
    ``changesets_from_darcschanges_unsafe``).
    """

    csets = changesets_from_darcschanges_unsafe(changes, unidiff,
                                                repodir, chunksize)
    for cs in csets:
        yield cs
141
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object carrying the XML output of
    ``darcs changes``: it is read `chunksize` at a time and fed to an
    incremental SAX parser, and the changesets completed by each
    chunk are yielded right away.

    When both `unidiff` and `repodir` are given, ``darcs diff
    --unified`` is executed on `repodir` for each patch, and its
    output is stored in the ``unidiff`` attribute of the changeset.

    This is a generator of ``DarcsChangeset`` instances, each with the
    patch hash stored in its ``darcs_hash`` attribute.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    class DarcsXMLChangesHandler(ContentHandler):
        """
        SAX handler that accumulates in ``self.changesets`` one
        DarcsChangeset for each completed ``<patch>`` element.
        """

        def __init__(self):
            self.changesets = []
            # Dictionary with the bits of the patch being parsed
            self.current = None
            # Pieces of character data of the element being parsed
            self.current_field = []
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: drop the timezone
                    # name and parse what is left.
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                # Start collecting the text of this element afresh
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # A tag is recognized by the "TAG " prefix of the patch
                # *name*: a long comment that just happens to start
                # with the same word must not produce a tag.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the parser incrementally, handing out the changesets as soon
    # as they are complete instead of keeping them all in memory.
    chunk = changes.read(chunksize)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = changes.read(chunksize)
    parser.close()
    for cs in handler.changesets:
        yield cs
253
254
255class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
256    """
257    A source working directory under ``darcs``.
258    """
259
260    is_hash_rx = re.compile('[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')
261
262    def _getUpstreamChangesets(self, sincerev):
263        """
264        Do the actual work of fetching the upstream changeset.
265        """
266
267        cmd = self.repository.command("pull", "--dry-run")
268        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
269        output = pull.execute(self.repository.repository,
270                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]
271
272        if pull.exit_status:
273            raise GetUpstreamChangesetsFailure(
274                "%s returned status %d saying\n%s" %
275                (str(pull), pull.exit_status, output.read()))
276
277        return self._parseDarcsPull(output)
278
279
280    def _parseDarcsPull(self, output):
281        """Process 'darcs pull' output to Changesets.
282        """
283        from datetime import datetime
284        from time import strptime
285        from sha import new
286        from vcpx.changes import Changeset
287
288        l = output.readline()
289        while l and not (l.startswith('Would pull the following changes:') or
290                         l == 'No remote changes to pull in!\n'):
291            l = output.readline()
292
293        if l <> 'No remote changes to pull in!\n':
294            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
295            ##   * Refix _getUpstreamChangesets for darcs
296
297            l = output.readline()
298            while not l.startswith('Making no changes:  this is a dry run.'):
299                # Assume it's a line like
300                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
301                # we used to split on the double space before the email,
302                # but in this case this is wrong. Then we assumed the date
303                # part to be exactly 28 chars long, but what about timezone
304                # names like 'CEST'? Waiting for xml output...
305                # We still assume there are *two* spaces before the email.
306                # The alternative is using some sort of a regex: Aaron Kaplan
307                # kindly suggested his own perl snippet
308                #    /^(... ... .\d .\d:\d\d:\d\d ...?. \d\d\d\d)  (.*)/ || die;
309                #    my ($date, $author) = ($1, $2);
310                # but that assumes the two spaces as separator, so I find the
311                # following solution easier and by any chance faster too.               
312                pieces = l.rstrip().split('  ')
313                assert len(pieces)>1, "Cannot parse %r as a patch timestamp" % l
314                author = pieces.pop()
315                date = ' '.join(pieces)
316                y,m,d,hh,mm,ss,d1,d2,d3 = strptime(date, "%a %b %d %H:%M:%S %Z %Y")
317                date = datetime(y,m,d,hh,mm,ss,0,UTC)
318                l = output.readline().rstrip()
319                assert (l.startswith('  * ') or
320                        l.startswith('  UNDO:') or
321                        l.startswith('  tagged')), \
322                        "Got %r but expected the start of the log" % l
323
324                if l.startswith('  *'):
325                    name = l[4:]
326                else:
327                    name = l[2:]
328
329                changelog = []
330                l = output.readline()
331                while l.startswith('  '):
332                    changelog.append(l[2:-1])
333                    l = output.readline()
334
335                cset = Changeset(name, date, author, '\n'.join(changelog))
336                compactdate = date.strftime("%Y%m%d%H%M%S")
337                if name.startswith('UNDO: '):
338                    name = name[6:]
339                    inverted = 't'
340                else:
341                    inverted = 'f'
342
343                if name.startswith('tagged '):
344                    name = name[7:]
345                    if cset.tags is None:
346                        cset.tags = [name]
347                    else:
348                        cset.tags.append(name)
349
350                phash = new()
351                phash.update(name)
352                phash.update(author)
353                phash.update(compactdate)
354                phash.update(''.join(changelog))
355                phash.update(inverted)
356                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
357                                                   new(author).hexdigest()[:5],
358                                                   phash.hexdigest())
359
360
361                yield cset
362
363                while not l.strip():
364                    l = output.readline()
365
366    def _applyChangeset(self, changeset):
367        """
368        Do the actual work of applying the changeset to the working copy.
369        """
370
371        needspatchesopt = False
372        if hasattr(changeset, 'darcs_hash'):
373            selector = '--match'
374            revtag = 'hash ' + changeset.darcs_hash
375        elif changeset.revision.startswith('tagged '):
376            selector = '--tag'
377            revtag = changeset.revision[7:]
378        else:
379            selector = '--match'
380            revtag = 'date "%s" && author "%s"' % (
381                changeset.date.strftime("%Y%m%d%H%M%S"),
382                changeset.author)
383            # The 'exact' matcher doesn't groke double quotes:
384            # """currently there is no provision for escaping a double
385            # quote, so you have to choose between matching double
386            # quotes and matching spaces"""
387            if not '"' in changeset.revision:
388                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
389            else:
390                needspatchesopt = True
391
392        cmd = self.repository.command("pull", "--all", "--quiet",
393                                      selector, revtag)
394
395        if needspatchesopt:
396            cmd.extend(['--patches', re.escape(changeset.revision)])
397
398        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
399        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]
400
401        if pull.exit_status:
402            raise ChangesetApplicationFailure(
403                "%s returned status %d saying\n%s" %
404                (str(pull), pull.exit_status, output.read()))
405
406        conflicts = []
407        line = output.readline()
408        while line:
409            if line.startswith('We have conflicts in the following files:'):
410                files = output.readline()[:-1].split('./')[1:]
411                self.log.warning("Conflict after 'darcs pull': %s",
412                                 ' '.join(files))
413                conflicts.extend(['./' + f for f in files])
414            line = output.readline()
415
416        cmd = self.repository.command("changes", selector, revtag,
417                                      "--xml-output", "--summ")
418        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
419        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
420        try:
421            changeset.entries.extend(last.next().entries)
422        except StopIteration:
423            pass
424
425        return conflicts
426
427    def _handleConflict(self, changeset, conflicts, conflict):
428        """
429        Handle the conflict raised by the application of the upstream changeset.
430
431        Override parent behaviour: with darcs, we need to execute a revert
432        on the conflicted files, **trashing** local changes, but there should
433        be none of them in tailor context.
434        """
435
436        self.log.info("Reverting changes to %s, to solve the conflict",
437                      ' '.join(conflict))
438        cmd = self.repository.command("revert", "--all")
439        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
440        revert.execute(conflict)
441
442    def _checkoutUpstreamRevision(self, revision):
443        """
444        Concretely do the checkout of the upstream revision and return
445        the last applied changeset.
446        """
447
448        from os.path import join, exists
449        from os import mkdir
450        from vcpx.source import InvocationError
451
452        if not self.repository.repository:
453            raise InvocationError("Must specify a the darcs source repository")
454
455        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
456            initial = True
457
458            if revision == 'INITIAL':
459                cmd = self.repository.command("changes", "--xml-output",
460                                              "--repo", self.repository.repository,
461                                               "--reverse")
462                changes = ExternalCommand(command=cmd)
463                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
464
465                if changes.exit_status:
466                    raise ChangesetApplicationFailure(
467                        "%s returned status %d saying\n%s" %
468                        (str(changes), changes.exit_status,
469                         output and output.read() or ''))
470
471                csets = changesets_from_darcschanges(output)
472                changeset = csets.next()
473
474                revision = 'hash %s' % changeset.darcs_hash
475            else:
476                revision = 'hash %s' % revision
477        else:
478            initial = False
479
480        if self.repository.subdir == '.' or exists(self.repository.basedir):
481            # This is currently *very* slow, compared to the darcs get
482            # below!
483            if not exists(join(self.repository.basedir, '_darcs')):
484                if not exists(self.repository.basedir):
485                    mkdir(self.repository.basedir)
486
487                cmd = self.repository.command("initialize")
488                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
489                init.execute()
490
491                if init.exit_status:
492                    raise TargetInitializationFailure(
493                        "%s returned status %s" % (str(init),
494                                                   init.exit_status))
495
496                cmd = self.repository.command("pull", "--all", "--quiet")
497                if revision and revision<>'HEAD':
498                    cmd.extend([initial and "--match" or "--tag", revision])
499                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
500                output = dpull.execute(self.repository.repository,
501                                       stdout=PIPE, stderr=STDOUT)[0]
502
503                if dpull.exit_status:
504                    raise TargetInitializationFailure(
505                        "%s returned status %d saying\n%s" %
506                        (str(dpull), dpull.exit_status, output.read()))
507        else:
508            # Use much faster 'darcs get'
509            cmd = self.repository.command("get", "--quiet")
510            if revision and revision<>'HEAD':
511                cmd.extend([initial and "--to-match" or "--tag", revision])
512            else:
513                cmd.append("--partial")
514            dget = ExternalCommand(command=cmd)
515            output = dget.execute(self.repository.repository, self.repository.basedir,
516                                  stdout=PIPE, stderr=STDOUT)[0]
517
518            if dget.exit_status:
519                raise TargetInitializationFailure(
520                    "%s returned status %d saying\n%s" %
521                    (str(dget), dget.exit_status, output.read()))
522
523        cmd = self.repository.command("changes", "--last", "1",
524                                      "--xml-output")
525        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
526        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
527
528        if changes.exit_status:
529            raise ChangesetApplicationFailure(
530                "%s returned status %d saying\n%s" %
531                (str(changes), changes.exit_status, output.read()))
532
533        last = changesets_from_darcschanges(output)
534
535        return last.next()
Note: See TracBrowser for help on using the repository browser.