source: tailor/vcpx/repository/darcs/source.py @ 1498

Revision 1498, 24.3 KB checked in by lele@…, 5 years ago (diff)

Use the newer 'darcs pull --xml-output'
Now that "darcs pull" accepts the "--xml-output" option, use that
instead of re-computing the patch hash from the normal "pull" output.

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that normalizes its entries while they are added,
    working around the way darcs reports the content of a patch:

    - "add A; rename A B" becomes "add B"
    - "add A; remove A" cancel each other out
    - "rename A B; remove B" becomes "remove A"
    - "rename A B; rename B C" becomes "rename A C"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Create the changeset, routing each of the given `entries`
        through ``addEntry()`` so that they get normalized.
        """

        # The base class is intentionally handed no entries at all:
        # they are fed one at a time just below.
        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Merge `entry` with the entries collected so far.

        Return the entry that ends up describing the change (either
        `entry` itself or an already known entry that absorbed it), or
        ``None`` when a removal annihilates an addition made by the
        same patch.
        """

        # Not expected to happen, the parser hands over ready-made
        # ChangesetEntries: leave anything else to the base class.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        if entry.action_kind == entry.ADDED:
            # darcs `changes --xml` (as of 1.0.7) lists renames first,
            # even when they obviously follow the addition of the very
            # same entry, so the already known entries must be checked.

            # darcs hopefully uses forward slashes under win too
            prefix = entry.name + '/'

            for pos, known in enumerate(self.entries):
                if (known.action_kind == known.RENAMED
                    and known.old_name == entry.name):
                    # The order may be messed up, as in
                    #   mv a a2; mkdir a; mv a2 a/b
                    # If any rename targets something below the added
                    # name, the known rename is left untouched.
                    for other in self.entries:
                        if (other.action_kind == other.RENAMED
                            and other.name.startswith(prefix)):
                            break
                    else:
                        # "add A; rename A B": the rename turns into
                        # the addition of B and nothing else is stored.
                        known.action_kind = known.ADDED
                        known.old_name = None
                        return known

                # An addition must precede whatever touches the same
                # name or lives below it, as well as any rename moving
                # something away from below it.
                touched = (known.name == entry.name
                           or known.name.startswith(prefix))
                if not touched and known.action_kind == known.RENAMED:
                    touched = known.old_name.startswith(prefix)
                if touched:
                    self.entries.insert(pos, entry)
                    return entry

        elif entry.action_kind == entry.DELETED:
            # See tailor ticket #71 / darcs issue185 for the patches
            # carrying ADD+EDIT+REMOVE of a single file.
            for pos, known in enumerate(self.entries):
                same_name = known.name == entry.name
                if same_name and known.action_kind == known.RENAMED:
                    # "rename A B; remove B" is really "remove A"
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known
                if same_name and known.action_kind == known.ADDED:
                    # "add A; remove A": drop the addition, store nothing
                    del self.entries[pos]
                    return None

        elif entry.action_kind == entry.RENAMED:
            for known in self.entries:
                if (known.action_kind == known.RENAMED
                    and known.name == entry.old_name):
                    # "rename A B; rename B C" is really "rename A C"
                    known.name = entry.name
                    return known

        # Nothing absorbed the entry: it goes at the end of the list.
        self.entries.append(entry)
        return entry
141
142
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15, replace_badchars=None):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields, one by one and unchanged, the
    changesets produced by ``changesets_from_darcschanges_unsafe()``,
    which receives all the arguments as they are.

    It is the entry point meant to hide the (currently incorrect) tag
    information computed by that function; as written, though, no
    filtering is applied to the changesets.
    """

    parsed = changesets_from_darcschanges_unsafe(
        changes,
        unidiff=unidiff,
        repodir=repodir,
        chunksize=chunksize,
        replace_badchars=replace_badchars)

    for changeset in parsed:
        yield changeset
159
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    This is a generator: the XML is fed to an incremental SAX parser
    in pieces, and each ``DarcsChangeset`` is yielded as soon as its
    ``<patch>`` element has been read completely.

    :param changes: file-like object with the XML emitted by darcs,
                    read through ``changes.read(chunksize)``
    :param unidiff: when true, and `repodir` is given too, ``darcs diff
                    --unified`` is executed for each patch and what it
                    prints is stored in the ``unidiff`` attribute of
                    the changeset
    :param repodir: the repository handed to ``darcs diff``
    :param chunksize: how many characters to read at a time
    :param replace_badchars: optional mapping from single characters
                    to their replacement, applied to each piece before
                    it reaches the parser

    Each changeset also carries the patch hash in its ``darcs_hash``
    attribute.

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    text_fields = ['name', 'comment']
    file_actions = ['add_file', 'add_directory', 'modify_file',
                    'remove_file', 'remove_directory']

    def follow_rename(path, old_name, new_name):
        """
        Return `path` as it reads after the rename of `old_name` into
        `new_name`: only `old_name` itself and what lives below it are
        affected, and only the leading part of the path is rewritten.
        """
        if path == old_name:
            return new_name
        if path.startswith(old_name + '/'):
            return new_name + path[len(old_name):]
        return path

    class DarcsXMLChangesHandler(ContentHandler):
        """Collect a ``DarcsChangeset`` for each ``<patch>`` element."""

        def __init__(self):
            ContentHandler.__init__(self)
            # Completed changesets, drained by the feeding loop below
            self.changesets = []
            # Data of the patch being parsed, None outside of <patch>
            self.current = None
            # Pieces of character data seen since the last reset
            self.current_field = []
            self.inverted = False
            if unidiff and repodir:
                # %(patchname)s is filled in at execution time
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: the timezone name
                    # is dropped before parsing
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes['inverted'] == "True")
            elif name in text_fields or name in file_actions:
                # Start collecting the text of this element
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in text_fields:
                val = ''.join(self.current_field)
                # Only the *name* of a patch marks it as a tag: a long
                # comment that happens to start with "TAG " does not.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in file_actions:
                path = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames
                    for known in self.current['entries']:
                        if known.action_kind == known.RENAMED:
                            path = follow_rename(path, known.old_name,
                                                 known.name)
                entry = ChangesetEntry(path)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, table):
        """Replace each character of `s` found in the `table` mapping."""
        if not table:
            return s

        return "".join([table.get(c, c) for c in s])

    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        # Hand over whatever got completed by this piece of input
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
286
287
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of a darcs patch hash, as built by _parseDarcsPull():
    # compact date, five hex digits, a 40 hex digits digest and ".gz"
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute a dry-run ``pull`` against the upstream repository and
        return an iterator over the changesets it would bring in.  The
        XML flavour of the listing is preferred; when darcs does not
        recognize ``--xml-output`` the plain listing is parsed instead.

        `sincerev` is not used by this implementation.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with
        an error.
        """

        from cStringIO import StringIO

        # Use the newer pull --xml-output, if possible
        cmd = self.repository.command("pull", "--dry-run", "--xml-output")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        # Read the output just once, as a string: a membership test
        # done on the file-like object would compare whole lines (never
        # matching the message below) and consume the stream as well.
        text = output.read()

        if pull.exit_status:
            if "unrecognized option `--xml-output'" not in text:
                raise GetUpstreamChangesetsFailure(
                    "%s returned status %d saying\n%s" %
                    (str(pull), pull.exit_status, text))

            # No way, fall back to old behaviour, that will possibly fail,
            # on patches recorded before 2003-11-01... :-|
            cmd = self.repository.command("pull", "--dry-run")
            pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
            output = pull.execute(self.repository.repository,
                                  stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

            if pull.exit_status:
                raise GetUpstreamChangesetsFailure(
                    "%s returned status %d saying\n%s" %
                    (str(pull), pull.exit_status, output.read()))

            return self._parseDarcsPull(output)

        # Skip initial verbosity, as well as the one at end
        listing = StringIO(text)
        listing.readline() # Would pull from "/home/lele/wip/darcs-2.0"...
        listing.readline() # Would pull the following changes:
        xml = ''.join(listing.readlines()[:-2])

        if not xml.strip():
            # Nothing is left between the chatter: presumably there is
            # nothing to pull.  An empty document is expected to make
            # the parser fail instead of yielding no changesets.
            return iter([])

        badchars = self.repository.replace_badchars
        return changesets_from_darcschanges(StringIO(xml),
                                            replace_badchars=badchars)

    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        `output` is the file-like object with the textual listing of a
        dry-run pull.  This is a generator yielding a ``Changeset`` for
        each patch listed, with the hash of the patch recomputed from
        its name, author, date and log, and stored in the
        ``darcs_hash`` attribute.

        Lines that do not follow the expected layout trigger an
        ``AssertionError``.
        """
        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1 as new
        except ImportError:
            # Python older than 2.5 has only the sha module
            from sha import new
        from vcpx.changes import Changeset

        # Skip everything up to the start of the listing
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            fsep = re.compile('[ :]+')
            l = output.readline()
            while not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # Use a regular expression matching multiple spaces or colons
                # to split it, and use the first 7 fields to build up a datetime.
                pieces = fsep.split(l.rstrip(), 8)
                assert len(pieces)>=7, "Cannot parse %r as a patch timestamp" % l
                date = ' '.join(pieces[:8])
                try:
                    author = pieces[8]
                except IndexError:
                    # darcs allows patches with empty author
                    author = ""
                y, m, d, hh, mm, ss = strptime(date,
                                               "%a %b %d %H %M %S %Z %Y")[:6]
                date = datetime(y,m,d,hh,mm,ss,0,UTC)
                l = output.readline().rstrip()
                assert (l.startswith('  *') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged')), \
                        "Got %r but expected the start of the log" % l

                if l.startswith('  *'):
                    name = l[4:]
                else:
                    name = l[2:]

                # The indented lines that follow are the long comment
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'

                if name.startswith('tagged '):
                    name = name[7:]
                    if cset.tags is None:
                        cset.tags = [name]
                    else:
                        cset.tags.append(name)
                    name = "TAG " + name

                # Recompute the hash of the patch from its metadata
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())


                yield cset

                # Skip the blank lines between patches.  Stop at end
                # of output too, where readline() keeps returning an
                # empty string: a truncated listing then fails on the
                # timestamp assertion above instead of looping forever.
                while l and not l.strip():
                    l = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch described by `changeset`, selected by its
        hash when known, otherwise by tag name or by date, author and
        name.  Then complete ``changeset.entries`` with the entries
        reported by ``darcs changes --summ`` for that patch.

        Return the list of files darcs reports as conflicting, and
        raise ``ChangesetApplicationFailure`` when the pull fails.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            # Names containing double quotes are matched as a regexp
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The names of the conflicting files are on the line that
        # follows the announcement, separated by spaces
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split(' ')
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(files)
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0],
                                            replace_badchars=self.repository.replace_badchars)
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs did not list the patch: leave the entries alone
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.

        `conflict` is the sequence of conflicting file names, handed
        over to ``darcs revert --all``; `changeset` and `conflicts` are
        not used here.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict, input="\n")

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (the first patch listed by
        ``darcs changes --reverse`` is used), a patch hash, ``'HEAD'``
        or anything else, which is handed to darcs as a tag name.

        When the working directory already exists, or the subdir is
        ``'.'``, a repository is initialized there (unless it already
        has a ``_darcs`` directory, in which case nothing is fetched)
        and the patches are pulled into it; otherwise the faster
        ``darcs get`` is used.

        Raise ``InvocationError`` when no source repository is
        configured, ``TargetInitializationFailure`` when the working
        directory cannot be populated and ``ChangesetApplicationFailure``
        when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            # The revision gets selected with a "hash ..." match
            initial = True

            if revision == 'INITIAL':
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # The most recent patch now in the working directory is the
        # changeset to be returned
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)

        return last.next()
Note: See TracBrowser for help on using the repository browser.