source: tailor/vcpx/repository/darcs/source.py @ 1469

Revision 1469, 23.1 KB checked in by Miklos Vajna <vmiklos@…>, 5 years ago (diff)

darcs source: collapse "rename A B; rename B C" into "rename A C"

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that fixes up darcs idiosyncrasies while its
    entries are being added:

    - collapse "add A; rename A B" into "add B"
    - annihilate "add A; remove A"
    - collapse "rename A B; remove B" into "remove A"
    - collapse "rename A B; rename B C" into "rename A C"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize a new DarcsChangeset.

        The parent class is always initialized with ``entries=None``;
        the given `entries`, if any, are then fed one by one through
        ``addEntry()`` so that the fixups are applied to each of them.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log, entries=None, **other)
        if entries is not None:
            for given in entries:
                self.addEntry(given, revision)

    def _hasRenameInto(self, dirname):
        """
        Tell whether some already known entry is a rename whose new
        name starts with `dirname` (a directory path ending in '/').
        """

        for known in self.entries:
            if known.action_kind == known.RENAMED and known.name.startswith(dirname):
                return True
        return False

    def addEntry(self, entry, revision):
        """
        Add `entry` to the changeset, fixing up darcs idiosyncrasies:

        - collapse "add A; rename A B" into "add B"
        - annihilate "add A; remove A"
        - collapse "rename A B; remove B" into "remove A"
        - collapse "rename A B; rename B C" into "rename A C"

        Return the entry that now represents the change: either
        `entry` itself or the already known entry that absorbed it.
        ``None`` is returned when an add and a remove annihilated
        each other.
        """

        # This should not happen, since the parser feeds us an already
        # built list of ChangesetEntries, anyway...
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # A bug in darcs: `changes --xml` (as of 1.0.7) emits the
            # changes in the wrong order, that is, it prefers to start
            # with renames, *always*, even when they obviously follow
            # the add of the same entry. So, if there is a rename of
            # this entry, turn that rename into an addition instead,
            # and do not insert any other entry.

            # darcs hopefully uses forward slashes also under win
            dirname = entry.name+'/'

            for position, known in enumerate(self.entries):
                if known.action_kind == known.RENAMED and known.old_name == entry.name:
                    # Unfortunately the order may be messed up, and in
                    # that case nothing should be done.
                    # Example: mv a a2; mkdir a; mv a2 a/b
                    #
                    # Luckily enough (since removes are the first
                    # entries in the list, that is) anticipating the
                    # add cures also the case of an addition that
                    # follows an edit.
                    if not self._hasRenameInto(dirname):
                        known.action_kind = known.ADDED
                        known.old_name = None
                        return known

                # Assert also that add_dir events must precede any
                # add_file and ren_file that have that dir as target,
                # and that add_file precedes any edit.
                same_or_inside = (known.name == entry.name
                                  or known.name.startswith(dirname))
                if same_or_inside or (known.action_kind == known.RENAMED
                                      and known.old_name.startswith(dirname)):
                    self.entries.insert(position, entry)
                    return entry

        elif kind == entry.DELETED:
            # If this is a deletion, and there is a rename of this
            # entry (such as "rename A B; remove B") then turn the
            # existing rename into a deletion instead.
            #
            # If instead the removed entry was added by the same
            # patch, annihilate the two: a bug in darcs (possibly
            # fixed in recent versions) created patches with
            # ADD+EDIT+REMOVE of a single file (see tailor ticket #71,
            # or darcs issue185). Another bug (still present in 1.0.8)
            # hides that and makes a workaround on the tailor side
            # very hard: a proposed darcs fix is attached to ticket
            # #71, and without it tailor does not have enough
            # information to do the right thing.
            for position, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue
                if known.action_kind == known.RENAMED:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known
                elif known.action_kind == known.ADDED:
                    del self.entries[position]
                    return None

        elif kind == entry.RENAMED:
            # The "rename A B; rename B C" to "rename A C" part
            for known in self.entries:
                if known.action_kind == known.RENAMED and known.name == entry.old_name:
                    known.name = entry.name
                    return known

        # Nothing absorbed the entry, so it goes to the end: that is
        # obviously right for an edit, and since renames, as said,
        # come in very early, appending is just as good for them.
        self.entries.append(entry)
        return entry
141
142
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15, replace_badchars=None):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields, one at a time and in the same
    order, the changesets produced by
    ``changesets_from_darcschanges_unsafe``, to which all the
    arguments are forwarded unchanged.

    NOTE(review): this wrapper was documented as filtering out the
    (currently incorrect) tag info coming from the unsafe variant,
    but as written it passes every changeset through untouched --
    confirm whether the filtering is still wanted.
    """

    for cs in changesets_from_darcschanges_unsafe(changes, unidiff,
                                                  repodir, chunksize,
                                                  replace_badchars):
        yield cs
159
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object carrying the XML output of
    ``darcs changes``: it is read `chunksize` characters at a time and
    fed to an incremental SAX parser, and the changesets completed so
    far are yielded after each chunk (this is a generator).

    When both `unidiff` and `repodir` are true, ``darcs diff
    --unified`` is executed for each patch and its output is stored
    in the ``unidiff`` attribute of the changeset.

    `replace_badchars`, when not empty, is a mapping applied character
    by character to each chunk before it reaches the parser.

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    # Elements whose text content is the path of the touched entry
    path_elements = ['add_file', 'add_directory', 'modify_file',
                     'remove_file', 'remove_directory']

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            ContentHandler.__init__(self)
            self.changesets = []
            self.current = None
            self.current_field = []
            # State filled in by startElement(), initialized here so
            # that it always exists
            self.inverted = False
            self.old_name = None
            self.new_name = None
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002:
                    # drop the timezone name, keeping " 2002"
                    timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6])

                timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes.get('inverted', 'False') == "True")
            elif name in ['name', 'comment'] + path_elements:
                # Start collecting the text of this element
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # A tag is identified by the *name* of the patch: a
                # long comment that happens to start with "TAG " is
                # not a tag
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in path_elements:
                current_field = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames: rewrite only the leading
                    # path, and only when the old name is the entry
                    # itself or one of its parent directories
                    for i in self.current['entries']:
                        if i.action_kind != i.RENAMED:
                            continue
                        if current_field == i.old_name:
                            current_field = i.name
                        elif current_field.startswith(i.old_name + '/'):
                            current_field = i.name + current_field[len(i.old_name):]
                entry = ChangesetEntry(current_field)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            # May be called several times for a single text node
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, charmap):
        # Replace each character of `s` that is a key of `charmap`
        if not charmap:
            return s

        return "".join([charmap.get(c, c) for c in s])

    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        # Hand over the changesets completed by this chunk
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
286
287
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of a darcs patch file name, used to recognize a revision
    # given as a patch hash
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute ``darcs pull --dry-run`` against the source repository
        and return the changesets parsed from its output. `sincerev`
        is not used by this implementation.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non-zero status.
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        `output` is a file-like object carrying the text emitted by
        ``darcs pull --dry-run``. This is a generator yielding one
        ``Changeset`` per patch listed there, each with a computed
        ``darcs_hash`` attribute.

        An ``AssertionError`` is raised when a line cannot be parsed,
        which includes output that ends before the final "dry run"
        line.
        """
        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1 as new
        except ImportError:
            # Interpreters older than Python 2.5
            from sha import new
        from vcpx.changes import Changeset

        # Skip whatever precedes the list of patches
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            fsep = re.compile('[ :]+')
            l = output.readline()
            while not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # Use a regular expression matching multiple spaces or colons
                # to split it: the first 8 fields build up the datetime, the
                # rest of the line is the author.
                pieces = fsep.split(l.rstrip(), 8)
                assert len(pieces)>=9, "Cannot parse %r as a patch timestamp" % l
                date = ' '.join(pieces[:8])
                author = pieces[8]
                y,m,d,hh,mm,ss = strptime(date, "%a %b %d %H %M %S %Z %Y")[:6]
                date = datetime(y,m,d,hh,mm,ss,0,UTC)
                l = output.readline().rstrip()
                assert (l.startswith('  *') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged')), \
                        "Got %r but expected the start of the log" % l

                if l.startswith('  *'):
                    name = l[4:]
                else:
                    name = l[2:]

                # The indented lines that follow are the long comment
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'

                if name.startswith('tagged '):
                    name = name[7:]
                    if cset.tags is None:
                        cset.tags = [name]
                    else:
                        cset.tags.append(name)
                    name = "TAG " + name

                # Build the patch file name out of the patch metadata
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())


                yield cset

                # Skip the blank lines between patches; stop at end of
                # file too, where readline() keeps returning '', so
                # that a truncated output fails on the assertion above
                # instead of looping forever.
                while l and not l.strip():
                    l = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch identified by `changeset` with ``darcs
        pull``, then complete ``changeset.entries`` with the entries
        reported by ``darcs changes --xml-output --summ`` for that
        same patch.

        Return the list of the files darcs reported as conflicting.
        Raise ``ChangesetApplicationFailure`` when the pull exits with
        a non-zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The conflicting files are listed, separated by spaces, on
        # the line that follows the notice
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split(' ')
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(files)
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0],
                                            replace_badchars=self.repository.replace_badchars)
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs reported no patch: leave the entries alone
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be 'INITIAL' (the first patch of the source
        repository), a patch hash as matched by ``is_hash_rx``, 'HEAD'
        or anything else, which is handed to darcs as a tag.

        Raise ``InvocationError`` when no source repository is
        configured, ``TargetInitializationFailure`` when ``darcs
        initialize``, ``pull`` or ``get`` fail, and
        ``ChangesetApplicationFailure`` when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                # Ask the source repository for its oldest patch
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever the way the working directory was populated, its
        # most recent patch is the last applied changeset
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)

        return last.next()
Note: See TracBrowser for help on using the repository browser.