source: tailor/vcpx/repository/darcs/source.py @ 1468

Revision 1468, 22.7 KB checked in by Miklos Vajna <vmiklos@…>, 5 years ago (diff)

DarcsChangeset.addEntry: fix optimization

in case the changeset is like "mv a a2; mkdir a; mv a2 a/b" disable an
optimization that would break the conversion

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A changeset that irons out the entry sequences darcs reports:

    - "add A; rename A B" is collapsed into "add B"
    - "add A; remove A" cancels out
    - "rename A B; remove B" is collapsed into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Build the changeset, feeding each given entry through
        ``addEntry()`` instead of handing the list to the base class.
        """

        # The base class is deliberately given no entries: each one has
        # to pass through our own addEntry() so the fixups are applied.
        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return

        for item in entries:
            self.addEntry(item, revision)

    def _renamesInto(self, prefix):
        """
        Tell whether some already known rename has a target whose name
        starts with `prefix`.

        This detects sequences such as ``mv a a2; mkdir a; mv a2 a/b``,
        where turning a rename into an addition would be wrong.
        """

        for known in self.entries:
            if known.action_kind == known.RENAMED and known.name.startswith(prefix):
                return True
        return False

    def addEntry(self, entry, revision):
        """
        Register `entry`, reconciling it with the entries already known.

        - "add A; rename A B" becomes "add B"
        - "add A; remove A" annihilates both
        - "rename A B; remove B" becomes "remove A"

        Return the entry that now represents the change (possibly an
        existing entry that got rewritten), or ``None`` when an addition
        and a removal cancelled each other.
        """

        # The parser normally hands over ready made ChangesetEntry
        # instances; anything else is left to the base class.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) always lists renames
            # first, even when they obviously follow the addition of
            # the very same entry. So an existing rename *of* the entry
            # being added is really the addition itself, under its
            # final name.

            # darcs hopefully uses forward slashes also under win
            prefix = entry.name + '/'

            for pos, known in enumerate(self.entries):
                renamed = known.action_kind == known.RENAMED

                if renamed and known.old_name == entry.name:
                    # Leave things alone when the order is really messed
                    # up, as in "mv a a2; mkdir a; mv a2 a/b". Otherwise
                    # morph the rename into the addition: since this
                    # anticipates the add, it also cures the case of an
                    # addition following an edit.
                    if not self._renamesInto(prefix):
                        known.action_kind = known.ADDED
                        known.old_name = None
                        return known

                # The addition of a directory must precede any add or
                # rename that targets it, and the addition of a file
                # must precede its edits: insert right before the first
                # entry that depends on this one.
                depends = known.name == entry.name or known.name.startswith(prefix)
                if not depends:
                    depends = renamed and known.old_name.startswith(prefix)
                if depends:
                    self.entries.insert(pos, entry)
                    return entry

        elif kind == entry.DELETED:
            # A removal that follows a rename of the same entry is just
            # the removal of the original name.
            #
            # A removal of something added by this very patch cancels
            # the addition: a darcs bug created patches carrying
            # ADD+EDIT+REMOVE of a single file (see tailor ticket #71,
            # or darcs issue185).
            for pos, known in enumerate(self.entries):
                if known.action_kind == known.RENAMED and known.name == entry.name:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known

                if known.action_kind == known.ADDED and known.name == entry.name:
                    del self.entries[pos]
                    return None

        # Edits, renames and everything not handled above simply go
        # to the end of the list.
        self.entries.append(entry)
        return entry
133
134
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15, replace_badchars=None):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields, one at a time and unchanged, the
    changesets produced by ``changesets_from_darcschanges_unsafe()``
    called with the very same arguments.

    NOTE: this wrapper is meant to be the place where the (currently
    incorrect) tag info coming from the unsafe variant gets filtered
    out, but at present no filtering is performed here.
    """

    parsed = changesets_from_darcschanges_unsafe(changes, unidiff,
                                                 repodir, chunksize,
                                                 replace_badchars)
    for changeset in parsed:
        yield changeset
151
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.

    This is a generator yielding ``DarcsChangeset`` instances as soon as
    the corresponding ``<patch>`` element has been completely parsed.

    :param changes: object with a ``read(size)`` method giving the XML
    :param unidiff: when true, and `repodir` is given too, the output of
                    ``darcs diff --unified`` for each patch is stored in
                    the ``unidiff`` attribute of its changeset
    :param repodir: the repository passed to ``darcs diff --repodir``
    :param chunksize: how much is read from `changes` at each step
    :param replace_badchars: optional mapping of single characters to
                             their replacement, applied to every chunk
                             before it reaches the XML parser
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            self.changesets = []
            self.current = None
            self.current_field = []
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002
                    timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6])

                timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes['inverted'] == "True")
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                # Start collecting the text content of this element
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # NOTE(review): the 'TAG ' prefix is honoured on the
                # comment too, not only on the patch name -- confirm
                # this is intended.
                if val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                current_field = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames
                    for i in self.current['entries']:
                        if i.action_kind != i.RENAMED:
                            continue
                        old = i.old_name
                        # Only the renamed entry itself, or something
                        # below it, is affected: a sibling that merely
                        # shares the first characters ("abc" vs "a")
                        # must be left alone.
                        if current_field == old or current_field.startswith(old + '/'):
                            # Substitute just the leading component(s):
                            # the old name may well appear again further
                            # down the path ("a/data/a.txt").
                            current_field = i.name + current_field[len(old):]
                entry = ChangesetEntry(current_field)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, mapping):
        # Replace, one by one, the characters that have a substitute
        if not mapping:
            return s

        return "".join([mapping.get(c, c) for c in s])

    # Feed the parser incrementally, handing out the changesets as soon
    # as they are complete instead of accumulating them all.
    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
278
279
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of a darcs patch file name: compact date, five hex digits,
    # forty hex digits, ".gz". Raw string, so that the backslash reaches
    # the regex engine untouched.
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute the repository's ``pull --dry-run`` command against the
        upstream repository and return what ``_parseDarcsPull()`` makes
        of its output. `sincerev` is not used here.

        :raises GetUpstreamChangesetsFailure: when the command exits
                                              with a non-zero status
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        This is a generator yielding one ``Changeset`` (without entries)
        for each patch listed in `output`, a file-like object. Each
        changeset gets a ``darcs_hash`` attribute computed from the
        patch name, author, date, log and inverted flag.

        Parsing stops at the 'Making no changes' line; should the
        output end before that, the timestamp assertion fails.
        """
        from datetime import datetime
        from time import strptime
        try:
            # The sha module is deprecated in favour of hashlib
            from hashlib import sha1 as new
        except ImportError:
            from sha import new
        from vcpx.changes import Changeset

        # Skip whatever precedes the list of patches
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            fsep = re.compile('[ :]+')
            l = output.readline()
            while not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # Use a regular expression matching multiple spaces or colons
                # to split it: the first 8 fields build up the datetime,
                # the rest of the line is the author.
                pieces = fsep.split(l.rstrip(), 8)
                # Nine pieces are needed, since pieces[8] is used below
                assert len(pieces)>=9, "Cannot parse %r as a patch timestamp" % l
                date = ' '.join(pieces[:8])
                author = pieces[8]
                y,m,d,hh,mm,ss = strptime(date, "%a %b %d %H %M %S %Z %Y")[:6]
                date = datetime(y,m,d,hh,mm,ss,0,UTC)
                l = output.readline().rstrip()
                assert (l.startswith('  *') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged')), \
                        "Got %r but expected the start of the log" % l

                if l.startswith('  *'):
                    name = l[4:]
                else:
                    name = l[2:]

                # The indented lines that follow are the long comment
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'

                if name.startswith('tagged '):
                    name = name[7:]
                    if cset.tags is None:
                        cset.tags = [name]
                    else:
                        cset.tags.append(name)
                    name = "TAG " + name

                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())


                yield cset

                # Skip the blank lines between patches; readline() keeps
                # returning '' at end of file, so stop there instead of
                # spinning forever.
                while l and not l.strip():
                    l = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch matching `changeset`, then ask for the
        summary of that same patch and extend ``changeset.entries`` with
        the entries found there.

        Return the list of files darcs reported as conflicting.

        :raises ChangesetApplicationFailure: when the pull exits with a
                                             non-zero status
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The names of the conflicting files are on the line right
        # after the announcement, separated by spaces.
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split(' ')
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(files)
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0],
                                            replace_badchars=self.repository.replace_badchars)
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # No changeset was parsed: leave the entries as they are
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (the first upstream patch), a
        patch hash matching ``is_hash_rx``, ``'HEAD'``, or anything
        else, which is then used as a tag.

        :raises InvocationError: when no source repository is configured
        :raises ChangesetApplicationFailure: when ``darcs changes`` fails
        :raises TargetInitializationFailure: when initializing, pulling
                                             or getting fails
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                # Ask upstream for its history, oldest first, and pick
                # the hash of the very first patch.
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever way the working directory was populated, its most
        # recent patch is the changeset to return.
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)

        return last.next()
Note: See TracBrowser for help on using the repository browser.