source: tailor/vcpx/repository/darcs/source.py @ 1464

Revision 1464, 22.2 KB checked in by Miklos Vajna <vmiklos@…>, 5 years ago (diff)

darcs source: handle inverted renames + modifications

here is the scenario:

UNDO: move to extra + upd

./source/xapps-extra/mtools -> ./source/xapps/mtools

M ./source/xapps-extra/mtools/FB -1

  • move to extra + upd

./source/xapps/mtools -> ./source/xapps-extra/mtools

M ./source/xapps-extra/mtools/FB +1

In the XML output, this looks like:

<patch inverted='True' hash='20070625100419-e2957-5f39752481ac1f758883ab8a17b03a3a47606cbb.gz'>

<name>move to extra + upd</name>

<summary>
<move from="source/xapps-extra/mtools" to="source/xapps/mtools"/>
<modify_file>
source/xapps-extra/mtools/FB<removed_lines num='1'/>
</modify_file>
</summary>

</patch>
<patch inverted='False' hash='20070625100419-e2957-16657621d68a3b47d3ad0f0153ce336f63ec2d5d.gz'>

<name>move to extra + upd</name>

<summary>
<move from="source/xapps/mtools" to="source/xapps-extra/mtools"/>
<modify_file>
source/xapps-extra/mtools/FB<added_lines num='1'/>
</modify_file>
</summary>

</patch>

In short, the XML output lists the renames at the start of the
entry list, while the modify_file tags still refer to the old path
names. Handle this case by updating the paths in the modify_file tags.

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that normalizes the entry list reported by darcs.

    Every entry goes through `addEntry()`, which

    - collapses "add A; rename A B" into "add B"
    - annihilates "add A; remove A"
    - collapses "rename A B; remove B" into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize a new DarcsChangeset.

        The parent class is initialized *without* entries; the given
        ones are then fed one by one through `addEntry()` so that the
        darcs specific fixups get applied.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Add `entry` to the changeset, fixing up darcs idiosyncrasies.

        Return the entry that ends up representing the change (which
        may be an already known entry, modified in place), or ``None``
        when the new entry annihilated an existing one.
        """

        # The parser hands us ready made ChangesetEntry instances; for
        # anything else just defer to the parent implementation.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) always emits renames
            # first, even when they obviously follow the addition of
            # the very same entry. So an "add A" may arrive after the
            # "rename A B" it logically precedes.

            # darcs hopefully uses forward slashes also under win
            prefix = entry.name + '/'

            for pos, known in enumerate(self.entries):
                renamed = known.action_kind == known.RENAMED

                if renamed and known.old_name == entry.name:
                    # "add A; rename A B" is really "add B": morph the
                    # rename into the addition and drop the new entry.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # An addition must precede whatever touches the same
                # path or lives below it: adding a directory comes
                # before adds/renames inside it, adding a file before
                # its edits.
                if (known.name == entry.name
                    or known.name.startswith(prefix)
                    or (renamed and known.old_name.startswith(prefix))):
                    self.entries.insert(pos, entry)
                    return entry

        elif kind == entry.DELETED:
            # A bug in darcs created patches with ADD+EDIT+REMOVE of a
            # single file (see tailor ticket #71, or darcs issue185).

            for pos, known in enumerate(self.entries):
                if known.action_kind == known.RENAMED:
                    if known.name == entry.name:
                        # "rename A B; remove B" is really "remove A".
                        known.action_kind = known.DELETED
                        known.name = known.old_name
                        known.old_name = None
                        return known
                elif known.action_kind == known.ADDED:
                    if known.name == entry.name:
                        # Added and removed by the same patch: the two
                        # annihilate each other.
                        del self.entries[pos]
                        return None

        # Either an edit or a rename, or an add/remove that matched
        # nothing already known: appending is just good.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15, replace_badchars=None):
    """
    Parse XML output of ``darcs changes``.

    This is a generator yielding, in order, every changeset produced
    by ``changesets_from_darcschanges_unsafe()`` for the same
    arguments.

    NOTE(review): this wrapper was meant to filter out the (currently
    incorrect) tag info computed by the unsafe variant, but as written
    it passes the changesets through untouched.
    """

    parsed = changesets_from_darcschanges_unsafe(changes,
                                                 unidiff=unidiff,
                                                 repodir=repodir,
                                                 chunksize=chunksize,
                                                 replace_badchars=replace_badchars)
    for changeset in parsed:
        yield changeset
142
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    This is a generator: the XML is fed to a SAX parser in chunks and
    the changesets completed so far are yielded after each chunk.

    :param changes: file-like object whose ``read(size)`` returns the
                    XML emitted by ``darcs changes --xml-output``
    :param unidiff: when true (and `repodir` is given too) run
                    ``darcs diff --unified`` for each patch and store
                    its output in the ``unidiff`` attribute of the
                    changeset
    :param repodir: the repository passed to ``darcs diff --repodir``
    :param chunksize: how many characters to read from `changes` at
                      a time
    :param replace_badchars: optional mapping from a single character
                             to its replacement, applied to each chunk
                             before it reaches the parser

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    def rebase_path(path, old, new):
        """Return `path` with its leading `old` component(s) replaced by `new`."""

        # Match only on whole path components: a plain substring
        # replacement would also rewrite unrelated paths that merely
        # contain the old name somewhere (rename "a" to "b" must not
        # turn "data/a.txt" into "dbtb/b.txt").
        if path == old:
            return new
        if path.startswith(old + '/'):
            return new + path[len(old):]
        return path

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            ContentHandler.__init__(self)
            self.changesets = []
            self.current = None
            self.current_field = []
            self.inverted = False
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002
                    timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6])

                timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                # The attribute is the *string* 'True' or 'False':
                # bool() of any non-empty string is True, so compare
                # the text instead. A missing attribute means a
                # regular, not inverted, patch.
                flag = attributes.get('inverted', 'False')
                self.inverted = flag.strip().lower() == 'true'
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                if val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                current_field = ''.join(self.current_field).strip()
                if self.inverted:
                    # In an inverted patch the renames come first but
                    # the other entries still refer to the paths as
                    # they were before the renames: bring them up to
                    # date, applying the renames seen so far in order.
                    for known in self.current['entries']:
                        if known.action_kind == known.RENAMED:
                            current_field = rebase_path(current_field,
                                                        known.old_name,
                                                        known.name)
                entry = ChangesetEntry(current_field)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            # The text of an element may arrive in several pieces.
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, charmap):
        """Replace each character of `s` found in `charmap`, if any map is given."""

        if not charmap:
            return s

        return "".join([charmap.get(c, c) for c in s])

    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        # Hand out what has been completed so far, then start afresh.
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
269
270
271class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
272    """
273    A source working directory under ``darcs``.
274    """
275
276    is_hash_rx = re.compile('[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')
277
278    def _getUpstreamChangesets(self, sincerev):
279        """
280        Do the actual work of fetching the upstream changeset.
281        """
282
283        cmd = self.repository.command("pull", "--dry-run")
284        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
285        output = pull.execute(self.repository.repository,
286                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]
287
288        if pull.exit_status:
289            raise GetUpstreamChangesetsFailure(
290                "%s returned status %d saying\n%s" %
291                (str(pull), pull.exit_status, output.read()))
292
293        return self._parseDarcsPull(output)
294
295
296    def _parseDarcsPull(self, output):
297        """Process 'darcs pull' output to Changesets.
298        """
299        from datetime import datetime
300        from time import strptime
301        from sha import new
302        from vcpx.changes import Changeset
303
304        l = output.readline()
305        while l and not (l.startswith('Would pull the following changes:') or
306                         l == 'No remote changes to pull in!\n'):
307            l = output.readline()
308
309        if l <> 'No remote changes to pull in!\n':
310            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
311            ##   * Refix _getUpstreamChangesets for darcs
312
313            fsep = re.compile('[ :]+')
314            l = output.readline()
315            while not l.startswith('Making no changes:  this is a dry run.'):
316                # Assume it's a line like
317                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
318                # Use a regular expression matching multiple spaces or colons
319                # to split it, and use the first 7 fields to build up a datetime.
320                pieces = fsep.split(l.rstrip(), 8)
321                assert len(pieces)>=7, "Cannot parse %r as a patch timestamp" % l
322                date = ' '.join(pieces[:8])
323                author = pieces[8]
324                y,m,d,hh,mm,ss,d1,d2,d3 = strptime(date, "%a %b %d %H %M %S %Z %Y")
325                date = datetime(y,m,d,hh,mm,ss,0,UTC)
326                l = output.readline().rstrip()
327                assert (l.startswith('  *') or
328                        l.startswith('  UNDO:') or
329                        l.startswith('  tagged')), \
330                        "Got %r but expected the start of the log" % l
331
332                if l.startswith('  *'):
333                    name = l[4:]
334                else:
335                    name = l[2:]
336
337                changelog = []
338                l = output.readline()
339                while l.startswith('  '):
340                    changelog.append(l[2:-1])
341                    l = output.readline()
342
343                cset = Changeset(name, date, author, '\n'.join(changelog))
344                compactdate = date.strftime("%Y%m%d%H%M%S")
345                if name.startswith('UNDO: '):
346                    name = name[6:]
347                    inverted = 't'
348                else:
349                    inverted = 'f'
350
351                if name.startswith('tagged '):
352                    name = name[7:]
353                    if cset.tags is None:
354                        cset.tags = [name]
355                    else:
356                        cset.tags.append(name)
357                    name = "TAG " + name
358
359                phash = new()
360                phash.update(name)
361                phash.update(author)
362                phash.update(compactdate)
363                phash.update(''.join(changelog))
364                phash.update(inverted)
365                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
366                                                   new(author).hexdigest()[:5],
367                                                   phash.hexdigest())
368
369
370                yield cset
371
372                while not l.strip():
373                    l = output.readline()
374
375    def _applyChangeset(self, changeset):
376        """
377        Do the actual work of applying the changeset to the working copy.
378        """
379
380        needspatchesopt = False
381        if hasattr(changeset, 'darcs_hash'):
382            selector = '--match'
383            revtag = 'hash ' + changeset.darcs_hash
384        elif changeset.revision.startswith('tagged '):
385            selector = '--tag'
386            revtag = changeset.revision[7:]
387        else:
388            selector = '--match'
389            revtag = 'date "%s" && author "%s"' % (
390                changeset.date.strftime("%Y%m%d%H%M%S"),
391                changeset.author)
392            # The 'exact' matcher doesn't groke double quotes:
393            # """currently there is no provision for escaping a double
394            # quote, so you have to choose between matching double
395            # quotes and matching spaces"""
396            if not '"' in changeset.revision:
397                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
398            else:
399                needspatchesopt = True
400
401        cmd = self.repository.command("pull", "--all", "--quiet",
402                                      selector, revtag)
403
404        if needspatchesopt:
405            cmd.extend(['--patches', re.escape(changeset.revision)])
406
407        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
408        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]
409
410        if pull.exit_status:
411            raise ChangesetApplicationFailure(
412                "%s returned status %d saying\n%s" %
413                (str(pull), pull.exit_status, output.read()))
414
415        conflicts = []
416        line = output.readline()
417        while line:
418            if line.startswith('We have conflicts in the following files:'):
419                files = output.readline()[:-1].split(' ')
420                self.log.warning("Conflict after 'darcs pull': %s",
421                                 ' '.join(files))
422                conflicts.extend(files)
423            line = output.readline()
424
425        cmd = self.repository.command("changes", selector, revtag,
426                                      "--xml-output", "--summ")
427        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
428        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0],
429                                            replace_badchars=self.repository.replace_badchars)
430        try:
431            changeset.entries.extend(last.next().entries)
432        except StopIteration:
433            pass
434
435        return conflicts
436
437    def _handleConflict(self, changeset, conflicts, conflict):
438        """
439        Handle the conflict raised by the application of the upstream changeset.
440
441        Override parent behaviour: with darcs, we need to execute a revert
442        on the conflicted files, **trashing** local changes, but there should
443        be none of them in tailor context.
444        """
445
446        self.log.info("Reverting changes to %s, to solve the conflict",
447                      ' '.join(conflict))
448        cmd = self.repository.command("revert", "--all")
449        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
450        revert.execute(conflict)
451
452    def _checkoutUpstreamRevision(self, revision):
453        """
454        Concretely do the checkout of the upstream revision and return
455        the last applied changeset.
456        """
457
458        from os.path import join, exists
459        from os import mkdir
460        from vcpx.source import InvocationError
461
462        if not self.repository.repository:
463            raise InvocationError("Must specify a the darcs source repository")
464
465        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
466            initial = True
467
468            if revision == 'INITIAL':
469                cmd = self.repository.command("changes", "--xml-output",
470                                              "--repo", self.repository.repository,
471                                               "--reverse")
472                changes = ExternalCommand(command=cmd)
473                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
474
475                if changes.exit_status:
476                    raise ChangesetApplicationFailure(
477                        "%s returned status %d saying\n%s" %
478                        (str(changes), changes.exit_status,
479                         output and output.read() or ''))
480
481                csets = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
482                changeset = csets.next()
483
484                revision = 'hash %s' % changeset.darcs_hash
485            else:
486                revision = 'hash %s' % revision
487        else:
488            initial = False
489
490        if self.repository.subdir == '.' or exists(self.repository.basedir):
491            # This is currently *very* slow, compared to the darcs get
492            # below!
493            if not exists(join(self.repository.basedir, '_darcs')):
494                if not exists(self.repository.basedir):
495                    mkdir(self.repository.basedir)
496
497                cmd = self.repository.command("initialize")
498                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
499                init.execute()
500
501                if init.exit_status:
502                    raise TargetInitializationFailure(
503                        "%s returned status %s" % (str(init),
504                                                   init.exit_status))
505
506                cmd = self.repository.command("pull", "--all", "--quiet")
507                if revision and revision<>'HEAD':
508                    cmd.extend([initial and "--match" or "--tag", revision])
509                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
510                output = dpull.execute(self.repository.repository,
511                                       stdout=PIPE, stderr=STDOUT)[0]
512
513                if dpull.exit_status:
514                    raise TargetInitializationFailure(
515                        "%s returned status %d saying\n%s" %
516                        (str(dpull), dpull.exit_status, output.read()))
517        else:
518            # Use much faster 'darcs get'
519            cmd = self.repository.command("get", "--quiet")
520            if revision and revision<>'HEAD':
521                cmd.extend([initial and "--to-match" or "--tag", revision])
522            else:
523                cmd.append("--partial")
524            dget = ExternalCommand(command=cmd)
525            output = dget.execute(self.repository.repository, self.repository.basedir,
526                                  stdout=PIPE, stderr=STDOUT)[0]
527
528            if dget.exit_status:
529                raise TargetInitializationFailure(
530                    "%s returned status %d saying\n%s" %
531                    (str(dget), dget.exit_status, output.read()))
532
533        cmd = self.repository.command("changes", "--last", "1",
534                                      "--xml-output")
535        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
536        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
537
538        if changes.exit_status:
539            raise ChangesetApplicationFailure(
540                "%s returned status %d saying\n%s" %
541                (str(changes), changes.exit_status, output.read()))
542
543        last = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
544
545        return last.next()
Note: See TracBrowser for help on using the repository browser.