source: tailor/vcpx/repository/darcs/source.py @ 1311

Revision 1311, 21.7 KB checked in by Kevin Turner <kevin@…>, 6 years ago (diff)

DarcsSourceWorkingDir._parseDarcsPull: don't include the newline as part of 'author'

also tests to check the change.darcs_hash.

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A changeset that smooths over darcs idiosyncrasies while it is filled:

    - collapse "add A; rename A B" into "add B"
    - annihilate "add A; remove A"
    - collapse "rename A B; remove B" into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Build the changeset, feeding each given entry through ``addEntry``.

        The base class is always handed ``entries=None``; the entries
        received here are added one at a time afterwards, so that every
        one of them is subject to the fixups done by ``addEntry``.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Add `entry` to the changeset, reconciling it with the entries
        already known.

        Return the entry that ends up representing the change: that may
        be `entry` itself, an existing entry that has been adjusted, or
        ``None`` when an addition and its removal cancel each other.
        Anything that is not a ``ChangesetEntry`` is delegated untouched
        to the base class.
        """

        # Not expected to happen, since the parser builds real
        # ChangesetEntry instances, but stay on the safe side.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) lists the renames
            # first, even when they evidently follow the addition of
            # the very same entry. So an addition has to be checked
            # against what has been collected so far.

            # darcs hopefully uses forward slashes under win as well
            prefix = entry.name + '/'

            for pos, known in enumerate(self.entries):
                is_rename = known.action_kind == known.RENAMED

                if is_rename and known.old_name == entry.name:
                    # "add A; rename A B": morph the rename into the
                    # addition of B, and do not store anything else.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # The addition must come before any entry touching the
                # same name, before anything living below it when it is
                # a directory, and before renames moving things out of it.
                touches_it = (known.name == entry.name
                              or known.name.startswith(prefix))
                if touches_it or (is_rename
                                  and known.old_name.startswith(prefix)):
                    self.entries.insert(pos, entry)
                    return entry

        elif kind == entry.DELETED:
            # "rename A B; remove B" becomes "remove A", while a removal
            # of something added by this same patch cancels both (see
            # tailor ticket #71, or darcs issue185).
            for pos, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue
                if known.action_kind == known.RENAMED:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known
                if known.action_kind == known.ADDED:
                    del self.entries[pos]
                    return None

        # Either an edit or a rename, or an add/remove that did not
        # match anything above: in all these cases the end of the list
        # is the right place.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse XML output of ``darcs changes``.

    Lazily yield one ``Changeset`` instance per patch found in `changes`.

    This is a thin wrapper around ``changesets_from_darcschanges_unsafe``
    that resets the ``tags`` of each changeset to ``None``, since the tag
    information computed there is currently not reliable.
    """

    for changeset in changesets_from_darcschanges_unsafe(changes, unidiff,
                                                         repodir, chunksize):
        changeset.tags = None
        yield changeset
142
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object carrying the XML emitted by
    ``darcs changes --xml-output``; it is read `chunksize` bytes at a
    time and the changesets are yielded as soon as their ``<patch>``
    element is complete. When both `unidiff` and `repodir` are given,
    ``darcs diff --unified`` is executed for each patch and its output
    is stored in the ``unidiff`` attribute of the changeset.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    class DarcsXMLChangesHandler(ContentHandler):
        """SAX handler collecting a DarcsChangeset for each ``<patch>``."""

        def __init__(self):
            ContentHandler.__init__(self)
            # Changesets completed and not yet consumed by the caller
            self.changesets = []
            # Attributes of the patch being parsed, None outside of one
            self.current = None
            # Pieces of text received for the element being parsed
            self.current_field = []
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: drop the timezone
                    # name and parse what remains.
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # Not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                # Start collecting the text of this element afresh
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # Only the *name* of a patch marks it as a tag: a long
                # comment that happens to start with "TAG " does not.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the parser incrementally, handing out the changesets as soon
    # as they are complete instead of keeping the whole log in memory.
    chunk = changes.read(chunksize)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = changes.read(chunksize)
    parser.close()
    for cs in handler.changesets:
        yield cs
254
255
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of the patch identifier built by _parseDarcsPull(): compact
    # timestamp, first five hex digits of the author's SHA-1 and the full
    # SHA-1 of the patch, followed by ``.gz``.
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute ``darcs pull --dry-run`` against the upstream repository
        and return the changesets parsed out of its output. `sincerev`
        is not used here.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non-zero status.
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """
        Process 'darcs pull' output to Changesets.

        `output` is a file-like object, read one line at a time with
        ``readline()``. This is a generator, yielding a ``Changeset``
        (carrying a computed ``darcs_hash`` attribute) for each patch
        listed; tags are logged and skipped.

        An ``AssertionError`` is raised when a line does not look like
        the expected patch header.
        """
        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1 as new
        except ImportError:
            # Python older than 2.5 has only the (now deprecated) sha module
            from sha import new

        # Skip whatever preamble, up to the line introducing the patches
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            l = output.readline()
            while not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # Splitting on the first double space is wrong, because
                # of single digit days, and the date part cannot be
                # assumed to be exactly 28 chars long either, because of
                # timezone names like 'CEST'. So split on every double
                # space: the author is the last piece, the date is
                # whatever comes before. This still assumes there are
                # *two* spaces before the email.
                pieces = l.split('  ')
                assert len(pieces)>1, "Cannot parse %r as a patch timestamp" % l
                # Drop the trailing newline from the author
                author = pieces.pop()[:-1]
                date = ' '.join(pieces)
                y,m,d,hh,mm,ss = strptime(date, "%a %b %d %H:%M:%S %Z %Y")[:6]
                date = datetime(y,m,d,hh,mm,ss,0,UTC)
                l = output.readline()
                assert (l.startswith('  * ') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged'))

                if l.startswith('  *'):
                    name = l[4:-1]
                else:
                    name = l[2:-1]

                # The long comment is made by the following indented lines
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))

                # Compute the identifier of the patch out of its name,
                # author, date, comment and "inverted" flag.
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())

                if name.startswith('tagged'):
                    self.log.warning("Skipping tag %s because I don't "
                                     "propagate tags from darcs.", name)
                else:
                    yield cset

                # Skip the empty lines between one patch and the next.
                # readline() returns '' at end of file, that would keep
                # this loop spinning forever: stop there instead.
                while l and not l.strip():
                    l = output.readline()
                if not l:
                    break

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch corresponding to `changeset`, then extend
        ``changeset.entries`` with the entries reported by
        ``darcs changes --xml-output --summ`` for that same patch.

        Return the list of conflicting file names reported by darcs,
        empty when there is no conflict. Raise
        ``ChangesetApplicationFailure`` when the pull exits with a
        non-zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't grok double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The conflicting files are listed on the line that follows
        # the announcement, each one prefixed by './'
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split('./')[1:]
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(['./' + f for f in files])
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs did not report anything for that patch
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (the first patch of the upstream
        repository), a patch identifier matching ``is_hash_rx``,
        ``'HEAD'`` or an empty value (everything), or anything else,
        that is passed to darcs as a tag.

        Raise ``InvocationError`` when the source repository is not
        specified, ``TargetInitializationFailure`` when the working
        directory cannot be populated and ``ChangesetApplicationFailure``
        when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        # An empty revision is accepted below as a synonym of HEAD: do
        # not hand it to the regexp, that would choke on None.
        if revision == 'INITIAL' or (revision and
                                     self.is_hash_rx.match(revision)):
            initial = True

            if revision == 'INITIAL':
                # Ask upstream for its history, oldest patch first, to
                # find out the identifier of the very first one.
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever the way, find out which is the last patch applied
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output)

        return last.next()
Note: See TracBrowser for help on using the repository browser.