source: tailor/vcpx/repository/darcs/source.py @ 1295

Revision 1295, 21.6 KB checked in by lele@…, 7 years ago (diff)

Don't assume the timestamp in darcs log is exactly 28 chars long

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that smooths over the way darcs reports changes:

    - "add A; rename A B" is recorded as "add B"
    - "add A; remove A" leaves no trace
    - "rename A B; remove B" is recorded as "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Build the changeset, routing every given entry through
        ``addEntry()`` so that the fixups are applied.
        """

        # The base class is deliberately given no entries: they are fed
        # one at a time below.
        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Record `entry`, merging it with the entries already known when
        darcs reported the same path more than once.

        Return the entry that ends up representing the change (which may
        be an already known one, modified in place), or ``None`` when an
        addition and a removal cancel each other out.
        """

        # The parser always hands over ready made ChangesetEntry
        # instances; anything else is left to the base class.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) lists renames first,
            # even when they obviously follow the addition of the very
            # same entry.  So an already known "rename A B" met while
            # adding A really means "add B": rewrite it in place and do
            # not store a second entry.
            #
            # Otherwise the addition must come before anything that
            # touches the same path or lives below it: an add_dir has to
            # precede add_file/ren_file targeting that directory, and an
            # add_file has to precede its edits.

            # darcs hopefully uses forward slashes under win too
            below = entry.name + '/'

            for position, known in enumerate(self.entries):
                is_rename = known.action_kind == known.RENAMED

                if is_rename and known.old_name == entry.name:
                    # Removals are the first entries in the list, so
                    # anticipating the add here also cures the case of
                    # an addition that follows an edit.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                if (known.name == entry.name
                    or known.name.startswith(below)
                    or (is_rename and known.old_name.startswith(below))):
                    self.entries.insert(position, entry)
                    return entry

        elif kind == entry.DELETED:
            # "rename A B; remove B" collapses into "remove A".
            #
            # A removal of something added by the same patch annihilates
            # both: a darcs bug (possibly fixed in recent versions)
            # produced patches with ADD+EDIT+REMOVE of a single file
            # (see tailor ticket #71, or darcs issue185).  Another bug,
            # still present in 1.0.8, hides that and makes a workaround
            # on the tailor side nearly impossible; a fix for darcs is
            # attached to ticket #71, and without it tailor lacks the
            # information needed to do the right thing.

            for position, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue

                if known.action_kind == known.RENAMED:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known

                if known.action_kind == known.ADDED:
                    del self.entries[position]
                    return None

        # Either an edit or a rename, or an add/remove that matched
        # nothing above: edits belong at the end, and since renames show
        # up very early appending is fine for them as well.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse the XML output of ``darcs changes``.

    Generate the ``Changeset`` instances produced by
    ``changesets_from_darcschanges_unsafe()``, called with the very same
    arguments, after resetting their ``tags`` attribute to ``None``:
    the tag information collected there is (currently) not reliable.
    """

    for changeset in changesets_from_darcschanges_unsafe(changes, unidiff,
                                                         repodir, chunksize):
        changeset.tags = None
        yield changeset
142
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object that is read `chunksize` units at a
    time and fed to an incremental SAX parser; a ``DarcsChangeset`` is
    generated for each ``<patch>`` element as soon as it is complete.
    When both `unidiff` and `repodir` are given, the output of ``darcs
    diff --unified`` for the patch is stored in the ``unidiff``
    attribute of each changeset.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid assumption
    in general--a tag that does not depend on patch P can be pulled in
    from another darcs repo after P.  The tag info is collected anyway
    because DarcsWorkingDir._currentTags() can use it safely despite
    this problem.  Hopefully the problem will eventually be fixed and
    this function can be renamed changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    # Summary elements carrying a path as text, mapped to the name of
    # the ChangesetEntry attribute that holds the related action kind.
    action_of = {'add_file': 'ADDED',
                 'add_directory': 'ADDED',
                 'modify_file': 'UPDATED',
                 'remove_file': 'DELETED',
                 'remove_directory': 'DELETED'}

    # Elements whose opening tag restarts the collection of text.
    text_elements = ('name', 'comment', 'add_file', 'add_directory',
                     'modify_file', 'remove_file', 'remove_directory')

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            self.changesets = []     # completed, not yet consumed
            self.current = None      # fields of the patch being parsed
            self.current_field = []  # pieces of text seen so far
            self.darcsdiff = None
            if unidiff and repodir:
                self.darcsdiff = ExternalCommand(
                    command=["darcs", "diff", "--unified",
                             "--repodir", repodir,
                             "--patch", "%(patchname)s"])

        def _begin_patch(self, attributes):
            record = self.current = {}
            record['author'] = attributes['author']
            stamp = attributes['date']
            try:
                # 20040619130027
                when = datetime(*strptime(stamp, '%Y%m%d%H%M%S')[:6])
            except ValueError:
                # Old darcs patches use the form
                # Sun Oct 20 20:01:05 EDT 2002: drop the timezone name,
                # whatever its length, and parse the rest
                when = datetime(*strptime(stamp[:19] + stamp[-5:],
                                          '%a %b %d %H:%M:%S %Y')[:6])

            # not true for the ValueError case, but oh well
            record['date'] = when.replace(tzinfo=UTC)
            record['comment'] = ''
            record['hash'] = attributes['hash']
            record['entries'] = []

        def _finish_patch(self):
            record = self.current
            cset = DarcsChangeset(record['name'],
                                  record['date'],
                                  record['author'],
                                  record['comment'],
                                  record['entries'],
                                  tags=record.get('tags',[]))
            cset.darcs_hash = record['hash']
            if self.darcsdiff:
                diff = self.darcsdiff.execute(stdout=PIPE,
                                              patchname=cset.revision)[0]
                cset.unidiff = diff.read()

            self.changesets.append(cset)
            self.current = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self._begin_patch(attributes)
            elif name in text_elements:
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                self._finish_patch()
            elif name == 'name' or name == 'comment':
                text = ''.join(self.current_field)
                if text.startswith('TAG '):
                    self.current.setdefault('tags',[]).append(text[4:])
                self.current[name] = text
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in action_of:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = getattr(entry, action_of[name])
                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    sax = make_parser()
    sink = DarcsXMLChangesHandler()
    sax.setContentHandler(sink)
    sax.setErrorHandler(ErrorHandler())

    while True:
        chunk = changes.read(chunksize)
        if not chunk:
            break
        sax.feed(chunk)
        # Hand over what has been completed by this chunk, starting
        # afresh for the next one.
        ready, sink.changesets = sink.changesets, []
        for cs in ready:
            yield cs

    sax.close()
    for cs in sink.changesets:
        yield cs
254
255
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of a darcs patch identifier, as assembled by
    # _getUpstreamChangesets(): 14 chars of compact timestamp, the first
    # 5 chars of the author digest and the 40 chars of the patch digest.
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute ``darcs pull --dry-run`` against the upstream repository
        and parse its textual listing, generating a ``Changeset`` (with
        a computed ``darcs_hash`` attribute) for each patch that would
        be pulled.  Tags are skipped with a warning.  `sincerev` is not
        used here.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non zero status, ``AssertionError`` when a line of the listing
        does not look like what is expected.
        """

        from datetime import datetime
        from time import strptime
        try:
            # The ``sha`` module is deprecated in favour of ``hashlib``
            from hashlib import sha1 as new
        except ImportError:
            from sha import new
        from vcpx.changes import Changeset

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        # TZ is forced so that the timestamps are printed as UTC
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # Skip whatever precedes the listing
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            l = output.readline()
            while not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # we used to split on the double space before the email,
                # but in this case this is wrong. Then we assumed the date
                # part to be exactly 28 chars long, but what about timezone
                # names like 'CEST'? Waiting for xml output...
                # We still assume there are *two* spaces before the email.
                # The alternative is using some sort of a regex: Aaron Kaplan
                # kindly suggested his own perl snippet
                #    /^(... ... .\d .\d:\d\d:\d\d ...?. \d\d\d\d)  (.*)/ || die;
                #    my ($date, $author) = ($1, $2);
                # but that assumes the two spaces as separator, so I find the
                # following solution easier and by any chance faster too.
                #
                # The line terminator is dropped first: otherwise it would
                # end up at the end of the author, and from there into the
                # patch hash computed below.
                pieces = l.rstrip('\r\n').split('  ')
                assert len(pieces)>1, "Cannot parse %r as a patch timestamp" % l
                author = pieces.pop()
                date = ' '.join(pieces)
                y, m, d, hh, mm, ss = strptime(date,
                                               "%a %b %d %H:%M:%S %Z %Y")[:6]
                date = datetime(y, m, d, hh, mm, ss, 0, UTC)

                # The line that follows carries the patch name
                l = output.readline()
                assert (l.startswith('  * ') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged'))

                if l.startswith('  *'):
                    name = l[4:-1]
                else:
                    name = l[2:-1]

                # Then come the indented lines of the long comment, if any
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))

                # Compute the identifier of the patch out of its name
                # (without the UNDO marker), author, date, comment and
                # "inverted" flag.
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())

                if name.startswith('tagged'):
                    self.log.warning("Skipping tag %s because I don't "
                                     "propagate tags from darcs.", name)
                else:
                    yield cset

                # Skip the empty lines between one patch and the next,
                # stopping at end of file: readline() keeps returning an
                # empty string there, which would loop forever.
                while l and not l.strip():
                    l = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch selected by hash, by tag or, lacking both,
        by date, author and name; then extend ``changeset.entries`` with
        the entries reported by ``darcs changes --xml-output --summ``
        for that same patch.

        Return the list of the conflicting paths reported by darcs, and
        raise ``ChangesetApplicationFailure`` when the pull exits with a
        non zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            # Fall back on a regexp match on the patch name
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # The conflicting paths are listed, each starting with './', on
        # the line that follows the notice
        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                files = output.readline()[:-1].split('./')[1:]
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(['./' + f for f in files])
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs did not report the patch: keep the entries as they are
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (start from the first patch that
        ``darcs changes --reverse`` lists for the upstream repository),
        a patch identifier matching ``is_hash_rx``, ``'HEAD'``, or
        anything else, that is passed to darcs as a tag.

        Raise ``InvocationError`` when there is no source repository,
        ``TargetInitializationFailure`` when the working directory
        cannot be populated and ``ChangesetApplicationFailure`` when
        ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        # `initial` tells whether `revision` selects a patch by hash
        # rather than by tag
        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                               "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    cmd.extend([initial and "--match" or "--tag", revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                cmd.extend([initial and "--to-match" or "--tag", revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever way the working directory was populated, report the
        # most recent patch it contains
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output)

        return last.next()
Note: See TracBrowser for help on using the repository browser.