source: tailor/vcpx/repository/darcs/source.py @ 1260

Revision 1260, 20.9 KB checked in by lele@…, 7 years ago (diff)

M-x whitespace-cleanup

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that normalizes the entry list reported by darcs.

    Every entry goes through `addEntry()`, which:

    - collapses "add A; rename A B" into "add B"
    - annihilates "add A; remove A"
    - collapses "rename A B; remove B" into "remove A"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize a new DarcsChangeset.

        The base class is always given ``entries=None``; the supplied
        `entries`, if any, are then fed one by one through `addEntry()`
        so that the darcs specific fixups are applied to each of them.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return

        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Add `entry` to this changeset, reconciling it with the entries
        already present.

        Return the entry that ends up representing the change: the
        given one, an already known entry that was rewritten in place,
        or ``None`` when an addition and a removal of the same name
        cancel each other out.
        """

        # The parser is expected to hand over ready made
        # ChangesetEntry instances; anything else is left to the
        # base class.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        kind = entry.action_kind

        if kind == entry.ADDED:
            # `darcs changes --xml` (as of 1.0.7) lists renames first,
            # even when they obviously follow the addition of the very
            # same entry.  So an addition has to be matched against
            # what is already known.

            # darcs hopefully uses forward slashes under win too
            subtree = entry.name + '/'

            for position, known in enumerate(self.entries):
                is_rename = known.action_kind == known.RENAMED

                if is_rename and known.old_name == entry.name:
                    # "add A; rename A B": turn the known rename into
                    # the addition of B, and do not insert anything
                    # else.
                    known.action_kind = known.ADDED
                    known.old_name = None
                    return known

                # The addition of a directory must come before any
                # entry that lives inside it (or is moved out of it),
                # and the addition of a file before any of its edits.
                if known.name == entry.name or known.name.startswith(subtree):
                    must_precede = True
                else:
                    must_precede = (is_rename
                                    and known.old_name.startswith(subtree))

                if must_precede:
                    self.entries.insert(position, entry)
                    return entry

        elif kind == entry.DELETED:
            # A darcs bug (see tailor ticket #71, or darcs issue185)
            # produced patches with ADD+EDIT+REMOVE of a single file;
            # another one, still present in 1.0.8, hides that and
            # makes a workaround on the tailor side very hard without
            # the patch attached to ticket #71.

            for position, known in enumerate(self.entries):
                if known.name == entry.name:
                    if known.action_kind == known.RENAMED:
                        # "rename A B; remove B": the known rename
                        # becomes the removal of A.
                        known.action_kind = known.DELETED
                        known.name = known.old_name
                        known.old_name = None
                        return known
                    elif known.action_kind == known.ADDED:
                        # "add A; remove A": the two cancel out.
                        del self.entries[position]
                        return None

        # Either an edit or a rename, or an add/remove that matched
        # nothing above: edits obviously belong at the end, and since
        # renames come in very early, appending is fine for them too.
        self.entries.append(entry)
        return entry
124
125
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15):
    """
    Parse XML output of ``darcs changes``.

    This is a generator: it yields the changesets produced by
    ``changesets_from_darcschanges_unsafe`` for the same arguments,
    one at a time, after resetting the ``tags`` attribute of each to
    ``None`` (the tag information computed there is currently not
    reliable).
    """

    for changeset in changesets_from_darcschanges_unsafe(changes, unidiff,
                                                         repodir, chunksize):
        changeset.tags = None
        yield changeset
142
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15):
    """
    Do the real work of parsing the change log, including tags.

    `changes` is a file-like object carrying the XML emitted by
    ``darcs changes --xml-output``; it is read `chunksize` characters
    at a time and fed incrementally to a SAX parser.  This is a
    generator yielding a ``DarcsChangeset`` for each ``<patch>``
    element, as soon as the element is complete.

    When both `unidiff` and `repodir` are given, ``darcs diff
    --unified`` is executed for each patch and its output is stored in
    the ``unidiff`` attribute of the changeset.

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    # Elements whose text content is collected by characters()
    text_elements = ('name', 'comment')
    entry_elements = ('add_file', 'add_directory', 'modify_file',
                      'remove_file', 'remove_directory')

    class DarcsXMLChangesHandler(ContentHandler):
        def __init__(self):
            # ContentHandler may be an old-style class, so call the
            # base initializer explicitly rather than through super().
            ContentHandler.__init__(self)
            # Changesets completed since the last time the driver loop
            # below drained this list.
            self.changesets = []
            # Dictionary describing the <patch> being parsed, if any.
            self.current = None
            # Pieces of text seen since the last interesting element
            # was opened.
            self.current_field = []
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: drop the timezone
                    # name, keeping the leading 19 characters and the
                    # trailing year.
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
            elif name in text_elements or name in entry_elements:
                # Start collecting the text of this element afresh
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags', []))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in text_elements:
                val = ''.join(self.current_field)
                # NOTE(review): this records a tag also when it is the
                # <comment>, not the <name>, that starts with 'TAG ' --
                # confirm whether that is intended.
                if val[:4] == 'TAG ':
                    self.current.setdefault('tags', []).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in entry_elements:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = {'add_file': entry.ADDED,
                                     'add_directory': entry.ADDED,
                                     'modify_file': entry.UPDATED,
                                     'remove_file': entry.DELETED,
                                     'remove_directory': entry.DELETED,
                                     }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    # Feed the parser incrementally, handing over the changesets
    # completed by each chunk before reading the next one.
    chunk = changes.read(chunksize)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = changes.read(chunksize)
    parser.close()
    for cs in handler.changesets:
        yield cs
254
255
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Matches the hash names built by _getUpstreamChangesets():
    # 14 hex digits, 5 hex digits and 40 hex digits separated by
    # dashes, followed by ".gz".
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute ``darcs pull --dry-run`` against the upstream
        repository and parse its textual output, yielding a
        ``Changeset`` (with a computed ``darcs_hash`` attribute) for
        each patch that would be pulled.  Tags are skipped with a
        warning.  The `sincerev` argument is not used here.

        Raise `GetUpstreamChangesetsFailure` when darcs exits with an
        error status or when its output does not have the expected
        shape.
        """

        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1 as new
        except ImportError:
            # Old Python without hashlib
            from sha import new

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        # Skip whatever precedes the list of patches
        l = output.readline()
        while l and not (l.startswith('Would pull the following changes:') or
                         l == 'No remote changes to pull in!\n'):
            l = output.readline()

        if not l:
            # Neither marker was found before the end of the output
            raise GetUpstreamChangesetsFailure(
                "%s did not produce the expected output" % str(pull))

        if l != 'No remote changes to pull in!\n':
            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
            ##   * Refix _getUpstreamChangesets for darcs

            l = output.readline()
            # Stop at the dry-run trailer, or at the end of the output
            # should the trailer be missing.
            while l and not l.startswith('Making no changes:  this is a dry run.'):
                # Assume it's a line like
                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
                # we used to split on the double space before the email,
                # but in this case this is wrong. Waiting for xml output,
                # is it really sane asserting date's length to 28 chars?
                date = l[:28]
                author = l[30:-1]
                y, m, d, hh, mm, ss = strptime(date,
                                               "%a %b %d %H:%M:%S %Z %Y")[:6]
                date = datetime(y, m, d, hh, mm, ss, 0, UTC)
                l = output.readline()
                if not (l.startswith('  * ') or
                        l.startswith('  UNDO:') or
                        l.startswith('  tagged')):
                    raise GetUpstreamChangesetsFailure(
                        "Unexpected line in the output of %s: %r" %
                        (str(pull), l))

                if l.startswith('  *'):
                    name = l[4:-1]
                else:
                    name = l[2:-1]

                # The long comment is made of the following lines
                # indented by two spaces.
                changelog = []
                l = output.readline()
                while l.startswith('  '):
                    changelog.append(l[2:-1])
                    l = output.readline()

                cset = Changeset(name, date, author, '\n'.join(changelog))
                compactdate = date.strftime("%Y%m%d%H%M%S")
                if name.startswith('UNDO: '):
                    name = name[6:]
                    inverted = 't'
                else:
                    inverted = 'f'

                # Compute the hash from name, author, date, comment
                # and inversion flag.
                phash = new()
                phash.update(name)
                phash.update(author)
                phash.update(compactdate)
                phash.update(''.join(changelog))
                phash.update(inverted)
                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                                   new(author).hexdigest()[:5],
                                                   phash.hexdigest())

                if name.startswith('tagged'):
                    self.log.warning("Skipping tag %s because I don't "
                                     "propagate tags from darcs.", name)
                else:
                    yield cset

                # Skip the blank lines between patches, without
                # spinning forever once the output is exhausted.
                while l and not l.strip():
                    l = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch selected by the changeset, then extend
        ``changeset.entries`` with the entries reported for it by
        ``darcs changes --xml-output --summ``.  Return the list of
        conflicting file names reported by darcs, possibly empty.

        Raise `ChangesetApplicationFailure` when the pull fails.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't groke double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            # Fall back to a regular expression on the patch name
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                # The following line lists the files, each one
                # prefixed by './'
                files = output.readline()[:-1].split('./')[1:]
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(['./' + f for f in files])
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
        try:
            changeset.entries.extend(last.next().entries)
        except StopIteration:
            # darcs did not report any matching patch: nothing to add
            pass

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'`` (start from the first upstream
        patch), a darcs patch hash, ``'HEAD'``, or anything else, which
        is then taken as a tag name.

        Raise `InvocationError` when no upstream repository is
        configured, `TargetInitializationFailure` when the working
        directory cannot be initialized or populated, and
        `ChangesetApplicationFailure` when ``darcs changes`` fails.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                # Ask upstream for its history, oldest patch first,
                # and start from the very first one.
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                              "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status,
                         output and output.read() or ''))

                csets = changesets_from_darcschanges(output)
                changeset = csets.next()

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    # A hash selects a patch, anything else is a tag
                    if initial:
                        selector = "--match"
                    else:
                        selector = "--tag"
                    cmd.extend([selector, revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                if initial:
                    selector = "--to-match"
                else:
                    selector = "--tag"
                cmd.extend([selector, revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever the way, report the most recent patch now present
        # in the working directory.
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output)

        return last.next()
Note: See TracBrowser for help on using the repository browser.