source: tailor/vcpx/repository/darcs/source.py @ 1475

Revision 1475, 23.2 KB checked in by Miklos Vajna <vmiklos@…>, 5 years ago (diff)

darcs source: don't fail on empty author

Unfortunately, darcs allows recording a patch with no author information, so we
should not fail in that case.

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: Tailor -- Darcs peculiarities when used as a source
3# :Creato:   lun 10 lug 2006 00:04:59 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains the source specific bits of the darcs backend.
10"""
11
12__docformat__ = 'reStructuredText'
13
14import re
15
16from vcpx.changes import ChangesetEntry, Changeset
17from vcpx.shwrap import ExternalCommand, PIPE, STDOUT
18from vcpx.source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
19                        GetUpstreamChangesetsFailure
20from vcpx.target import TargetInitializationFailure
21from vcpx.tzinfo import UTC
22
23
class DarcsChangeset(Changeset):
    """
    A ``Changeset`` that compensates for the way darcs reports changes.

    The entries are normalized while they get added, see ``addEntry()``:

    - "add A; rename A B" becomes "add B"
    - "add A; remove A" cancels out
    - "rename A B; remove B" becomes "remove A"
    - "rename A B; rename B C" becomes "rename A C"
    """

    def __init__(self, revision, date, author, log, entries=None, **other):
        """
        Initialize a new DarcsChangeset.

        The parent class is always given ``entries=None``: the supplied
        `entries`, if any, are then fed one by one to ``addEntry()`` so
        that each of them goes through the fixups.
        """

        super(DarcsChangeset, self).__init__(revision, date, author, log,
                                             entries=None, **other)
        if entries is None:
            return
        for item in entries:
            self.addEntry(item, revision)

    def addEntry(self, entry, revision):
        """
        Register `entry`, merging it with the entries already known.

        Return the entry that ends up representing the change: `entry`
        itself, an already present entry that has been adjusted in place,
        or ``None`` when a removal annihilates an addition made by the
        same patch.
        """

        # The parser is expected to hand over ready made ChangesetEntry
        # instances; anything else is left to the parent class.
        if not isinstance(entry, ChangesetEntry):
            return super(DarcsChangeset, self).addEntry(entry, revision)

        if entry.action_kind == entry.ADDED:
            # ``darcs changes --xml`` (as of 1.0.7) lists the renames
            # first, even when they obviously follow the addition of the
            # very same entry.  So when a rename *from* the added name is
            # already there, that rename is really the addition: turn it
            # into one and do not insert anything else.

            # darcs hopefully uses forward slashes under win too
            subtree = entry.name + '/'

            for position, known in enumerate(self.entries):
                if (known.action_kind == known.RENAMED
                    and known.old_name == entry.name):
                    # Leave things alone when the order is messed up,
                    # that is when something has been renamed *into* the
                    # added directory.  Example: mv a a2; mkdir a; mv a2 a/b
                    for other in self.entries:
                        if (other.action_kind == other.RENAMED
                            and other.name.startswith(subtree)):
                            break
                    else:
                        # Since removes are the first entries in the
                        # list, anticipating the add cures also the case
                        # of an addition following an edit.
                        known.action_kind = known.ADDED
                        known.old_name = None
                        return known

                # The addition of a directory must precede any add_file
                # and ren_file that have that directory as target, and
                # the addition of a file must precede its edits: insert
                # the new entry right before the first related one.
                if (known.name == entry.name
                    or known.name.startswith(subtree)
                    or (known.action_kind == known.RENAMED
                        and known.old_name.startswith(subtree))):
                    self.entries.insert(position, entry)
                    return entry

        elif entry.action_kind == entry.DELETED:
            # "rename A B; remove B": the existing rename becomes the
            # removal of A.
            #
            # "add A; remove A": the two annihilate.  A bug in darcs
            # (possibly fixed in recent versions) created patches with
            # ADD+EDIT+REMOVE of a single file (see tailor ticket #71, or
            # darcs issue185).  Another bug (still present in 1.0.8)
            # hides that, so without the darcs patch attached to ticket
            # #71 tailor does not have enough information to do the
            # right thing.
            for position, known in enumerate(self.entries):
                if known.name != entry.name:
                    continue
                if known.action_kind == known.RENAMED:
                    known.action_kind = known.DELETED
                    known.name = known.old_name
                    known.old_name = None
                    return known
                if known.action_kind == known.ADDED:
                    del self.entries[position]
                    return None

        elif entry.action_kind == entry.RENAMED:
            # "rename A B; rename B C": stretch the existing rename so
            # that it goes straight from A to C.
            for known in self.entries:
                if (known.action_kind == known.RENAMED
                    and known.name == entry.old_name):
                    known.name = entry.name
                    return known

        # Whatever was not absorbed above goes to the end: that is
        # obviously right for an edit, and good enough for a rename too,
        # since those come in very early.
        self.entries.append(entry)
        return entry
141
142
def changesets_from_darcschanges(changes, unidiff=False, repodir=None,
                                 chunksize=2**15, replace_badchars=None):
    """
    Parse XML output of ``darcs changes``.

    Generate the changesets produced by
    ``changesets_from_darcschanges_unsafe()``, called with the very same
    arguments.

    This is meant to be the place where the (currently incorrect) tag
    information coming from there gets filtered out; as it stands,
    though, every changeset is passed along untouched.
    """

    for changeset in changesets_from_darcschanges_unsafe(
            changes, unidiff=unidiff, repodir=repodir, chunksize=chunksize,
            replace_badchars=replace_badchars):
        yield changeset
159
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.

    This is a generator: `changes`, a file-like object carrying the XML
    output of ``darcs changes``, is read at most `chunksize` at a time
    and a ``DarcsChangeset`` is yielded for each ``<patch>`` element as
    soon as it has been completely parsed.  Each changeset carries the
    ``hash`` attribute of its patch as ``darcs_hash``.

    When both `unidiff` and `repodir` are given, ``darcs diff --unified``
    is run against `repodir` for every patch and what it prints is
    stored in the ``unidiff`` attribute of the changeset.

    `replace_badchars`, when not empty, is a mapping from single
    characters to their replacement: it is applied to the raw text
    before that reaches the XML parser.

    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from time import strptime

    class DarcsXMLChangesHandler(ContentHandler):
        """
        SAX handler collecting a ``DarcsChangeset`` in ``self.changesets``
        for each ``<patch>`` element it sees.
        """

        def __init__(self):
            ContentHandler.__init__(self)
            self.changesets = []
            # Dictionary with the details of the patch being parsed
            self.current = None
            # Pieces of text seen inside the current element
            self.current_field = []
            # State that startElement() passes on to endElement()
            self.inverted = False
            self.old_name = None
            self.new_name = None
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: drop the timezone
                    # name and parse the rest
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])

                # not true for the ValueError case, but oh well
                timestamp = timestamp.replace(tzinfo=UTC)

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes['inverted'] == "True")
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                # Start collecting the text of the element from scratch
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # Only the *name* of a patch marks it as a tag: a long
                # comment that happens to begin with "TAG " does not.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                path = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames: bring them up to date.  Only
                    # a leading path component is rewritten, so that
                    # neither a sibling sharing the prefix ("ab" vs "a")
                    # nor a later occurrence of the old name is touched.
                    for moved in self.current['entries']:
                        if moved.action_kind != moved.RENAMED:
                            continue
                        old = moved.old_name
                        if path == old or path.startswith(old + '/'):
                            path = moved.name + path[len(old):]
                entry = ChangesetEntry(path)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            # The text of an element may arrive in several pieces
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, mapping):
        "Replace each character of `s` found in `mapping`, if any."
        if not mapping:
            return s

        return "".join([mapping.get(c, c) for c in s])

    # Feed the parser incrementally, handing over the changesets as soon
    # as they are complete instead of accumulating the whole history.
    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
286
287
class DarcsSourceWorkingDir(UpdatableSourceWorkingDir):
    """
    A source working directory under ``darcs``.
    """

    # Shape of a darcs patch hash: 14, 5 and 40 lowercase hex digits
    # separated by dashes, followed by ".gz" (see how _parseDarcsPull()
    # builds one).
    is_hash_rx = re.compile(r'[0-9a-f]{14}-[0-9a-f]{5}-[0-9a-f]{40}\.gz')

    def _getUpstreamChangesets(self, sincerev):
        """
        Do the actual work of fetching the upstream changeset.

        Execute ``darcs pull --dry-run`` in the working directory, with
        ``self.repository.repository`` as argument, and return what
        ``_parseDarcsPull()`` makes of its output.  `sincerev` is not
        used.

        Raise ``GetUpstreamChangesetsFailure`` when darcs exits with a
        non zero status.
        """

        cmd = self.repository.command("pull", "--dry-run")
        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(self.repository.repository,
                              stdout=PIPE, stderr=STDOUT, TZ='UTC0')[0]

        if pull.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        return self._parseDarcsPull(output)


    def _parseDarcsPull(self, output):
        """Process 'darcs pull' output to Changesets.

        This is a generator: it reads `output`, a file-like object, one
        line at a time and yields a ``Changeset`` for each patch listed
        there, with the hash computed from the patch details stored in
        its ``darcs_hash`` attribute.

        An ``AssertionError`` is raised when a line does not look as
        expected; that includes the case of an output that ends before
        the "dry run" trailer.
        """
        from datetime import datetime
        from time import strptime
        try:
            from hashlib import sha1
        except ImportError:
            # Python older than 2.5 only has the (deprecated) sha module
            from sha import new as sha1
        from vcpx.changes import Changeset

        nothing_to_pull = 'No remote changes to pull in!\n'

        # Skip whatever precedes the list of patches
        line = output.readline()
        while line and not (line.startswith('Would pull the following changes:') or
                            line == nothing_to_pull):
            line = output.readline()

        if line == nothing_to_pull:
            return

        ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
        ##   * Refix _getUpstreamChangesets for darcs

        fsep = re.compile('[ :]+')
        line = output.readline()
        while not line.startswith('Making no changes:  this is a dry run.'):
            # Assume it's a line like
            #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
            # Use a regular expression matching multiple spaces or colons
            # to split it: the first 8 fields build up the datetime,
            # whatever follows is the author.
            pieces = fsep.split(line.rstrip(), 8)
            assert len(pieces) >= 8, \
                   "Cannot parse %r as a patch timestamp" % line
            date = ' '.join(pieces[:8])
            if len(pieces) > 8:
                author = pieces[8]
            else:
                # darcs allows patches with empty author
                author = ""
            y, m, d, hh, mm, ss = strptime(date, "%a %b %d %H %M %S %Z %Y")[:6]
            date = datetime(y, m, d, hh, mm, ss, 0, UTC)

            line = output.readline().rstrip()
            assert (line.startswith('  *') or
                    line.startswith('  UNDO:') or
                    line.startswith('  tagged')), \
                    "Got %r but expected the start of the log" % line

            if line.startswith('  *'):
                name = line[4:]
            else:
                name = line[2:]

            # The long comment is made of the indented lines that follow
            changelog = []
            line = output.readline()
            while line.startswith('  '):
                changelog.append(line[2:-1])
                line = output.readline()

            cset = Changeset(name, date, author, '\n'.join(changelog))
            compactdate = date.strftime("%Y%m%d%H%M%S")
            if name.startswith('UNDO: '):
                name = name[6:]
                inverted = 't'
            else:
                inverted = 'f'

            if name.startswith('tagged '):
                name = name[7:]
                if cset.tags is None:
                    cset.tags = [name]
                else:
                    cset.tags.append(name)
                name = "TAG " + name

            # Rebuild the hash of the patch out of its details
            phash = sha1()
            phash.update(name)
            phash.update(author)
            phash.update(compactdate)
            phash.update(''.join(changelog))
            phash.update(inverted)
            cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                               sha1(author).hexdigest()[:5],
                                               phash.hexdigest())

            yield cset

            # Skip the blank lines between one patch and the next.  Stop
            # at the end of the output too, where readline() keeps
            # returning an empty string: without the guard this would
            # loop forever, while this way a truncated output makes the
            # timestamp assertion above fail.
            while line and not line.strip():
                line = output.readline()

    def _applyChangeset(self, changeset):
        """
        Do the actual work of applying the changeset to the working copy.

        Pull the single patch matching `changeset`, then ask ``darcs
        changes`` for its summary and extend ``changeset.entries`` with
        the entries found there.

        Return the list of the files darcs reports as conflicting.
        Raise ``ChangesetApplicationFailure`` when the pull exits with a
        non zero status.
        """

        needspatchesopt = False
        if hasattr(changeset, 'darcs_hash'):
            selector = '--match'
            revtag = 'hash ' + changeset.darcs_hash
        elif changeset.revision.startswith('tagged '):
            selector = '--tag'
            revtag = changeset.revision[7:]
        else:
            selector = '--match'
            revtag = 'date "%s" && author "%s"' % (
                changeset.date.strftime("%Y%m%d%H%M%S"),
                changeset.author)
            # The 'exact' matcher doesn't grok double quotes:
            # """currently there is no provision for escaping a double
            # quote, so you have to choose between matching double
            # quotes and matching spaces"""
            if '"' not in changeset.revision:
                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
            else:
                needspatchesopt = True

        cmd = self.repository.command("pull", "--all", "--quiet",
                                      selector, revtag)

        if needspatchesopt:
            cmd.extend(['--patches', re.escape(changeset.revision)])

        pull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = pull.execute(stdout=PIPE, stderr=STDOUT, input='y')[0]

        if pull.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(pull), pull.exit_status, output.read()))

        conflicts = []
        line = output.readline()
        while line:
            if line.startswith('We have conflicts in the following files:'):
                # The names are on the following line, space separated
                files = output.readline()[:-1].split(' ')
                self.log.warning("Conflict after 'darcs pull': %s",
                                 ' '.join(files))
                conflicts.extend(files)
            line = output.readline()

        cmd = self.repository.command("changes", selector, revtag,
                                      "--xml-output", "--summ")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0],
                                            replace_badchars=self.repository.replace_badchars)
        # Only the first changeset, if any, is of interest
        for first in last:
            changeset.entries.extend(first.entries)
            break

        return conflicts

    def _handleConflict(self, changeset, conflicts, conflict):
        """
        Handle the conflict raised by the application of the upstream changeset.

        Override parent behaviour: with darcs, we need to execute a revert
        on the conflicted files, **trashing** local changes, but there should
        be none of them in tailor context.
        """

        self.log.info("Reverting changes to %s, to solve the conflict",
                      ' '.join(conflict))
        cmd = self.repository.command("revert", "--all")
        revert = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        revert.execute(conflict)

    def _checkoutUpstreamRevision(self, revision):
        """
        Concretely do the checkout of the upstream revision and return
        the last applied changeset.

        `revision` may be ``'INITIAL'``, a patch hash, ``'HEAD'`` or
        anything else, which is then used as a tag.

        Raise ``InvocationError`` when no source repository has been
        specified, ``TargetInitializationFailure`` when darcs fails to
        initialize or populate the working directory, and
        ``ChangesetApplicationFailure`` when ``darcs changes`` fails or
        does not report any patch.
        """

        from os.path import join, exists
        from os import mkdir
        from vcpx.source import InvocationError

        if not self.repository.repository:
            raise InvocationError("Must specify a the darcs source repository")

        if revision == 'INITIAL' or self.is_hash_rx.match(revision):
            initial = True

            if revision == 'INITIAL':
                # Find out the hash of the very first patch upstream
                cmd = self.repository.command("changes", "--xml-output",
                                              "--repo", self.repository.repository,
                                              "--reverse")
                changes = ExternalCommand(command=cmd)
                output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

                if changes.exit_status:
                    saying = ''
                    if output:
                        saying = output.read() or ''
                    raise ChangesetApplicationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(changes), changes.exit_status, saying))

                csets = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
                for changeset in csets:
                    break
                else:
                    # Nothing to start from: say so, instead of letting
                    # a bare StopIteration out
                    raise ChangesetApplicationFailure(
                        "%s did not report any patch" % str(changes))

                revision = 'hash %s' % changeset.darcs_hash
            else:
                revision = 'hash %s' % revision
        else:
            initial = False

        if self.repository.subdir == '.' or exists(self.repository.basedir):
            # This is currently *very* slow, compared to the darcs get
            # below!
            if not exists(join(self.repository.basedir, '_darcs')):
                if not exists(self.repository.basedir):
                    mkdir(self.repository.basedir)

                cmd = self.repository.command("initialize")
                init = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                init.execute()

                if init.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %s" % (str(init),
                                                   init.exit_status))

                cmd = self.repository.command("pull", "--all", "--quiet")
                if revision and revision != 'HEAD':
                    if initial:
                        selector = "--match"
                    else:
                        selector = "--tag"
                    cmd.extend([selector, revision])
                dpull = ExternalCommand(cwd=self.repository.basedir, command=cmd)
                output = dpull.execute(self.repository.repository,
                                       stdout=PIPE, stderr=STDOUT)[0]

                if dpull.exit_status:
                    raise TargetInitializationFailure(
                        "%s returned status %d saying\n%s" %
                        (str(dpull), dpull.exit_status, output.read()))
        else:
            # Use much faster 'darcs get'
            cmd = self.repository.command("get", "--quiet")
            if revision and revision != 'HEAD':
                if initial:
                    selector = "--to-match"
                else:
                    selector = "--tag"
                cmd.extend([selector, revision])
            else:
                cmd.append("--partial")
            dget = ExternalCommand(command=cmd)
            output = dget.execute(self.repository.repository, self.repository.basedir,
                                  stdout=PIPE, stderr=STDOUT)[0]

            if dget.exit_status:
                raise TargetInitializationFailure(
                    "%s returned status %d saying\n%s" %
                    (str(dget), dget.exit_status, output.read()))

        # Whatever the way, the last patch in the working directory is
        # the changeset to return
        cmd = self.repository.command("changes", "--last", "1",
                                      "--xml-output")
        changes = ExternalCommand(cwd=self.repository.basedir, command=cmd)
        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]

        if changes.exit_status:
            raise ChangesetApplicationFailure(
                "%s returned status %d saying\n%s" %
                (str(changes), changes.exit_status, output.read()))

        last = changesets_from_darcschanges(output, replace_badchars=self.repository.replace_badchars)
        for changeset in last:
            return changeset

        raise ChangesetApplicationFailure(
            "%s did not report any patch" % str(changes))
Note: See TracBrowser for help on using the repository browser.