source: tailor/vcpx/darcs.py @ 887

Revision 887, 24.5 KB checked in by Aaron Kaplan <kaplan@…>, 8 years ago (diff)

Recognize Darcs tags
This completes the preliminary work needed to import tags from cvs
to darcs. In cvs (but not in darcs) it's possible to apply a tag to
only a subset of the files in a project; I only export a tag from cvs
if it's been applied to all (and only) the files that are currently
alive. A couple of caveats:

  • If you go behind tailor's back and use darcs to pull changes into the hybrid repository, and then tailor pulls in a tag from cvs, the tag will be applied to the wrong version. But tailor already didn't deal well with darcs going behind its back, so this patch doesn't really create a new problem.
  • If a tag is applied in cvs and then you sync from darcs->cvs, with no cvs->darcs sync in between, then the tag will never be propagated to darcs. Once again, this is a situation that tailor already didn't handle well--if you have people working simultaneously on both ends, then conflicts can arise, and tailor doesn't deal well with conflicts.
  • CVS allows one to move an already existing tag from one revision to another. If a tag is moved in CVS after the newly-tagged revision has already been imported into darcs, then the change won't be reflected in darcs. However, if a tag is moved to a revision that has not yet been imported into darcs (and this is the more common situation--you create a new version and immediately move the tag to this new version), then the tag will be properly imported.

Some code is in place for syncing tags in the other direction
(darcs->cvs) as well, but it's currently disabled because it can tag
the wrong version under certain circumstances. I hope to fix that soon
and send another patch.

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: vcpx -- Darcs details
3# :Creato:   ven 18 giu 2004 14:45:28 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9This module contains supporting classes for the ``darcs`` versioning system.
10"""
11
12__docformat__ = 'reStructuredText'
13
14from shwrap import ExternalCommand, PIPE, STDOUT
15from source import UpdatableSourceWorkingDir, ChangesetApplicationFailure, \
16     GetUpstreamChangesetsFailure
17from target import SyncronizableTargetWorkingDir, TargetInitializationFailure
18from xml.sax import SAXException
19
# Template of the ``_darcs/prefs/motd`` content: the ``%s`` placeholder
# receives the string form of the source repository.
MOTD = "Tailorized equivalent of\n%s\n"
24
def changesets_from_darcschanges(changes, unidiff=False, repodir=None):
    """
    Parse the XML output of ``darcs changes``.

    The parsing itself is delegated to
    ``changesets_from_darcschanges_unsafe``, receiving the very same
    arguments; the (currently incorrect) tag information it collects
    is then discarded, resetting ``tags`` to ``None`` on each changeset.

    Return a list of ``Changeset`` instances.
    """

    parsed = changesets_from_darcschanges_unsafe(changes, unidiff, repodir)
    for changeset in parsed:
        # The tags computed by the unsafe variant cannot be trusted here
        changeset.tags = None
    return parsed
40
41
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None):
    """
    Do the real work of parsing the change log, including tags.

    Warning: the tag information in the changesets returned by this
    function is only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.

    :param changes: the XML emitted by ``darcs changes --xml-output``,
                    in whatever form ``xml.sax.parse`` accepts
    :param unidiff: when true, and `repodir` is given as well, the output
                    of ``darcs diff --unified`` for each patch is stored
                    in the ``unidiff`` attribute of its changeset
    :param repodir: the repository passed to ``darcs diff --repodir``
    :return: a list of ``Changeset`` instances, sorted by date; each one
             carries the patch hash in its ``darcs_hash`` attribute
    """
    from xml.sax import parse
    from xml.sax.handler import ContentHandler
    from changes import ChangesetEntry, Changeset
    from datetime import datetime

    class DarcsXMLChangesHandler(ContentHandler):
        """
        SAX handler that builds a ``Changeset`` for each ``<patch>``
        element it encounters.
        """

        def __init__(self):
            self.changesets = []
            # Dictionary with the details of the patch being parsed
            self.current = None
            # Chunks of text of the element being parsed
            self.current_field = []
            if unidiff and repodir:
                # "%(patchname)s" is filled in at execution time, by the
                # ``patchname`` keyword given to execute()
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                # 20040619130027
                y = int(date[:4])
                m = int(date[4:6])
                d = int(date[6:8])
                hh = int(date[8:10])
                mm = int(date[10:12])
                ss = int(date[12:14])
                timestamp = datetime(y, m, d, hh, mm, ss)
                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                # Start collecting the text of this element from scratch
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                # Sort the paths to make tests easier
                self.current['entries'].sort(lambda x,y: cmp(x.name, y.name))
                name = self.current['name']
                log = self.current['comment']
                if log:
                    changelog = name + '\n' + log
                else:
                    changelog = name
                cset = Changeset(name,
                                 self.current['date'],
                                 self.current['author'],
                                 changelog,
                                 self.current['entries'],
                                 tags=self.current.get('tags',[]))
                cset.darcs_hash = self.current['hash']
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                # A tag is identified by the *name* of the patch only: the
                # long comment of an ordinary patch may well begin with
                # "TAG " without the patch being a tag.
                if name == 'name' and val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                entry = ChangesetEntry(''.join(self.current_field).strip())
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]

                self.current['entries'].append(entry)

        def characters(self, data):
            # The parser may deliver the text of an element in several
            # chunks: they get joined when the element is closed
            self.current_field.append(data)


    handler = DarcsXMLChangesHandler()
    parse(changes, handler)
    changesets = handler.changesets

    # sort changeset by date
    changesets.sort(lambda x, y: cmp(x.date, y.date))

    return changesets
153
154
155class DarcsWorkingDir(UpdatableSourceWorkingDir,SyncronizableTargetWorkingDir):
156    """
157    A working directory under ``darcs``.
158    """
159
160    ## UpdatableSourceWorkingDir
161
162    def _getUpstreamChangesets(self, sincerev):
163        """
164        Do the actual work of fetching the upstream changeset.
165        """
166
167        from datetime import datetime
168        from time import strptime
169        from changes import Changeset
170        from sha import new
171
172        cmd = self.repository.command("pull", "--dry-run")
173        pull = ExternalCommand(cwd=self.basedir, command=cmd)
174        output = pull.execute(self.repository.repository,
175                              stdout=PIPE, stderr=STDOUT, TZ='UTC')[0]
176
177        if pull.exit_status:
178            raise GetUpstreamChangesetsFailure(
179                "%s returned status %d saying \"%s\"" %
180                (str(pull), pull.exit_status, output.read()))
181
182        l = output.readline()
183        while l and not (l.startswith('Would pull the following changes:') or
184                         l == 'No remote changes to pull in!\n'):
185            l = output.readline()
186
187        changesets = []
188
189        if l <> 'No remote changes to pull in!\n':
190            ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
191            ##   * Refix _getUpstreamChangesets for darcs
192
193            l = output.readline()
194            while not l.startswith('Making no changes:  this is a dry run.'):
195                # Assume it's a line like
196                #    Sun Jan  2 00:24:04 UTC 2005  lele@nautilus.homeip.net
197                # we used to split on the double space before the email,
198                # but in this case this is wrong. Waiting for xml output,
199                # is it really sane asserting date's length to 28 chars?
200                date = l[:28]
201                author = l[30:-1]
202                y,m,d,hh,mm,ss,d1,d2,d3 = strptime(date, "%a %b %d %H:%M:%S %Z %Y")
203                date = datetime(y,m,d,hh,mm,ss)
204                l = output.readline()
205                assert (l.startswith('  * ') or
206                        l.startswith('  UNDO:') or
207                        l.startswith('  tagged'))
208
209                if l.startswith('  *'):
210                    name = l[4:-1]
211                else:
212                    name = l[2:-1]
213
214                changelog = []
215                l = output.readline()
216                while l.startswith('  '):
217                    changelog.append(l[2:-1])
218                    l = output.readline()
219
220                cset = Changeset(name, date, author, '\n'.join(changelog))
221                compactdate = date.strftime("%Y%m%d%H%M%S")
222                if name.startswith('UNDO: '):
223                    name = name[6:]
224                    inverted = 't'
225                else:
226                    inverted = 'f'
227                phash = new()
228                phash.update(name)
229                phash.update(author)
230                phash.update(compactdate)
231                phash.update(''.join(changelog))
232                phash.update(inverted)
233                cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
234                                                   new(author).hexdigest()[:5],
235                                                   phash.hexdigest())
236
237                if name.startswith('tagged'):
238                    print "Warning: skipping tag %s because I don't \
239                    propagate tags from darcs." % name
240                else:
241                    changesets.append(cset)
242
243                while not l.strip():
244                    l = output.readline()
245
246        return changesets
247
248    def _applyChangeset(self, changeset):
249        """
250        Do the actual work of applying the changeset to the working copy.
251        """
252
253        from re import escape
254
255        needspatchesopt = False
256        if hasattr(changeset, 'darcs_hash'):
257            selector = '--match'
258            revtag = 'hash ' + changeset.darcs_hash
259        elif changeset.revision.startswith('tagged '):
260            selector = '--tag'
261            revtag = changeset.revision[7:]
262        else:
263            selector = '--match'
264            revtag = 'date "%s" && author "%s"' % (
265                changeset.date.strftime("%Y%m%d%H%M%S"),
266                changeset.author)
267            # The 'exact' matcher doesn't groke double quotes:
268            # """currently there is no provision for escaping a double
269            # quote, so you have to choose between matching double
270            # quotes and matching spaces"""
271            if not '"' in changeset.revision:
272                revtag += ' && exact "%s"' % changeset.revision.replace('%', '%%')
273            else:
274                needspatchesopt = True
275
276        cmd = self.repository.command("pull", "--all", "--quiet",
277                                      selector, revtag)
278
279        if needspatchesopt:
280            cmd.extend(['--patches', escape(changeset.revision)])
281
282        pull = ExternalCommand(cwd=self.basedir, command=cmd)
283        output = pull.execute(stdout=PIPE, stderr=STDOUT)[0]
284
285        if pull.exit_status:
286            raise ChangesetApplicationFailure(
287                "%s returned status %d saying \"%s\"" %
288                (str(pull), pull.exit_status, output.read()))
289
290        conflicts = []
291        line = output.readline()
292        while line:
293            if line.startswith('We have conflicts in the following files:'):
294                files = output.readline()[:-1].split('./')[1:]
295                self.log_info("Conflict after 'darcs pull': '%s'" %
296                              ' '.join(files))
297                conflicts.extend(['./' + f for f in files])
298            line = output.readline()
299
300        cmd = self.repository.command("changes", selector, revtag,
301                                      "--xml-output", "--summ")
302        changes = ExternalCommand(cwd=self.basedir, command=cmd)
303        last = changesets_from_darcschanges(changes.execute(stdout=PIPE)[0])
304        if last:
305            changeset.entries.extend(last[0].entries)
306
307        return conflicts
308
309    def _handleConflict(self, changeset, conflicts, conflict):
310        """
311        Handle the conflict raised by the application of the upstream changeset.
312
313        Override parent behaviour: with darcs, we need to execute a revert
314        on the conflicted files, **trashing** local changes, but there should
315        be none of them in tailor context.
316        """
317
318        self.log_info("Reverting changes to '%s', to solve the conflict" %
319                      ' '.join(conflict))
320        cmd = self.repository.command("revert", "--all")
321        revert = ExternalCommand(cwd=self.basedir, command=cmd)
322        revert.execute(conflict)
323
324    def _checkoutUpstreamRevision(self, revision):
325        """
326        Concretely do the checkout of the upstream revision and return
327        the last applied changeset.
328        """
329
330        from os.path import join, exists
331        from os import mkdir
332        from re import escape
333
334        if revision == 'INITIAL':
335            initial = True
336            cmd = self.repository.command("changes", "--xml-output",
337                                          "--repo", self.repository.repository)
338            changes = ExternalCommand(command=cmd)
339            output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
340
341            if changes.exit_status:
342                raise ChangesetApplicationFailure(
343                    "%s returned status %d saying \"%s\"" %
344                    (str(changes), changes.exit_status,
345                     output and output.read() or ''))
346
347            csets = changesets_from_darcschanges(output)
348            changeset = csets[0]
349
350            revision = 'hash %s' % changeset.darcs_hash
351        else:
352            initial = False
353
354        if self.repository.subdir == '.':
355            # This is currently *very* slow, compared to the darcs get
356            # below!
357            if not exists(join(self.basedir, '_darcs')):
358                if not exists(self.basedir):
359                    mkdir(self.basedir)
360
361                cmd = self.repository.command("initialize")
362                init = ExternalCommand(cwd=self.basedir, command=cmd)
363                init.execute()
364
365                if init.exit_status:
366                    raise TargetInitializationFailure(
367                        "%s returned status %s" % (str(init),
368                                                   init.exit_status))
369
370                cmd = self.repository.command("pull", "--all", "--quiet")
371                if revision and revision<>'HEAD':
372                    cmd.extend([initial and "--match" or "--tag", revision])
373                dpull = ExternalCommand(cwd=self.basedir, command=cmd)
374                output = dpull.execute(self.repository.repository,
375                                       stdout=PIPE, stderr=STDOUT)[0]
376
377                if dpull.exit_status:
378                    raise TargetInitializationFailure(
379                        "%s returned status %d saying \"%s\"" %
380                        (str(dpull), dpull.exit_status, output.read()))
381        else:
382            # Use much faster 'darcs get'
383            cmd = self.repository.command("get", "--quiet")
384            if revision and revision<>'HEAD':
385                cmd.extend([initial and "--to-match" or "--tag", revision])
386            else:
387                cmd.append("--partial")
388            dget = ExternalCommand(command=cmd)
389            output = dget.execute(self.repository.repository, self.basedir,
390                                  stdout=PIPE, stderr=STDOUT)[0]
391
392            if dget.exit_status:
393                raise TargetInitializationFailure(
394                    "%s returned status %d saying \"%s\"" %
395                    (str(dget), dget.exit_status, output.read()))
396
397        cmd = self.repository.command("changes", "--last", "1",
398                                      "--xml-output")
399        changes = ExternalCommand(cwd=self.basedir, command=cmd)
400        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
401
402        if changes.exit_status:
403            raise ChangesetApplicationFailure(
404                "%s returned status %d saying \"%s\"" %
405                (str(changes), changes.exit_status, output.read()))
406
407        last = changesets_from_darcschanges(output)
408
409        return last[0]
410
411
412    ## SyncronizableTargetWorkingDir
413
414    def _addPathnames(self, names):
415        """
416        Add some new filesystems objects.
417        """
418
419        cmd = self.repository.command("add", "--case-ok", "--not-recursive",
420                                      "--quiet")
421        ExternalCommand(cwd=self.basedir, command=cmd).execute(names)
422
423    def _addSubtree(self, subdir):
424        """
425        Use the --recursive variant of ``darcs add`` to add a subtree.
426        """
427
428        cmd = self.repository.command("add", "--case-ok", "--recursive",
429                                      "--quiet")
430        ExternalCommand(cwd=self.basedir, command=cmd).execute(subdir)
431
432    def _commit(self, date, author, patchname, changelog=None, entries=None):
433        """
434        Commit the changeset.
435        """
436
437        logmessage = []
438
439        logmessage.append(date.strftime('%Y/%m/%d %H:%M:%S UTC'))
440        logmessage.append(author)
441        if patchname:
442            logmessage.append(patchname)
443        if changelog:
444            logmessage.append(changelog)
445        if not patchname and not changelog:
446            logmessage.append('Unnamed patch')
447
448        cmd = self.repository.command("record", "--all", "--pipe")
449        if not entries:
450            entries = ['.']
451
452        record = ExternalCommand(cwd=self.basedir, command=cmd)
453        record.execute(input='\n'.join(logmessage))
454
455        if record.exit_status:
456            raise ChangesetApplicationFailure(
457                "%s returned status %d" % (str(record), record.exit_status))
458
459    def _removePathnames(self, names):
460        """
461        Remove some filesystem object.
462        """
463
464        from os.path import join, exists
465
466        # darcs raises status 512 when it does not finding the entry,
467        # removed by source. Since sometime a directory is left there
468        # because it's not empty, darcs fails. So, do an explicit
469        # remove on items that are still there.
470
471        c = ExternalCommand(cwd=self.basedir,
472                            command=self.repository.command("remove"))
473        c.execute([n for n in names if exists(join(self.basedir, n))])
474
475
476    def _renamePathname(self, oldname, newname):
477        """
478        Rename a filesystem object.
479        """
480
481        from os.path import join, exists
482        from os import rename
483
484        # Check to see if the oldentry is still there. If it is,
485        # that probably means one thing: it's been moved and then
486        # replaced, see svn 'R' event. In this case, rename the
487        # existing old entry to something else to trick "darcs mv"
488        # (that will assume the move was already done manually) and
489        # finally restore its name.
490
491        absold = join(self.basedir, oldname)
492        renamed = exists(absold)
493        if renamed:
494            rename(absold, absold + '-TAILOR-HACKED-TEMP-NAME')
495
496        try:
497            cmd = self.repository.command("mv")
498            ExternalCommand(cwd=self.basedir, command=cmd).execute(oldname,
499                                                                   newname)
500        finally:
501            if renamed:
502                rename(absold + '-TAILOR-HACKED-TEMP-NAME', absold)
503
504    def _prepareTargetRepository(self):
505        """
506        Create the base directory if it doesn't exist, and execute
507        ``darcs initialize`` if needed.
508        """
509
510        from os.path import join, exists
511        from re import escape, compile
512        from dualwd import IGNORED_METADIRS
513
514        if not exists(join(self.basedir, self.repository.METADIR)):
515            cmd = self.repository.command("initialize")
516            init = ExternalCommand(cwd=self.basedir, command=cmd)
517            init.execute()
518
519            if init.exit_status:
520                raise TargetInitializationFailure(
521                    "%s returned status %s" % (str(init), init.exit_status))
522
523            boring = open(join(self.basedir, '_darcs/prefs/boring'), 'rU')
524            ignored = boring.read().split('\n')
525            boring.close()
526
527            # Augment the boring file, that contains a regexp per line
528            # with all known VCs metadirs to be skipped.
529            ignored.extend(['(^|/)%s($|/)' % escape(md)
530                            for md in IGNORED_METADIRS])
531
532            # Eventually omit our own log...
533            logfile = self.repository.project.logfile
534            if logfile.startswith(self.basedir):
535                ignored.append('^%s$' %
536                               escape(logfile[len(self.basedir)+1:]))
537
538            # ... and state file
539            sfname = self.repository.project.state_file.filename
540            if sfname.startswith(self.basedir):
541                sfrelname = sfname[len(self.basedir)+1:]
542                ignored.append('^%s$' % escape(sfrelname))
543                ignored.append('^%s$' % escape(sfrelname+'.journal'))
544
545            boring = open(join(self.basedir, '_darcs/prefs/boring'), 'wU')
546            boring.write('\n'.join(ignored))
547            boring.close()
548        else:
549            boring = open(join(self.basedir, '_darcs/prefs/boring'), 'rU')
550            ignored = boring.read().split('\n')
551            boring.close()
552
553        # Build a list of compiled regular expressions, that will be
554        # used later to filter the entries.
555        self.__unwanted_entries = [compile(rx) for rx in ignored
556                                   if rx and not rx.startswith('#')]
557
558    def _prepareWorkingDirectory(self, source_repo):
559        """
560        Tweak the default settings of the repository.
561        """
562
563        from os.path import join
564
565        motd = open(join(self.basedir, '_darcs/prefs/motd'), 'w')
566        motd.write(MOTD % str(source_repo))
567        motd.close()
568
569    def _adaptEntries(self, changeset):
570        """
571        Filter out boring files.
572        """
573
574        from copy import copy
575
576        adapted = SyncronizableTargetWorkingDir._adaptEntries(self, changeset)
577
578        # If there are no entries or no rules, there's nothing to do
579        if not adapted or not adapted.entries or not self.__unwanted_entries:
580            return adapted
581
582        entries = []
583        skipped = False
584        for e in adapted.entries:
585            skip = False
586            for rx in self.__unwanted_entries:
587                if rx.search(e.name):
588                    skip = True
589                    break
590            if skip:
591                self.log_info('Entry %r skipped per boring rules' %
592                              e.name)
593                skipped = True
594            else:
595                entries.append(e)
596
597        # All entries are gone, don't commit this changeset
598        if not entries:
599            self.log_info('All entries ignored, skipping whole '
600                          'changeset %r' % changeset.revision)
601            return None
602
603        if skipped:
604            adapted = copy(adapted)
605            adapted.entries = entries
606
607        return adapted
608
609    def _tag(self, tag):
610        """
611        Apply the given tag to the repository, unless it has already
612        been applied to the current state. (If it has been applied to
613        an earlier state, do apply it; the later tag overrides the
614        earlier one.
615        """
616        if tag not in self._currentTags():
617            cmd = self.repository.command("tag", "--author", "Unknown tagger")
618            ExternalCommand(cwd=self.basedir, command=cmd).execute(tag)
619
620    def _currentTags(self):
621        """
622        Return a list of tags that refer to the repository's current
623        state.  Does not consider tags themselves to be part of the
624        state, so if the repo was tagged with T1 and then T2, then
625        both T1 and T2 are considered to refer to the current state,
626        even though 'darcs get --tag=T1' and 'darcs get --tag=T2'
627        would have different results (the latter creates a repo that
628        contains tag T2, but the former does not).
629
630        This function assumes that a tag depends on all patches that
631        precede it in the "darcs changes" list.  This assumption is
632        valid if tags only come into the repository via tailor; if the
633        user applies a tag by hand in the hybrid repository, or pulls
634        in a tag from another darcs repository, then the assumption
635        could be violated and mistagging could result.
636        """
637        cmd = self.repository.command("changes", "--from-match=not name ^TAG",\
638                                      "--xml-output")
639        changes =  ExternalCommand(cwd=self.basedir, command=cmd)
640        output = changes.execute(stdout=PIPE, stderr=STDOUT)[0]
641        if changes.exit_status:
642            raise ChangesetApplicationFailure(
643                "%s returned status %d saying \"%s\"" %
644                (str(changes), changes.exit_status, output.read()))
645
646        tags = []
647        for cs in changesets_from_darcschanges_unsafe(output):
648            for tag in cs.tags:
649                if tag not in tags:
650                    tags.append(tag)
651        return tags
Note: See TracBrowser for help on using the repository browser.