source: tailor/vcpx/cvs.py @ 793

Revision 793, 15.5 KB checked in by lele@…, 8 years ago (diff)

Be tolerant on multiple spaces between branch and INITIAL

Line 
1# -*- mode: python; coding: utf-8 -*-
2# :Progetto: vcpx -- Pure CVS solution
3# :Creato:   dom 11 lug 2004 01:59:36 CEST
4# :Autore:   Lele Gaifax <lele@nautilus.homeip.net>
5# :Licenza:  GNU General Public License
6#
7
8"""
9Given `cvsps` shortcomings, this backend uses CVS only.
10"""
11
12__docformat__ = 'reStructuredText'
13
14from shwrap import ExternalCommand, STDOUT, PIPE
15from cvsps import CvspsWorkingDir
16from source import GetUpstreamChangesetsFailure
17
def compare_cvs_revs(rev1, rev2):
    """
    Compare two CVS revisions numerically, not alphabetically.

    Each revision is a dotted string such as ``'1.10'``.  A false value
    (``None`` or the empty string) is treated as revision ``'0'``.

    Return ``-1``, ``0`` or ``1`` when `rev1` is respectively lower than,
    equal to or greater than `rev2`.  A ``ValueError`` is raised by
    ``int()`` when a component is not numeric.
    """

    def components(rev):
        # Handle locked files by taking only the first part of the
        # revision string: lines like "1.1 locked" must compare as "1.1".
        number = (rev or '0').split(' ')[0]
        return [int(n) for n in number.split('.')]

    r1 = components(rev1)
    r2 = components(rev2)

    # Same result as cmp(r1, r2), spelled without the `cmp` builtin so
    # that it does not depend on a name that Python 3 no longer provides.
    return (r1 > r2) - (r1 < r2)
32
33
def changesets_from_cvslog(log, module):
    """
    Parse a CVS log and return the list of collapsed changesets.

    The log is fed to a `ChangeSetCollector`, which yields one changeset
    for each distinct (date, author, log) key, in sorted key order.
    Consecutive changesets are then merged into the previous one when
    they share author and log message, were committed within 180 seconds
    of the group's latest timestamp, and touch no entry name already in
    the group.

    `log` is the file-like object to read the CVS log from and `module`
    is the CVS module name; both are passed as is to the collector.
    """

    from datetime import timedelta

    threshold = timedelta(seconds=180)
    collapsed = []

    last = None      # changeset currently accumulating entries
    lastts = None    # most recent timestamp seen within `last`

    for cs in ChangeSetCollector(log, module):
        merge = False
        if (last and last.author == cs.author and last.log == cs.log and
            abs(lastts - cs.date) < threshold):
            # Build the list of known names once per candidate, instead
            # of once per entry of the candidate.
            known = [n.name for n in last.entries]
            merge = not [e for e in cs.entries if e.name in known]

        if merge:
            last.entries.extend(cs.entries)
            if lastts < cs.date:
                lastts = cs.date
        else:
            if last:
                last.date = lastts
            last = cs
            lastts = cs.date
            collapsed.append(cs)

    # The group still open at the end of the loop must get its final
    # timestamp too, exactly like the ones closed by a following
    # changeset.
    if last:
        last.date = lastts

    return collapsed
63
64
65def _getGlobalCVSRevision(timestamp, author):
66    """
67    CVS does not have the notion of a repository-wide revision number,
68    since it tracks just single files.
69
70    Here we could "count" the grouped changesets ala `cvsps`,
71    but that's tricky because of branches.  Since right now there
72    is nothing that depends on this being a number, not to mention
73    a *serial* number, simply emit a (hopefully) unique signature...
74    """
75
76    return "%s by %s" % (timestamp, author)
77
78def _splitGlobalCVSRevision(revision):
79    """
80    Split what _getGlobalCVSRevision() returns into the two components.
81    """
82
83    assert ' by ' in revision, "Simple revision found, expected 'timestamp by author'"
84    return revision.split(' by ')
85
class ChangeSetCollector(object):
    """
    Collector of the applied change sets.

    The whole log is parsed at construction time; iterating over the
    instance yields the collected changesets ordered by their
    (date, author, log) key.
    """

    # Some string constants we look for in CVS output.
    intra_sep = '-' * 28 + '\n'
    inter_sep = '=' * 77 + '\n'

    def __init__(self, log, module):
        """
        Initialize a ChangeSetCollector instance.

        Loop over the modified entries and collect their logs.

        `log` is a file-like object (only ``readline()`` is used) and
        `module` is the CVS module name.
        """

        self.changesets = {}
        """The dictionary mapping (date, author, log) to each entry."""

        self.log = log
        """The log to be parsed."""

        self.module = module
        """The CVS module name."""

        self.__parseCvsLog()

    def __iter__(self):
        """Iterate over the changesets, sorted by their key."""

        keys = list(self.changesets.keys())
        keys.sort()
        return iter([self.changesets[k] for k in keys])

    def __collect(self, timestamp, author, changelog, entry, revision):
        """
        Register a change set about an entry.

        The changeset for (timestamp, author, changelog) is created on
        first use.  Return whatever its ``addEntry()`` returns.
        """

        from changes import Changeset

        key = (timestamp, author, changelog)
        cs = self.changesets.get(key)
        if cs is None:
            cs = Changeset(_getGlobalCVSRevision(timestamp, author),
                           timestamp, author, changelog)
            self.changesets[key] = cs
        return cs.addEntry(entry, revision)

    def __readline(self):
        """
        Read a line from the log, intercepting the directory being listed.

        This is used to determine the pathname of each entry, relative to
        the root of the working copy.  The 'cvs rlog: Logging ' lines are
        consumed here and never returned to the caller.
        """

        l = self.log.readline()
        while l.startswith('cvs rlog: Logging '):
            # 18 is the length of the 'cvs rlog: Logging ' prefix
            currentdir = l[18:-1]
            # If the directory starts with the module name, keep
            # just the remaining part
            if currentdir.startswith(self.module):
                self.__currentdir = currentdir[len(self.module)+1:]
            else:
                # strip away first component, the name of the product
                slash = currentdir.find('/')
                if slash >= 0:
                    self.__currentdir = currentdir[slash+1:]
                else:
                    self.__currentdir = ''
            l = self.log.readline()

        return l

    def __parseRevision(self, entry):
        """
        Parse a single revision log, extracting the needed information.

        Return None when there are no more logs to be parsed,
        otherwise the tuple (date, author, changelog, entry, revision,
        state, newentry) where `newentry` is true when the info line
        carries no "lines: +x -y" element.
        """

        from datetime import datetime

        revision = self.__readline()
        if not revision or not revision.startswith('revision '):
            return None
        # Don't just knock off the leading 'revision ' here.
        # There may be locks, in which case we get output like:
        # 'revision 1.4    locked by: mem;'.
        # Split on any run of whitespace, so that a tab or several
        # blanks before the lock owner do not end up in the number.
        fields = revision.split()
        if len(fields) > 1:
            rev = fields[1]
        else:
            rev = ''

        infoline = self.__readline()

        info = infoline.split(';')

        assert info[0][:6] == 'date: ', infoline

        # 2004-04-19 14:45:42 +0000, the timezone may be missing
        dateparts = info[0][6:].split(' ')
        assert len(dateparts) >= 2, repr(dateparts)

        day = dateparts[0]
        time = dateparts[1]
        # day[4] is whatever separator follows the four digit year
        y,m,d = map(int, day.split(day[4]))
        hh,mm,ss = map(int, time.split(':'))
        date = datetime(y,m,d,hh,mm,ss)

        assert info[1].strip()[:8] == 'author: ', infoline

        author = info[1].strip()[8:]

        assert info[2].strip()[:7] == 'state: ', infoline

        state = info[2].strip()[7:]

        # Fourth element, if present and like "lines +x -y", indicates
        # this is a change to an existing file. Otherwise it's a new
        # one.

        newentry = not (len(info) > 3 and
                        info[3].strip().startswith('lines: '))

        # The next line may be either the first of the changelog or a
        # continuation (?) of the preceding info line with the
        # "branches": in the latter case it is simply skipped.

        l = self.__readline()
        if l.startswith('branches: ') and l.endswith(';\n'):
            # read the effective first line of log
            l = self.__readline()

        mesg = []
        while l not in (None, '', self.inter_sep, self.intra_sep):
            mesg.append(l[:-1])
            l = self.__readline()

        if len(mesg)==1 and mesg[0] == '*** empty log message ***':
            changelog = ''
        else:
            changelog = '\n'.join(mesg)

        return (date, author, changelog, entry, rev, state, newentry)

    def __parseCvsLog(self):
        """
        Parse a complete CVS log.

        For each 'RCS file: ' section found, collect all its revisions
        into `self.changesets`, printing a warning when the number of
        parsed revisions differs from the announced one.
        """

        from os.path import split, join
        import re

        revcount_regex = re.compile('\\bselected revisions:\\s*(\\d+)\\b')

        self.__currentdir = None

        while 1:
            l = self.__readline()
            while l and not l.startswith('RCS file: '):
                l = self.__readline()

            if not l.startswith('RCS file: '):
                break

            assert self.__currentdir is not None, \
                   "Missed 'cvs rlog: Logging XX' line"

            # Take the basename of the RCS file, without the two
            # trailing characters (presumably the ",v" suffix)
            entry = join(self.__currentdir, split(l[10:-1])[1][:-2])

            expected_revisions = None
            while 1:
                l = self.__readline()
                # An empty string means the log ended abruptly: stop
                # here instead of reading the end of file forever.
                if not l or l in (self.inter_sep, self.intra_sep):
                    break

                m = revcount_regex.search(l)
                if m is not None:
                    expected_revisions = int(m.group(1))

            last = previous = None
            found_revisions = 0
            while l != self.inter_sep:
                cs = self.__parseRevision(entry)
                if cs is None:
                    break
                date,author,changelog,e,rev,state,newentry = cs

                # Skip spurious entries added in a branch
                if not (rev == '1.1' and state == 'dead' and
                        changelog.startswith('file ') and
                        ' was initially added on branch ' in changelog):
                    last = self.__collect(date, author, changelog, e, rev)
                    if state == 'dead':
                        last.action_kind = last.DELETED
                    elif newentry:
                        last.action_kind = last.ADDED
                    else:
                        last.action_kind = last.UPDATED
                found_revisions = found_revisions + 1

                # When the revision just parsed is a deletion, the one
                # parsed right before it is marked as an addition.
                if previous and last.action_kind == last.DELETED:
                    previous.action_kind = previous.ADDED

                previous = last

            if expected_revisions != found_revisions:
                print('warning: expecting %s revisions, read %s revisions' %
                      ( expected_revisions, found_revisions ))

    # end of __parseCvsLog()
290
291
class CvsWorkingDir(CvspsWorkingDir):
    """
    Reimplement the mechanism used to get a *changeset* view of the
    CVS commits.
    """

    def _getUpstreamChangesets(self, sincerev):
        """
        Run ``cvs rlog`` and return the changesets parsed from its output.

        `sincerev` selects the starting point and may be:

        - false, ``"INITIAL"`` or ``"HEAD"``, when bootstrapping;
        - a ``"timestamp by author"`` signature, as built by
          `_getGlobalCVSRevision()`;
        - something starting with a digit, used as is as the date;
        - ``"branch date"`` or ``"branch INITIAL"``;
        - anything else, taken as a branch name.

        Raise `GetUpstreamChangesetsFailure` when the external command
        exits with a non zero status.
        """

        from os.path import join, exists
        from codecs import getreader

        branch = ''
        fname = join(self.basedir, 'CVS', 'Tag')
        if exists(fname):
            # Close the file explicitly rather than waiting for the
            # garbage collector to do it.
            tagfile = open(fname)
            try:
                tag = tagfile.read()
            finally:
                tagfile.close()
            # Slicing instead of indexing tolerates an empty Tag file
            if tag[:1] == 'T':
                branch = tag[1:].rstrip('\r\n')

        cmd = self.repository.command("-f", "-d", "%(repository)s", "rlog",
                                      "-N")

        if not sincerev or sincerev in ("INITIAL", "HEAD"):
            # We are bootstrapping, trying to collimate the actual
            # revision on disk with the changesets, or figuring out
            # the first revision
            since = None
            if sincerev == "HEAD":
                if branch and branch != 'HEAD':
                    cmd.append("-r%(branch)s.")
                else:
                    cmd.append("-rHEAD:HEAD")
            else:
                cmd.append("-r:HEAD")
        elif ' by ' in sincerev:
            since, author = _splitGlobalCVSRevision(sincerev)
            cmd.extend(["-d", "%(since)s UTC<", "-r:%(branch)s"])
        elif sincerev[0] in '0123456789':
            since = sincerev
            cmd.extend(["-d", "%(since)s UTC<"])
        elif ' ' in sincerev:
            branch, since = sincerev.split(' ', 1)
            # Be tolerant on multiple spaces between the branch and what
            # follows, be it INITIAL or a date.
            since = since.strip()
            if since == 'INITIAL':
                cmd.extend(["-r%(branch)s"])
            else:
                cmd.extend(["-d", "%(since)s UTC<", "-r:%(branch)s"])
        else:
            branch = sincerev
            since = None
            cmd.extend(["-r:%(branch)s"])

        cvslog = ExternalCommand(command=cmd)

        # The keyword arguments supply the values for the %(name)s
        # placeholders used in the command line built above.
        log = cvslog.execute(self.repository.module, stdout=PIPE, stderr=STDOUT,
                             repository=self.repository.repository,
                             since=since, branch=branch or 'HEAD', TZ='UTC')[0]

        if cvslog.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d" % (str(cvslog), cvslog.exit_status))

        # Decode the output using the encoding of the repository
        reader = getreader(self.repository.encoding)
        log = reader(log)
        return changesets_from_cvslog(log, self.repository.module)

    def _checkoutUpstreamRevision(self, revision):
        """
        Adjust the 'revision' slot of the changeset, to make it a
        repository wide unique id.

        Return the changeset returned by the inherited implementation,
        after replacing its revision.
        """

        last = CvspsWorkingDir._checkoutUpstreamRevision(self, revision)
        last.revision = _getGlobalCVSRevision(last.date, last.author)
        return last
366
367
class CvsEntry(object):
    """Collect the info about a file in a CVS working dir."""

    __slots__ = ('filename', 'cvs_version', 'timestamp', 'cvs_tag')

    def __init__(self, entry):
        """
        Initialize a CvsEntry from one line of a CVS/Entries file.

        The line must be made of exactly six '/' separated fields; the
        second, third, fourth and sixth are respectively the file name,
        its revision, its timestamp and its tag.
        """

        from datetime import datetime
        from time import strptime

        _first, name, version, stamp, _fifth, tag = entry.split('/')

        self.filename = name
        self.cvs_version = version

        if stamp != 'Result of merge':
            # A merge may be recorded as a prefix of the real timestamp
            merged = 'Result of merge+'
            if stamp.startswith(merged):
                stamp = stamp[len(merged):]
            parsed = strptime(stamp, "%a %b %d %H:%M:%S %Y")
            self.timestamp = datetime(*parsed[:6])
        else:
            # No timestamp at all: fall back to the current time
            self.timestamp = datetime.today()

        self.cvs_tag = tag

    def __str__(self):
        """Return a short description with name, revision and tag."""

        details = (self.filename, self.cvs_version, self.cvs_tag)
        return "CvsEntry('%s', '%s', '%s')" % details
398
399
class CvsEntries(object):
    """Collection of CvsEntry."""

    __slots__ = ('files', 'directories', 'deleted')

    def __init__(self, root):
        """Parse CVS/Entries file.

           Walk down the working directory, collecting info from each
           CVS/Entries found.  When `root` has no CVS/Entries file the
           collection is left empty."""

        from os.path import join, exists, isdir
        from os import listdir

        self.files = {}
        """Dict of `CvsEntry`, keyed on each file under revision control."""

        self.directories = {}
        """Dict of `CvsEntries`, keyed on subdirectories under revision
           control."""

        self.deleted = False
        """Flag to denote that this directory was removed."""

        entries = join(root, 'CVS', 'Entries')
        if exists(entries):
            # Close the file explicitly rather than waiting for the
            # garbage collector to do it.
            stream = open(entries)
            try:
                lines = stream.readlines()
            finally:
                stream.close()

            for entry in lines:
                # Remove just the line terminator: a last line without
                # it must not lose its final character.
                entry = entry.rstrip('\r\n')

                if entry.startswith('/'):
                    e = CvsEntry(entry)
                    self.files[e.filename] = e
                elif entry.startswith('D/'):
                    d = entry.split('/')[1]
                    subdir = CvsEntries(join(root, d))
                    self.directories[d] = subdir
                elif entry == 'D':
                    self.deleted = True

            # Sometimes the Entries file does not contain the directories:
            # crawl the current directory looking for missing ones.

            for entry in listdir(root):
                if entry == '.svn':
                    continue
                path = join(root, entry)
                if (isdir(path) and exists(join(path, 'CVS', 'Entries'))
                    and entry not in self.directories):
                    self.directories[entry] = CvsEntries(path)

            # A directory still containing something is not deleted,
            # whatever the Entries file says.
            if self.deleted:
                self.deleted = not self.files and not self.directories

    def __str__(self):
        """Return a short description with the number of items."""

        return "CvsEntries(%d files, %d subdirectories)" % (
            len(self.files), len(self.directories))

    def getFileInfo(self, fpath):
        """Fetch the info about a path, if known.  Otherwise return None.

           `fpath` is relative to this directory and uses '/' as the
           separator between its components."""

        try:
            if '/' in fpath:
                subdir,rest = fpath.split('/', 1)
                return self.directories[subdir].getFileInfo(rest)
            else:
                return self.files[fpath]
        except KeyError:
            return None

    def getYoungestEntry(self):
        """Find and return the most recently changed entry.

           Subdirectories are searched too.  Return None when there
           are no entries at all."""

        latest = None

        for e in self.files.values():
            if not latest or e.timestamp > latest.timestamp:
                latest = e

        for d in self.directories.values():
            e = d.getYoungestEntry()

            # skip if there are no entries in the directory
            if not e:
                continue

            if not latest or e.timestamp > latest.timestamp:
                latest = e

        return latest
Note: See TracBrowser for help on using the repository browser.