source: tracdarcs/tracdarcs/changesparser.py @ 176

Revision 176, 5.4 KB checked in by lele@…, 3 years ago (diff)

Strip off ending .gz from darcs hashes

Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2005 Edgewall Software
4# Copyright (C) 2006 K.S.Sreeram <sreeram@tachyontech.net>
5# Copyright (C) 2007,2008,2009 Lele Gaifax <lele@metapensiero.it>
6#
7# This software is licensed as described in the file COPYING, which
8# you should have received as part of this distribution. The terms
9# are also available at http://trac.edgewall.com/license.html.
10#
11# This software consists of voluntary contributions made by many
12# individuals. For the exact contribution history, see the revision
13# history and logs, available at http://projects.edgewall.com/trac/.
14#
15# Author: K.S.Sreeram <sreeram@tachyontech.net>
16
17from datetime import datetime
18from re import compile
19from time import mktime, strptime, timezone
20from xml.sax import parseString
21from xml.sax.handler import ContentHandler
22from xml.sax._exceptions import SAXParseException
23
24from trac.util.datefmt import utc
25
# Strip away darcs 2.1 junk: hopefully this regex is strict enough to
# not obliterate useful info...
# It matches an "Ignore-this: <lowercase hex digits>" header, together with
# the newline that may follow it, only at the very beginning of the text
# (no MULTILINE flag, so '^' anchors to the start of the string).
IGNORE_THIS = compile(r'^Ignore-this: [a-f\d]+\n?')
29
# Filter out bad chars that can cause the XML parser to give up in despair.
# (Thanks to lelit of the tailor project and ndurner and warner for this hack.)
#
# The string holds the characters 0x00-0x09, 0x0b, 0x0c, 0x0e-0x1f and
# 0x7f-0xff: everything outside printable ASCII except LF (0x0a) and
# CR (0x0d).
# NOTE(review): TAB (0x09) is a legal XML character but is part of the
# set -- confirm whether that is intended.
#
# Each range() is wrapped in list() so that the concatenation does not rely
# on range() returning a list.
BADCHARS = "".join([chr(i) for i in list(range(0x0a)) +
                    [0x0b, 0x0c] + list(range(0x0e, 0x20)) +
                    list(range(0x7f, 0x100))])
34
class Patch(object):
    '''
    Represents a single darcs changeset.

    The constructor stores its arguments unchanged. As built by
    parse_changes() the fields are:

    'author' is the value of the patch's author attribute.
    'time' is a timezone-aware datetime tagged as UTC (it is not an
    integer number of seconds).
    'hash' is the patch hash, without a trailing '.gz'.
    'name' is the patch name, prefixed with 'UNDO: ' for inverted patches.
    'comment' is the long comment, without the leading 'Ignore-this:' line.
    'entries' is a list of changes performed in the current changeset.
    The elements of 'entries' can be of the following form:
    ('add_file', 'some/file/path')
    ('remove_file', 'some/file/path')
    ('modify_file', 'some/file/path')
    ('add_directory', 'some/dir/path')
    ('remove_directory', 'some/dir/path')
    ('move', 'from/path', 'to/path')
    'move' is used for both directories and files.
    '''
    def __init__(self, author, time, hash, name, comment, entries):
        self.author = author
        self.time = time
        self.hash = hash
        self.name = name
        self.comment = comment
        self.entries = entries

    def __repr__(self):
        # Debugging aid: one line listing every field of the patch.
        return ("author: %s, time: %s, hash: %s, name: %s, comment: %s, "
                "entries: %s" % (self.author, self.time, self.hash,
                                 self.name, self.comment, self.entries))
61
def parse_changes(changes, log):
    '''
    Parses the output of 'darcs changes --xml-output --summary', and
    returns a list of 'Patch' objects.

    'changes' is the XML text emitted by darcs. Every character listed
    in BADCHARS is replaced by '-' before the text reaches the parser.
    'log' is only used through its critical() and warning() methods.

    An XML error is logged, not raised: the patches that were completely
    parsed before the error are returned.
    '''
    import sys
    try:
        from string import maketrans
    except ImportError:
        # string.maketrans() does not exist on Python 3; str.maketrans()
        # builds the equivalent table for text input.
        maketrans = str.maketrans

    class SAXHandler(ContentHandler):
        # Elements whose text content is a path, recorded as (name, path).
        ACTION_ELTS = set('add_file modify_file remove_file '
                          'add_directory remove_directory'.split())
        # Elements whose opening tag restarts the text accumulator.
        RESET_CONTENT_ELTS = ACTION_ELTS | set(['name', 'comment'])

        def __init__(self):
            ContentHandler.__init__(self)
            self.patches = []
            self.content = []

        def startElement(self, name, attr):
            if name == 'patch':
                self.author = attr['author']
                date = attr['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form
                    # Sun Oct 20 20:01:05 EDT 2002: keep the first 19
                    # characters and the trailing " YYYY", which drops the
                    # timezone abbreviation.
                    timestamp = datetime(*strptime(date[:19] + date[-5:],
                                                   '%a %b %d %H:%M:%S %Y')[:6])
                # Either way the value is tagged as UTC.
                self.time = timestamp.replace(tzinfo=utc)
                # Strip useless trailing .gz
                phash = attr['hash']
                if phash.endswith('.gz'):
                    phash = phash[:-3]
                self.hash = phash
                # darcs-1 rollbacks: darcs-2 uses a different way, rollbacks are
                # normal patches, and "inverted" is always False
                self.inverted = attr['inverted'] == 'True'
                self.name = ''
                self.comment = ''
                self.entries = []
            elif name == 'move':
                # A move carries its paths as attributes, not as text.
                self.entries.append(('move', attr['from'], attr['to']))
            elif name in self.RESET_CONTENT_ELTS:
                self.content = []

        def characters(self, data):
            # Text may arrive in several chunks: collect, join at the end tag.
            self.content.append(data)

        def endElement(self, name):
            if name == 'name':
                title = ''.join(self.content)
                if self.inverted:
                    title = 'UNDO: ' + title
                self.name = title
            elif name == 'comment':
                content = ''.join(self.content)
                self.comment = IGNORE_THIS.sub('', content)
            elif name in self.ACTION_ELTS:
                path = ''.join(self.content).strip()
                self.entries.append((name, path))
            elif name == 'patch':
                patch = Patch(self.author, self.time, self.hash,
                              self.name, self.comment, self.entries)
                self.patches.append(patch)

    tt = maketrans(BADCHARS, "-"*len(BADCHARS))
    changes = changes.translate(tt)

    handler = SAXHandler()
    try:
        parseString(changes, handler)
    except SAXParseException:
        # Fetch the exception through sys.exc_info() so that the clause
        # needs neither the "except X, e" nor the "except X as e" spelling.
        e = sys.exc_info()[1]
        lineno = e.getLineNumber()
        colno = e.getColumnNumber()
        log.critical("Cannot complete the parse of the XML output due to a SAX Error, "
                     "on the changeset %d: %s", len(handler.patches), e)
        lines = changes.splitlines()
        # The parser may report a position past the last line (empty or
        # truncated output): only echo the line when it really exists.
        if lineno is not None and 1 <= lineno <= len(lines):
            log.warning("%s", lines[lineno-1])
            if colno:
                log.warning('-'*(colno-1)+'^'*5)
    return handler.patches
Note: See TracBrowser for help on using the repository browser.