source: tracdarcs/tracdarcs/changesparser.py @ 113

Revision 113, 4.8 KB checked in by lele@…, 5 years ago (diff)

Clean dirty chars in "darcs changes --xml-output"

Line 
1# -*- coding: iso-8859-1 -*-
2#
3# Copyright (C) 2005 Edgewall Software
4# Copyright (C) 2006 K.S.Sreeram <sreeram@tachyontech.net>
5# Copyright (C) 2007,2008 Lele Gaifax <lele@metapensiero.it>
6#
7# This software is licensed as described in the file COPYING, which
8# you should have received as part of this distribution. The terms
9# are also available at http://trac.edgewall.com/license.html.
10#
11# This software consists of voluntary contributions made by many
12# individuals. For the exact contribution history, see the revision
13# history and logs, available at http://projects.edgewall.com/trac/.
14#
15# Author: K.S.Sreeram <sreeram@tachyontech.net>
16
17from datetime import datetime
18from time import mktime, strptime, timezone
19from xml.sax import parseString
20from xml.sax.handler import ContentHandler
21
22from trac.util.datefmt import utc
23
class Patch(object):
    '''
    Plain record describing one darcs changeset.

    Every constructor argument is stored unchanged under the attribute
    of the same name.

    'time' is kept exactly as given; parse_changes() in this module
    passes a timezone-aware datetime tagged as UTC.
    'entries' is the list of changes performed by the changeset, each
    one a tuple of one of these shapes:
    ('add_file', 'some/file/path')
    ('remove_file', 'some/file/path')
    ('modify_file', 'some/file/path')
    ('add_directory', 'some/dir/path')
    ('remove_directory', 'some/dir/path')
    ('move', 'from/path', 'to/path')
    'move' covers both files and directories.
    '''
    def __init__(self, author, time, hash, name, comment, entries):
        # Who and when
        self.author, self.time = author, time
        # Identification and description
        self.hash, self.name, self.comment = hash, name, comment
        # What changed
        self.entries = entries

    def __repr__(self):
        # One "label: value" pair per attribute, in declaration order.
        labels = ('author', 'time', 'hash', 'name', 'comment', 'entries')
        pairs = ["%s: %s" % (label, getattr(self, label)) for label in labels]
        return ", ".join(pairs)
50
# Replacement table for changelogs that are not valid UTF-8: each key is
# a single 8-bit character, each value the XML character reference that
# is substituted for it.
# NOTE(review): the targets mix Latin-1 and Latin-2 readings of the same
# byte range (e.g. 0xd5 -> U+0150); presumably tuned to the repositories
# the author had at hand -- confirm before extending.
_REPLACE_BADCHARS = {
    '\xc1': '&#193;', '\xc9': '&#201;', '\xcd': '&#205;',
    '\xd3': '&#211;', '\xd6': '&#214;', '\xd5': '&#336;',
    '\xda': '&#218;', '\xdc': '&#220;', '\xdb': '&#368;',
    '\xe1': '&#225;', '\xe9': '&#233;', '\xed': '&#237;',
    '\xf3': '&#243;', '\xf6': '&#246;', '\xf5': '&#337;',
    '\xfa': '&#250;', '\xfc': '&#252;', '\xfb': '&#369;',
    '\xf1': '&#241;', '\xdf': '&#223;', '\xe5': '&#229;',
    }


def _fixup_badchars(s, table):
    '''
    Return 's' with the characters listed in 'table' replaced by the
    associated strings, unless 's' is a byte string that already
    decodes as UTF-8.

    A valid UTF-8 byte string is returned untouched: several keys of
    the table (0xe9, 0xe5, 0xd5, ...) are also lead bytes of multi-byte
    UTF-8 sequences, so rewriting them one byte at a time would corrupt
    well-formed text.  The table is only a fallback for input the XML
    parser could not read anyway.

    An empty or missing 'table' returns 's' unchanged.
    '''
    if not table:
        return s

    try:
        s.decode('utf-8')
    except (AttributeError, UnicodeError):
        # AttributeError: 's' has no decode(), i.e. it is already text
        # and there is nothing to validate.
        # UnicodeError: 's' is not valid UTF-8.
        # In both cases fall through to the table, as before.
        pass
    else:
        return s

    # The lookup expects iteration over 's' to yield one-character
    # strings matching the table keys.
    return "".join([table.get(c, c) for c in s])


def _parse_patch_date(date):
    '''
    Convert the 'date' attribute of a <patch> element into a datetime
    tagged with the 'utc' tzinfo.

    Two layouts are tried, in this order:
    20040619130027                   (compact form)
    Sun Oct 20 20:01:05 EDT 2002     (old darcs patches)

    For the old layout only the first 19 characters and the last 5 are
    parsed, so the timezone abbreviation is skipped and the wall-clock
    value is labelled UTC as is.

    Raises ValueError when 'date' matches neither layout.
    '''
    try:
        fields = strptime(date, '%Y%m%d%H%M%S')
    except ValueError:
        fields = strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')
    return datetime(*fields[:6]).replace(tzinfo=utc)


class _ChangesHandler(ContentHandler):
    '''
    SAX handler that builds one 'Patch' per <patch> element and
    collects them, in document order, in 'self.patches'.
    '''

    # Elements whose text content is the path touched by the action.
    ACTION_ELTS = set(('add_file', 'modify_file', 'remove_file',
                       'add_directory', 'remove_directory'))
    # Elements at whose start the accumulated text is discarded.
    RESET_CONTENT_ELTS = ACTION_ELTS | set(('name', 'comment'))

    def __init__(self):
        ContentHandler.__init__(self)
        self.patches = []
        self.content = []

    def startElement(self, name, attr):
        if name == 'patch':
            self.author = attr['author']
            self.time = _parse_patch_date(attr['date'])
            self.hash = attr['hash']
            # darcs-1 rollbacks: darcs-2 uses a different way, rollbacks
            # are normal patches, and "inverted" is always False.  A
            # missing attribute is treated as "not inverted".
            self.inverted = attr.get('inverted') == 'True'
            self.name = ''
            self.comment = ''
            self.entries = []
        elif name == 'move':
            # A move carries its paths as attributes, not as text.
            self.entries.append(('move', attr['from'], attr['to']))
        elif name in self.RESET_CONTENT_ELTS:
            self.content = []

    def characters(self, data):
        # Text may arrive in several chunks; it is joined at the end
        # of the element.
        self.content.append(data)

    def endElement(self, name):
        if name == 'name':
            title = ''.join(self.content)
            if self.inverted:
                title = 'UNDO: ' + title
            self.name = title
        elif name == 'comment':
            self.comment = ''.join(self.content)
        elif name in self.ACTION_ELTS:
            path = ''.join(self.content).strip()
            self.entries.append((name, path))
        elif name == 'patch':
            self.patches.append(Patch(self.author, self.time, self.hash,
                                      self.name, self.comment, self.entries))


def parse_changes(changes):
    '''
    Parses the output of 'darcs changes --xml-output --summary', and
    returns a list of 'Patch' objects, in the order they appear in the
    input.

    'changes' is the XML document.  A byte string that is valid UTF-8
    is handed to the parser untouched; otherwise the known 8-bit
    characters are first replaced by XML character references (see
    _fixup_badchars).

    Errors raised by the XML parser on malformed input, and ValueError
    for an unrecognised patch date, are propagated to the caller.
    '''
    handler = _ChangesHandler()
    parseString(_fixup_badchars(changes, _REPLACE_BADCHARS), handler)
    return handler.patches
Note: See TracBrowser for help on using the repository browser.