# -*- coding: utf-8 -*-
#
# Copyright (C) 2005 Edgewall Software
# Copyright (C) 2006 K.S.Sreeram <sreeram@tachyontech.net>
# Copyright (C) 2007-2010 Lele Gaifax <lele@metapensiero.it>
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
#
# Author: K.S.Sreeram <sreeram@tachyontech.net>

'''
This module implements the trac versioncontrol backend API.
The API consists of 3 classes: DarcsRepository, DarcsNode
and DarcsChangeset.

Please see the docs in trac.versioncontrol.api for the interface
which is implemented by this module.
'''

import os, StringIO, mimetypes

from trac.util import TracError
from trac.versioncontrol import (Repository, Node, Changeset,
                                 NoSuchChangeset, NoSuchNode)

from command import DarcsCommand
from updatedb import update_darcsdb
from dbutil import (CHANGE_ADDED, CHANGE_EDITED, CHANGE_MOVED,
                    CHANGE_MOVED_EDITED, CHANGE_REMOVED,
                    IS_TRAC_0_10_X, IS_TRAC_0_11_X, TimedDB,
                    IS_TRAC_0_12_OR_BETTER, NODE_DIR_TYPE,
                    NODE_FILE_TYPE, get_node_type, get_prev_path_rev,
                    get_repository_id, query_nodes_for_revision)

# Translation table: node types of the darcs backend -> node kinds
# of the trac API.
_node_type_map = {
    NODE_FILE_TYPE: Node.FILE,
    NODE_DIR_TYPE: Node.DIRECTORY,
}

# Translation table: change types of the darcs backend -> change kinds
# of the trac API.
# FIXME: treat moved&edited as just moved?
_change_map = {
    CHANGE_ADDED: Changeset.ADD,
    CHANGE_REMOVED: Changeset.DELETE,
    CHANGE_MOVED: Changeset.MOVE,
    CHANGE_EDITED: Changeset.EDIT,
    CHANGE_MOVED_EDITED: Changeset.MOVE,
}

class DarcsRepository(Repository):
    """Implement the IRepository interface for a darcs repository."""

    def __init__(self, db, path, log, darcscmd, possible_encodings,
                 params, eager_annotations):
        """Set up the repository wrapper around the darcs tree at `path`.

        The base class is initialised with the name ``darcs:<path>``; its
        constructor signature differs between trac versions, hence the
        branch on ``IS_TRAC_0_12_OR_BETTER``.  On older trac versions the
        ``log`` and ``id`` attributes are assigned here explicitly.
        On trac 0.10 the repository is synchronized right away.
        """
        name = 'darcs:%s' % path
        if IS_TRAC_0_12_OR_BETTER:
            Repository.__init__(self, name, params, log)
        else:
            Repository.__init__(self, name, params, None, log)
            self.log = log
            # fall back to 0 when no repository id is found
            self.id = get_repository_id(db, path) or 0
        self.db = TimedDB(db, log)
        self.path = path
        self.__cmd = DarcsCommand(darcscmd, path, log, possible_encodings)
        self.eager_annotations = eager_annotations
        if IS_TRAC_0_10_X:
            self.sync()

    def close(self):
        """Close the repository: there is nothing to release here."""
        return None

    def get_changeset(self, rev):
        """Return the ``DarcsChangeset`` for `rev`, normalized first."""
        return DarcsChangeset(self, self.normalize_rev(rev))

    def get_node(self, path, rev=None):
        """Return the ``DarcsNode`` found at `path` as of revision `rev`.

        Both arguments are normalized first.  The root ``/`` is always a
        directory without a node id; any other path is looked up in the
        database and ``NoSuchNode`` is raised when nothing matches.
        """
        path = self.normalize_path(path)
        rev = self.normalize_rev(rev)
        if path != '/':
            cursor = self.db.cursor()
            query, params = query_nodes_for_revision(self.id, rev,
                                                     'dnc.path = %s')
            params.append(path)
            cursor.execute(query, params)
            found = cursor.fetchone()
            if found is None:
                raise NoSuchNode(path, rev)
            # the second column is the last revision <= rev where the
            # node was modified
            node_id, last_rev = found[:2]
            node_type = get_node_type(self.db, self.id, node_id)
        else:
            node_id, node_type, last_rev = None, NODE_DIR_TYPE, rev
        return DarcsNode(node_id, node_type, path, last_rev,
                         self, self.__cmd, self.log)

    def get_oldest_rev(self):
        """Return 1, or None when there is no youngest revision."""
        if self.get_youngest_rev() is not None:
            return 1
        return None

    def get_youngest_rev(self):
        """Return the highest revision stored for this repository.

        None is returned when the query yields no (or a false) value.
        """
        cursor = self.db.cursor()
        cursor.execute('SELECT max(rev) FROM darcs_changesets '
                       'WHERE repo_id = %s', (self.id,))
        found = cursor.fetchone()
        if found and found[0]:
            return found[0]
        return None

    def previous_rev(self, rev, path=''):
        """Return the revision preceding `rev`, None for the first one.

        `path` is accepted but not used.
        """
        current = self.normalize_rev(rev)
        if not (current > 1):
            return None
        return current - 1

    def next_rev(self, rev, path=''):
        """Return the revision following `rev`, None past the youngest.

        `path` is accepted but not used.
        """
        current = self.normalize_rev(rev)
        youngest = self.get_youngest_rev()
        if not (current < youngest):
            return None
        return current + 1

    def rev_older_than(self, rev1, rev2):
        """Tell whether `rev1` is strictly older than `rev2`."""
        older = self.normalize_rev(rev1)
        newer = self.normalize_rev(rev2)
        return older < newer

    def get_path_history(self, path, rev=None, limit=None):
        """Return the history of the node found at `path` in `rev`.

        This simply delegates to the node's ``get_history()``.
        """
        # FIXME: this is not correct
        node = self.get_node(path, rev)
        return node.get_history(limit)

    def normalize_path(self, path):
        """Strip leading/trailing slashes from `path`.

        An empty or missing path, or one made only of slashes, becomes
        ``/``.
        """
        if path:
            trimmed = path.strip('/')
            if trimmed:
                return trimmed
        return '/'

    def normalize_rev(self, rev):
        if isinstance(rev, basestring) and len(rev) in (61,64):
            if rev.endswith('.gz'):
                # We don't store ending .gz in the db
                rev = rev[:-3]
            c = self.db.cursor()
            c.execute('SELECT rev FROM darcs_changesets '
                      'WHERE repo_id = %s AND hash = %s', (self.id, rev))
            row = c.fetchone()
            if row is None:
                raise NoSuchChangeset(rev)
            rev = int(row[0])
        else:
            youngest = self.get_youngest_rev()
            if rev is None or rev == "":
                return youngest
            try:
                rev = int(rev)
            except ValueError, le:
                raise TracError('Ill-formed revision: %s, error: %s' % (rev, le))
            if rev > youngest:
                rev = youngest
        return rev

    def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=1):
        """Yield the differences between two versions of the same node.

        Each item is a ``(old_node, new_node, kind, change)`` tuple, where
        either node may be None (added or removed entries).

        `old_path`/`old_rev` and `new_path`/`new_rev` are normalized and
        must designate the same node, otherwise ``TracError`` is raised.
        `ignore_ancestry` is accepted but not used.
        """
        old_path = self.normalize_path(old_path)
        old_rev = self.normalize_rev(old_rev)
        new_path = self.normalize_path(new_path)
        new_rev = self.normalize_rev(new_rev)
        old_node = self.get_node(old_path, old_rev)
        new_node = self.get_node(new_path, new_rev)

        # Both ends must be the very same node (the root has id None)
        node_id = old_node._get_node_id()
        if node_id != new_node._get_node_id():
            raise TracError('Node mismatch: base is %s in rev %d '
                            'and target is %s in rev %d' % (old_path, old_rev,
                                                            new_path, new_rev))

        # A file yields at most one EDIT entry, when its last modification
        # revision differs between the two ends
        if old_node.kind == Node.FILE:
            if old_node.rev != new_node.rev:
                yield (old_node, new_node, Node.FILE, Changeset.EDIT)
            return

        # Directory: fetch the (rev, path) pairs recorded in the range,
        # either for this node only or, for the root, for every node
        c = self.db.cursor()
        if node_id is not None:
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s',
                      (self.id, node_id, old_rev, new_rev))
        else:
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND rev >= %s AND rev <= %s',
                      (self.id, old_rev, new_rev))
        # node_set is used for membership tests, node_list keeps the
        # order of discovery
        node_set = dict()
        node_list = []
        c1 = self.db.cursor()
        for rev,path in c:
            # Collect the nodes changed in the same revision whose path
            # lies below `path`.
            # NOTE(review): the pattern is path + '/%' and `path` is not
            # escaped for LIKE; for the root case this matches only what
            # is below another changed path -- confirm this is intended.
            c1.execute('SELECT node_id FROM darcs_node_changes '
                       'WHERE repo_id = %s AND rev = %s AND path LIKE %s',
                       (self.id, rev, path+'/%'))
            for nid, in c1:
                if nid not in node_set:
                    node_set[nid] = 1
                    node_list.append(nid)
        for nid in node_list:
            old_node = new_node = None
            # Old side: the most recent change strictly before old_rev
            c1.execute('SELECT rev,path FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev < %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.id, nid, old_rev))
            row = c1.fetchone()
            if row is not None:
                rev,path = row
                old_node = self.get_node(path, rev)
            # New side: the most recent change within [old_rev, new_rev];
            # a row is expected here since the id was collected from
            # changes in that same range
            c1.execute('SELECT rev,path,change_kind FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.id, nid, old_rev, new_rev))
            rev,path,change = c1.fetchone()
            if change != CHANGE_REMOVED:
                new_node = self.get_node(path, rev)
            assert (old_node is not None) or (new_node is not None)
            kind = old_node and old_node.kind or new_node.kind
            # The reported change is derived from which sides exist and
            # whether the path differs, not from the stored change_kind
            if old_node is None:
                change = Changeset.ADD
            elif new_node is None:
                change = Changeset.DELETE
            elif old_node.path != new_node.path:
                change = Changeset.MOVE
            else:
                change = Changeset.EDIT
            yield (old_node,new_node,kind,change)

    def sync(self, rev_callback=None, clean=False):
        """Import new changesets, optionally warming up the caches.

        `rev_callback` and `clean` are handed over to ``update_darcsdb()``.
        When eager annotations are enabled, the content and the
        annotations of every file added or edited by the imported
        changesets are computed right away, logging the time spent on
        each changeset.
        """
        from time import time
        from dbutil import format_elapsed_time as trepr

        # Import any new changesets, if any
        newrevs = update_darcsdb(self.db, self.__cmd, self.log, self.id,
                                 rev_callback=rev_callback, clean=clean)

        if not (self.eager_annotations and newrevs):
            return

        # Eager mode: precompute content and annotations
        total = len(newrevs)
        cacheable = (Changeset.EDIT, Changeset.ADD)
        for index, rev in enumerate(newrevs):
            started = time()

            changeset = self.get_changeset(rev)
            for path, kind, change, prev_path, prev_rev in changeset.get_changes():
                if kind != Node.FILE or change not in cacheable:
                    continue
                node = self.get_node(path, rev)
                node.get_content()
                node.get_annotations()

            # elapsed seconds converted to microseconds
            usec = (time() - started) * 1e6
            self.log.info('Preannotated changeset %d/%d: %s',
                          index + 1, total, trepr(usec))

    def get_path_url(self, path, rev):
        """Return the URL of `path` below the configured ``url`` parameter.

        None is returned when no ``url`` is configured; the bare URL is
        returned for an empty path or for ``/``.  `rev` is not used.
        """
        base = self.params.get('url', '').rstrip('/')
        if not base:
            return None
        if path and path != '/':
            return base + '/' + path.lstrip('/')
        return base

class DarcsNode(Node):
    """Implement darcs specific logic around a ``Node``."""

    def __init__(self, node_id, node_type, path, rev,
                 repos, cmd, log=None):
        """Initialize the node, translating `node_type` to the trac kind.

        `node_id`, `node_type`, `cmd` and `log` are kept in private
        attributes; `path` and `rev` are also exposed as ``created_path``
        and ``created_rev``.
        """
        Node.__init__(self, repos, path, rev, _node_type_map[node_type])
        self.created_path = path
        self.created_rev = rev
        self.__log = log
        self.__cmd = cmd
        self.__node_type = node_type
        self.__node_id = node_id

    def _get_node_id(self):
        """Return the node id given at construction time."""
        node_id = self.__node_id
        return node_id

    def _get_cached_rev(self):
        """Return the revision whose cache entry serves this node.

        None means that the node has no later change, so the HEAD is to
        be used.  Otherwise the result lies between the node revision and
        the revision right before its next change: the highest one that
        already has a cached content, or else the upper bound itself.
        """
        following = self._get_next()
        if following is None:
            # no future versions
            return None

        upper = following[1] - 1
        if upper != self.rev:
            # more than one hop: look for an existing cache in the range
            cursor = self.repos.db.cursor()
            cursor.execute('SELECT max(rev) FROM darcs_cache '
                           'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s AND content IS NOT NULL',
                           (self.repos.id, self.__node_id, self.rev, upper))
            newest_cached = cursor.fetchone()[0]
            if newest_cached is not None:
                return newest_cached

        # either a single hop or nothing cached yet
        return upper

    def get_content(self):
        """Return the content of the file as a ``StringIO``, None for dirs.

        When the node has later changes the content is served from the
        ``darcs_cache`` table, which gets populated on a miss; otherwise
        the file is read straight from darcs without a patch hash.
        """
        if self.__node_type == NODE_DIR_TYPE:
            return None
        c = self.repos.db.cursor()

        # Since darcs is faster and faster in building the content
        # of a file for more and more recent changes, compute the
        # optimal revision to build the cache of
        crev = self._get_cached_rev()

        if crev is not None:
            # check if the file content is there in the cache
            c.execute('SELECT content FROM darcs_cache '
                      'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                      (self.repos.id, self.__node_id, crev))
            row = c.fetchone()
            if row is not None:
                self.__log.debug('Cache hit %s at rev %s', self.path, crev)
                # if present just return it
                # NOTE(review): _get_cached_rev() filters on "content IS
                # NOT NULL", which suggests rows with a NULL content may
                # exist; such a row would make buffer() fail here -- confirm
                data = str(buffer(row[0]))
            else:
                self.__log.debug('Building content cache for %s at rev %s', self.path, crev)

                # load the file content from the repo
                c.execute('SELECT hash FROM darcs_changesets '
                          'WHERE repo_id = %s AND rev = %s', (self.repos.id, crev,))
                hash = c.fetchone()[0]
                data = self.__cmd.cat(hash, self.path)

                # save the file content in the cache
                c = self.repos.db.cursor()
                try:
                    c.execute('INSERT INTO darcs_cache (repo_id,node_id,rev,content,size) '
                              'VALUES (%s,%s,%s,%s,%s)',
                              (self.repos.id, self.__node_id, crev, buffer(data), len(data)))
                except:
                    # Any failure of the INSERT ends up here: undo it and
                    # look for the row again, re-raising the original
                    # error if it is still missing
                    self.repos.db.rollback()
                    c = self.repos.db.cursor()
                    # Maybe some other thread computed the same content
                    c.execute('SELECT content FROM darcs_cache '
                              'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                              (self.repos.id, self.__node_id, crev))
                    row = c.fetchone()
                    if row is not None:
                        self.__log.debug('Late cache hit %s at rev %s', self.path, crev)
                        data = str(buffer(row[0]))
                    else:
                        raise
                else:
                    # the INSERT succeeded, make it permanent
                    self.repos.db.commit()
        else:
            # Use the HEAD
            self.__log.debug('Serving pristine file %s, no changes since rev %s', self.path, self.rev)
            data = self.__cmd.cat(None, self.path)

        return StringIO.StringIO(data)

    def get_entries(self):
        """Yield a ``DarcsNode`` for each direct child of this directory.

        Nothing is yielded for a file.  The children are those whose
        parent id is this node's id (NULL for the root) as of this
        node's revision.
        """
        if self.__node_type == NODE_FILE_TYPE:
            return
        if self.__node_id is not None:
            # the id is formatted with %d, so only a number can get in
            cond = 'dnc.parent_id = %d' % self.__node_id
        else:
            cond = 'dnc.parent_id IS NULL'
        repos = self.repos
        query, params = query_nodes_for_revision(repos.id, self.rev, cond)
        cursor = repos.db.cursor()
        cursor.execute(query, params)
        for child_id, child_rev, child_path, _ in cursor:
            child_type = get_node_type(repos.db, repos.id, child_id)
            yield DarcsNode(child_id, child_type, child_path, child_rev,
                            repos, self.__cmd, self.__log)

    def get_history(self, limit=None):
        """Yield ``(path, rev, change)`` tuples, most recent first.

        For the root every revision from this node's one down to 1 is
        reported as an edit; for any other node the changes recorded in
        the database up to this node's revision are reported.

        `limit`, when not None, caps the number of yielded items.  It is
        honoured for the root as well, consistently with the ``LIMIT``
        clause used for the other nodes.  Nothing is yielded for a root
        without a revision.
        """
        if self.path == '/':
            if self.rev is None:
                # empty repository: no history at all
                return
            oldest = 0
            if limit is not None:
                # range() excludes its stop value, so this yields at
                # most `limit` revisions
                oldest = max(0, self.rev - int(limit))
            for i in range(self.rev, oldest, -1):
                yield (self.path, i, Changeset.EDIT)
            return
        c = self.repos.db.cursor()
        q = ('SELECT path,rev,change_kind FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev <= %s '
             'ORDER BY rev DESC')
        if limit is not None:
            # %d guarantees that only a number ends up in the statement
            q += ' LIMIT %d' % limit
        c.execute(q, (self.repos.id, self.__node_id, self.rev))
        for path,rev,change in c:
            yield (path, rev, _change_map[change])

    def _get_next(self):
        """Return the first item of ``_get_future(1)``, None if empty."""
        for entry in self._get_future(1):
            return entry
        return None

    def _get_future(self, limit=None):
        """Yield ``(path, rev, change)`` for the changes after this revision.

        Items come in ascending revision order, so that the first one is
        the very next change.  `limit`, when not None, caps the number of
        yielded items.

        For the root every revision after this node's one, up to the
        youngest of the repository, is reported as an edit.  The youngest
        revision is asked to the repository, since the node itself has no
        ``get_youngest_rev()`` method.
        """
        if self.path == '/':
            youngest = self.repos.get_youngest_rev()
            if youngest is None or self.rev is None:
                # empty repository: there is no future
                return
            newest = youngest
            if limit is not None:
                newest = min(youngest, self.rev + int(limit))
            # ascending, like the "ORDER BY rev" of the query below
            for i in range(self.rev + 1, newest + 1):
                yield (self.path, i, Changeset.EDIT)
            return
        c = self.repos.db.cursor()
        q = ('SELECT path,rev,change_kind FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev > %s '
             'ORDER BY rev')
        if limit is not None:
            # %d guarantees that only a number ends up in the statement
            q += ' LIMIT %d' % limit
        c.execute(q, (self.repos.id, self.__node_id, self.rev))
        for path,rev,change in c:
            yield (path, rev, _change_map[change])

    def get_annotations(self):
        """Provide detailed backward history for the content of this Node.

        Retrieve an array of revisions parsing `darcs annotate`. Since
        that is (still) not fast enough for some repository, we write
        a cache of the information: a future annotate on the same file
        at the same revision won't reexecute `darcs annotate`.

        The result is a list with one revision number per line of the
        file.  The cache stores it run-length encoded, as rows of
        ``(up_to_line, blame_rev)``.

        Every lookup in ``darcs_changesets`` is restricted to this
        repository: revision numbers are per repository, and the same
        patch hash may be present in several of them.
        """

        from xml.sax import make_parser
        from xml.sax.handler import ContentHandler, ErrorHandler

        repo_id = self.repos.id
        node_id = self.__node_id
        c = self.repos.db.cursor()

        # Since darcs is faster and faster in building the content
        # of a file for more and more recent changes, compute the
        # optimal revision to build the cache of
        crev = self._get_cached_rev()
        if crev is None:
            crev = self.rev

        # Check if the annotate cache is already present
        c.execute('SELECT up_to_line,blame_rev FROM darcs_annotate_cache '
                  'WHERE repo_id = %s AND node_id = %s AND rev = %s '
                  'ORDER BY up_to_line', (repo_id, node_id, crev))
        row = c.fetchone()
        if row is not None:
            self.__log.debug('Annotate cache hit for %s at rev %s', self.path, crev)
            revs = []
            line = 0
            # Expand the cache, producing a list of revisions, one per line
            while row is not None:
                while line<row[0]:
                    revs.append(row[1])
                    line += 1
                row = c.fetchone()
            return revs

        # No cache, build it

        self.__log.debug('Building annotate cache for %s at rev %s', self.path, crev)

        class DarcsXMLAnnotateHandler(ContentHandler):
            """Collect one revision per line from the annotate XML."""

            def __init__(self):
                ContentHandler.__init__(self)
                self.revisions = []
                # patch hash -> revision, to query each hash only once
                self.known_hashes = {}

            def startElement(self, name, attributes):
                if name == 'patch':
                    # drop the last three characters (cf. the '.gz'
                    # suffix that is not stored in the db)
                    self.current_hash = attributes['hash'][:-3]

            def endElement(self, name):
                if name == 'normal_line':
                    self.revisions.append(self.findRevision(self.current_hash))
                elif name == 'added_line':
                    self.revisions.append(self.findRevision(self.last_changed_hash))
                elif name == 'modified':
                    self.last_changed_hash = self.current_hash

            def findRevision(self, patch_hash):
                # Return the trac revision for the given patch hash,
                # as numbered in *this* repository
                try:
                    return self.known_hashes[patch_hash]
                except KeyError:
                    c.execute('SELECT rev FROM darcs_changesets '
                              'WHERE repo_id = %s AND hash = %s',
                              (repo_id, patch_hash))
                    rev = self.known_hashes[patch_hash] = c.fetchone()[0]
                    return rev

        # Get the hash of the patch, within this repository
        c.execute('SELECT hash FROM darcs_changesets '
                  'WHERE repo_id = %s AND rev = %s', (repo_id, self.rev))
        node_hash = c.fetchone()[0]

        # Get darcs annotate output for the given entry and patch hash
        annotate = self.__cmd.annotate(node_hash, self.path)

        parser = make_parser()
        handler = DarcsXMLAnnotateHandler()
        parser.setContentHandler(handler)
        parser.setErrorHandler(ErrorHandler())

        parser.feed(annotate)
        parser.close()

        revs = handler.revisions
        if not revs:
            self.__log.debug('Empty file, no annotations for %s at rev %s', self.path, crev)
            return revs

        # Write a compressed representation: one row for each run of
        # consecutive lines blamed to the same revision, keyed by the
        # number of lines covered up to the end of the run

        runs = []
        current = revs[0]
        for i,rev in enumerate(revs):
            if rev != current:
                runs.append((i, current))
                current = rev
        runs.append((len(revs), current))

        for up_to_line,blame_rev in runs:
            c.execute('INSERT INTO darcs_annotate_cache (repo_id,node_id,rev,up_to_line,blame_rev) '
                      'VALUES (%s,%s,%s,%s,%s)',
                      (repo_id, node_id, crev, up_to_line, blame_rev))

        self.repos.db.commit()

        return revs

    def get_properties(self):
        """Return the node properties: always an empty dictionary."""
        return dict()

    def get_content_length(self):
        """Return the size of the file content, None for a directory.

        The size stored in ``darcs_cache`` for this node and revision is
        used when present; otherwise the whole content is fetched and
        measured.
        """
        if self.isdir:
            return None

        cursor = self.repos.db.cursor()
        cursor.execute('SELECT size FROM darcs_cache '
                       'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                       (self.repos.id, self.__node_id, self.rev))
        cached = cursor.fetchone()
        if cached is None:
            # not in the cache: get the whole content and count
            return len(self.get_content().read())
        return cached[0]

    def get_content_type(self):
        """Guess the MIME type from the path; None for a directory."""
        if self.isdir:
            return None
        guessed_type, _ = mimetypes.guess_type(self.path)
        return guessed_type

    def get_name(self):
        """Return the last component of the node path."""
        return os.path.basename(self.path)

    def get_last_modified(self):
        """Return the time of the revision this node was changed in.

        0 is returned for a node without an id (the root).  The time is
        read from the ``revision`` table and converted according to the
        trac version in use.
        """
        if self.__node_id is None:
            return 0
        repos = self.repos
        cursor = repos.db.cursor()
        cursor.execute('SELECT rev FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                       (repos.id, self.__node_id, self.rev))
        changed_rev = cursor.fetchone()[0]
        if not IS_TRAC_0_10_X:
            from trac.util.datefmt import from_utimestamp
            cursor.execute('SELECT time FROM revision '
                           'WHERE repos = %s AND rev = %s', (repos.id, changed_rev))
            return from_utimestamp(cursor.fetchone()[0])

        # trac 0.10: the revision table is queried by rev alone
        from datetime import datetime
        from trac.util.datefmt import utc
        cursor.execute('SELECT time FROM revision '
                       'WHERE rev = %s', (changed_rev,))
        return datetime.fromtimestamp(cursor.fetchone()[0], utc)

class DarcsChangeset(Changeset):
    """Represent a cached darcs changeset."""

    def __init__(self, repos, rev):
        """Load author, time, message and patch hash of revision `rev`.

        ``NoSuchChangeset`` is raised when the revision is not found.
        The query and the date conversion depend on the trac version.
        """
        cursor = repos.db.cursor()
        if IS_TRAC_0_12_OR_BETTER:
            query = ('SELECT r.author,r.time,r.message,c.hash '
                     'FROM revision as r, darcs_changesets as c '
                     'WHERE r.repos = %s AND c.repo_id = r.repos '
                     '  AND r.rev = %s AND c.rev = r.rev')
            args = (repos.id, rev)
        else:
            # older trac: the repository id is fixed to 0 here
            query = ('SELECT r.author,r.time,r.message,c.hash '
                     'FROM revision as r, darcs_changesets as c '
                     'WHERE r.rev = %s '
                     '  AND c.rev = r.rev AND c.repo_id = %s')
            args = (rev, 0)
        cursor.execute(query, args)
        found = cursor.fetchone()
        if found is None:
            raise NoSuchChangeset(rev)
        author, stamp, message, patch_hash = found
        if IS_TRAC_0_10_X:
            from datetime import datetime
            from time import mktime
            from trac.util.datefmt import utc
            date = mktime(datetime.fromtimestamp(stamp, utc).timetuple())
        else:
            from trac.util.datefmt import from_utimestamp
            date = from_utimestamp(stamp)
        Changeset.__init__(self, repos, rev, message, author, date)
        self.__hash = patch_hash

    def get_changes(self):
        """Yield ``(path, kind, change, prev_path, prev_rev)`` tuples.

        One item is produced for each node change recorded for this
        revision; the previous path and revision are None for an added
        node.
        """
        repos = self.repos
        cursor = repos.db.cursor()
        cursor.execute('SELECT node_id,path,change_kind FROM darcs_node_changes '
                       'WHERE repo_id = %s AND rev = %s', (repos.id, self.rev,))
        for node_id, path, change in cursor:
            kind = _node_type_map[get_node_type(repos.db, repos.id, node_id)]
            if change != CHANGE_ADDED:
                prev_path, prev_rev = get_prev_path_rev(repos.db, repos.id,
                                                        node_id, self.rev)
            else:
                prev_path = prev_rev = None
            yield (path, kind, _change_map[change], prev_path, prev_rev)

    def get_properties(self):
        """Return the extra properties of the changeset as a dictionary.

        ``Hashname`` is always present and holds the patch hash.  When
        other repositories share the ``identity`` value of this one, two
        more keys may appear:

        ``PresentIn``
          list of ``(repository name, rev)`` for the related repositories
          that contain a changeset with the same hash

        ``MissingIn``
          list of the names of the related repositories that do not
        """
        props = dict(Hashname=self.__hash)

        # See if there are any "related repository", with a common "identity"
        # property.

        c = self.repos.db.cursor()
        c.execute("SELECT rep2.id "
                  "FROM repository rep, repository rep2 "
                  "WHERE rep.id=%s"
                  "  AND rep.name='identity'"
                  "  AND rep2.id<>rep.id"
                  "  AND rep2.name='identity'"
                  "  AND rep2.value=rep.value", (self.repos.id,))
        other_rids = tuple([r[0] for r in c.fetchall()])

        if not other_rids:
            return props

        # The ids are bound as parameters, one placeholder each, rather
        # than being written into the statement text.
        placeholders = ','.join(['%s'] * len(other_rids))

        # Compute a list of "equivalent changesets", when the same
        # changeset is present in other related repositories.

        c.execute("SELECT rep.value, dcs.rev "
                  "FROM darcs_changesets dcs, repository rep "
                  "WHERE dcs.hash = %s"
                  "  AND dcs.repo_id IN (" + placeholders + ")"
                  "  AND rep.id = dcs.repo_id AND rep.name = 'name' "
                  "ORDER BY rep.value",
                  (self.__hash,) + other_rids)
        eqcsets = [(repo, rev) for repo,rev in c.fetchall()]
        if eqcsets:
            props['PresentIn'] = eqcsets

        # Compute the opposite list, that is the repositories where
        # the changeset is missing.  Mind the order of the arguments:
        # here the ids come before the hash.

        c.execute("SELECT rep.value "
                  "FROM repository rep "
                  "WHERE rep.name='name'"
                  "  AND rep.id IN (" + placeholders + ")"
                  "  AND NOT EXISTS ("
                  "SELECT dcs.rev "
                  "FROM darcs_changesets dcs "
                  "WHERE dcs.repo_id=rep.id"
                  "  AND dcs.hash=%s) "
                  "ORDER BY rep.value",
                  other_rids + (self.__hash,))
        mir = [r[0] for r in c.fetchall()]
        if mir:
            props['MissingIn'] = mir

        return props
