# -*- coding: utf-8 -*-
#
# Copyright (C) 2005 Edgewall Software
# Copyright (C) 2006 K.S.Sreeram <sreeram@tachyontech.net>
# Copyright (C) 2007,2008,2009 Lele Gaifax <lele@metapensiero.it>
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
#
# Author: K.S.Sreeram <sreeram@tachyontech.net>

'''
This module implements the trac versioncontrol backend API.
The API consists of 3 classes: DarcsRepository, DarcsNode
and DarcsChangeset.

Please see the docs in trac.versioncontrol.api for the interface
which is implemented by this module.
'''

import os, StringIO, mimetypes
from datetime import tzinfo, timedelta, datetime
import time

from trac.util import TracError
from trac.util.datefmt import to_timestamp, utc
from trac.versioncontrol import (Repository, Node, Changeset,
                                 NoSuchChangeset, NoSuchNode)

from command import DarcsCommand
from updatedb import update_darcsdb
from dbutil import (CHANGE_ADDED, CHANGE_EDITED, CHANGE_MOVED,
                    CHANGE_MOVED_EDITED, CHANGE_REMOVED,
                    IS_TRAC_0_10_X, IS_TRAC_0_11_X,
                    IS_TRAC_0_12_OR_BETTER, NODE_DIR_TYPE,
                    NODE_FILE_TYPE, get_node_type, get_prev_path_rev,
                    get_repository_id, query_nodes_for_revision)

# Translation table: node type as stored by the darcs backend ->
# node kind as exposed by the trac versioncontrol API.
_node_type_map = dict([
    (NODE_FILE_TYPE, Node.FILE),
    (NODE_DIR_TYPE, Node.DIRECTORY),
])

# Translation table: change type as recorded by the darcs backend ->
# change kind as exposed by the trac versioncontrol API.
_change_map = dict([
    (CHANGE_ADDED, Changeset.ADD),
    (CHANGE_REMOVED, Changeset.DELETE),
    (CHANGE_MOVED, Changeset.MOVE),
    (CHANGE_EDITED, Changeset.EDIT),
    #FIXME: treat moved&edited as just moved?
    (CHANGE_MOVED_EDITED, Changeset.MOVE),
])

class DarcsRepository(Repository):
    def __init__(self, db, path, log, darcscmd, possible_encodings):
        Repository.__init__(self, path, None, log)
        self.db = db
        self.path = path
        self.log = log
        self.__cmd = DarcsCommand(darcscmd, path, log, possible_encodings)
        self.repo_id = get_repository_id(db, path)
        if IS_TRAC_0_10_X:
            self.sync()

    def close(self):
        pass

    def get_changeset(self, rev):
        rev = self.normalize_rev(rev)
        return DarcsChangeset(self.db, self.repo_id, rev)

    def get_node(self, path, rev=None):
        path = self.normalize_path(path)
        rev = self.normalize_rev(rev)
        # compute node_id, node_type and last_rev and then
        # create a DarcsNode object.
        # 'last_rev' is the last revision <= rev where this
        # node was modified.
        if path == '/':
            node_id = None
            node_type = NODE_DIR_TYPE
            last_rev = rev
        else:
            c = self.db.cursor()
            q = query_nodes_for_revision(self.repo_id, rev)
            q += ' AND dnc.path = %s'
            c.execute(q, (path,))
            row = c.fetchone()
            if row is None:
                raise NoSuchNode(path, rev)
            node_id,last_rev = row[:2]
            node_type = get_node_type(self.db, self.repo_id, node_id)
        return DarcsNode(node_id, node_type, path, last_rev,
                         self.db, self.repo_id, self.__cmd, self.log)

    def get_oldest_rev(self):
        if self.get_youngest_rev() is None:
            return None
        return 1

    def get_youngest_rev(self):
        c = self.db.cursor()
        c.execute('SELECT max(rev) FROM darcs_changesets '
                  'WHERE repo_id = %s', (self.repo_id,))
        row = c.fetchone()
        return row and row[0] or None

    def previous_rev(self, rev):
        rev = self.normalize_rev(rev)
        if rev > 1:
            return rev-1
        return None

    def next_rev(self, rev, path=''):
        rev = self.normalize_rev(rev)
        if rev < self.get_youngest_rev():
            return rev+1
        return None

    def rev_older_than(self, rev1, rev2):
        return self.normalize_rev(rev1) < self.normalize_rev(rev2)

    def get_path_history(self, path, rev=None, limit=None):
        # FIXME: this is not correct
        return self.get_node(path, rev).get_history(limit)

    def normalize_path(self, path):
        return path and path.strip('/') or '/'

    def normalize_rev(self, rev):
        if isinstance(rev, basestring) and len(rev) in (61,64):
            if not rev.endswith('.gz'):
                # We store the complete hashname in the db
                rev = rev + '.gz'
            c = self.db.cursor()
            c.execute('SELECT rev FROM darcs_changesets '
                      'WHERE repo_id = %s AND hash = %s', (self.repo_id, rev))
            row = c.fetchone()
            if row is None:
                raise NoSuchChangeset(rev)
            rev = int(row[0])
        else:
            youngest = self.get_youngest_rev()
            if rev is None or rev == "":
                return youngest
            try:
                rev = int(rev)
            except ValueError, le:
                raise TracError('Ill-formed revision: %s, error: %s' % (rev, le))
            if rev > youngest:
                rev = youngest
        return rev

    def get_changes(self, old_path, old_rev, new_path, new_rev, ignore_ancestry=1):
        old_path = self.normalize_path(old_path)
        old_rev = self.normalize_rev(old_rev)
        new_path = self.normalize_path(new_path)
        new_rev = self.normalize_rev(new_rev)
        old_node = self.get_node(old_path, old_rev)
        new_node = self.get_node(new_path, new_rev)

        node_id = old_node._get_node_id()
        if node_id != new_node._get_node_id():
            raise TracError('Node mismatch: base is %s in rev %d '
                            'and target is %s in rev %d' % (old_path,old_rev,
                                                            new_path,new_rev))

        if old_node.kind == Node.FILE:
            if old_node.rev != new_node.rev:
                yield (old_node,new_node,Node.FILE,Changeset.EDIT)
            return

        c = self.db.cursor()
        if node_id is not None:
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s',
                      (self.repo_id,node_id,old_rev,new_rev))
        else:
            c.execute('SELECT rev,path FROM darcs_node_changes '
                      'WHERE repo_id = %s AND rev >= %s AND rev <= %s',
                      (self.repo_id,old_rev,new_rev))
        node_set = dict()
        node_list = []
        c1 = self.db.cursor()
        for rev,path in c:
            c1.execute('SELECT node_id FROM darcs_node_changes '
                       'WHERE repo_id = %s AND rev = %s AND path LIKE %s',
                       (self.repo_id,rev,path+'/%'))
            for nid, in c1:
                if nid not in node_set:
                    node_set[nid] = 1
                    node_list.append(nid)
        for nid in node_list:
            old_node = new_node = None
            c1.execute('SELECT rev,path FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev < %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.repo_id,nid,old_rev))
            row = c1.fetchone()
            if row is not None:
                rev,path = row
                old_node = self.get_node(path, rev)
            c1.execute('SELECT rev,path,the_change FROM darcs_node_changes '
                       'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s '
                       'ORDER BY rev DESC LIMIT 1',
                       (self.repo_id,nid,old_rev,new_rev))
            rev,path,change = c1.fetchone()
            if change != CHANGE_REMOVED:
                new_node = self.get_node(path, rev)
            assert (old_node is not None) or (new_node is not None)
            kind = old_node and old_node.kind or new_node.kind
            if old_node is None:
                change = Changeset.ADD
            elif new_node is None:
                change = Changeset.DELETE
            elif old_node.path != new_node.path:
                change = Changeset.MOVE
            else:
                change = Changeset.EDIT
            yield (old_node,new_node,kind,change)

    def sync(self, rev_callback=None):
        # Import any new changesets, if any
        update_darcsdb(self.db, self.__cmd, self.log, rev_callback=rev_callback)

class DarcsNode(Node):
    """Trac ``Node`` for a file or directory tracked in the darcs tables.

    The root directory is represented by a ``None`` node id and the
    path ``'/'``.
    """

    def __init__(self, node_id, node_type, path, rev,
                 db, repo_id, cmd, log=None):
        """Create the node `node_id` of type `node_type` at `path`/`rev`.

        `db` is the database connection, `repo_id` selects the rows of
        this repository, `cmd` is the object used to run darcs and `log`
        the logger used by ``get_content``.
        """
        kind = _node_type_map[node_type]
        Node.__init__(self, path, rev, kind)
        self.__node_id = node_id
        self.__node_type = node_type
        self.__db = db
        self.__repo_id = repo_id
        self.__cmd = cmd
        self.__log = log
        self.created_path = path
        self.created_rev = rev

    def _get_node_id(self):
        """Return the backend node id (None for the root)."""
        return self.__node_id

    def _get_cached_rev(self):
        """Return the revision whose content should be cached/served.

        The content of the node does not change between ``self.rev`` and
        the revision just before its next change, so any revision in
        that range is equivalent.  Returns None when the node has no
        later change, meaning the pristine (HEAD) content can be used.
        """
        # if there are no future versions, use the HEAD
        nrev = self._get_next()
        if nrev is None:
            return None

        maxrange = nrev[1]-1

        # if it's just one hop from node's revision, we're done
        if maxrange == self.rev:
            return maxrange

        # ok, let's see if there is already a cache in the range
        c = self.__db.cursor()
        c.execute('SELECT max(rev) FROM darcs_cache '
                  'WHERE repo_id = %s AND node_id = %s AND rev >= %s AND rev <= %s AND content IS NOT NULL',
                  (self.__repo_id, self.__node_id, self.rev, maxrange))
        row = c.fetchone()
        if row[0] is not None:
            return row[0]

        # No luck, return the most recent revision before the next
        return maxrange

    def get_content(self):
        """Return a file-like object with the content, None for directories.

        The content is taken from ``darcs_cache`` when available,
        otherwise it is obtained from darcs and stored in the cache.
        When the node has no later change the pristine file is served
        and nothing is cached.
        """
        if self.__node_type == NODE_DIR_TYPE:
            return None
        c = self.__db.cursor()

        # Since darcs is faster and faster in building the content
        # of a file for more and more recent changes, compute the
        # optimal revision to build the cache of
        crev = self._get_cached_rev()

        if crev is not None:
            # check if the file content is there in the cache
            c.execute('SELECT content FROM darcs_cache '
                      'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                      (self.__repo_id,self.__node_id,crev))
            row = c.fetchone()
            if row is not None:
                self.__log.debug('Cache hit %s at rev %s', self.path, crev)
                # if present just return it
                # NOTE(review): a row whose content is NULL would make
                # buffer() fail here -- confirm such rows cannot exist.
                data = str(buffer(row[0]))
            else:
                self.__log.debug('Building cache for %s at rev %s', self.path, crev)

                # load the file content from the repo
                c.execute('SELECT hash FROM darcs_changesets '
                          'WHERE repo_id = %s AND rev = %s', (self.__repo_id, crev,))
                patch_hash = c.fetchone()[0]
                data = self.__cmd.cat(patch_hash, self.path)

                # save the file content in the cache
                c = self.__db.cursor()
                c.execute('INSERT INTO darcs_cache (repo_id,node_id,rev,content,size) '
                          'VALUES (%s,%s,%s,%s,%s)',
                          (self.__repo_id, self.__node_id, crev, buffer(data), len(data)))
        else:
            # Use the HEAD
            self.__log.debug('Serving pristine file %s, no changes since rev %s', self.path, self.rev)
            data = self.__cmd.cat(None, self.path)

        return StringIO.StringIO(data)

    def get_entries(self):
        """Yield a ``DarcsNode`` for each direct child; nothing for files."""
        if self.__node_type == NODE_FILE_TYPE:
            return
        q = query_nodes_for_revision(self.__repo_id, self.rev)
        if self.__node_id is None:
            # children of the root have no parent
            q += ' AND dnc.parent_id IS NULL'
        else:
            q += ' AND dnc.parent_id = %d' % self.__node_id
        c = self.__db.cursor()
        c.execute(q)
        for node_id,rev,path,_ in c:
            node_type = get_node_type(self.__db, self.__repo_id, node_id)
            yield DarcsNode(node_id, node_type, path, rev,
                            self.__db, self.__repo_id, self.__cmd, self.__log)

    def get_history(self, limit=None):
        """Yield ``(path, rev, change)`` from ``self.rev`` backwards.

        At most `limit` items are produced when `limit` is not None.
        For the root every revision down to 1 is reported as an edit.
        """
        if self.path == '/':
            newest = self.rev or 0
            oldest = 0
            if limit is not None:
                # honour the limit for the root too, like the query
                # below does for every other node
                oldest = max(0, newest - limit)
            for i in range(newest, oldest, -1):
                yield (self.path, i, Changeset.EDIT)
            return
        c = self.__db.cursor()
        q = ('SELECT path,rev,the_change FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev <= %s '
             'ORDER BY rev DESC')
        if limit is not None:
            q += ' LIMIT %d' % limit
        c.execute(q, (self.__repo_id, self.__node_id, self.rev))
        for path,rev,change in c:
            yield (path, rev, _change_map[change])

    def _get_next(self):
        """Return the first ``(path, rev, change)`` after ``self.rev``, or None."""
        try:
            return self._get_future(1).next()
        except StopIteration:
            return None

    def _get_future(self, limit=None):
        """Yield ``(path, rev, change)`` for the changes after ``self.rev``.

        Items come in ascending revision order, at most `limit` of them
        when `limit` is not None.  For the root every revision up to the
        youngest one is reported as an edit.
        """
        if self.path == '/':
            # A node has no direct access to the repository object, so
            # ask the db for the youngest revision.
            c = self.__db.cursor()
            c.execute('SELECT max(rev) FROM darcs_changesets '
                      'WHERE repo_id = %s', (self.__repo_id,))
            row = c.fetchone()
            youngest = row and row[0] or None
            if youngest is None or self.rev is None:
                return
            last = youngest
            if limit is not None:
                last = min(youngest, self.rev + limit)
            # ascending, to agree with the ORDER BY of the query below
            for i in range(self.rev + 1, last + 1):
                yield (self.path, i, Changeset.EDIT)
            return
        c = self.__db.cursor()
        q = ('SELECT path,rev,the_change FROM darcs_node_changes '
             'WHERE repo_id = %s AND node_id = %s AND rev > %s '
             'ORDER BY rev')
        if limit is not None:
            q += ' LIMIT %d' % limit
        c.execute(q, (self.__repo_id, self.__node_id, self.rev))
        for path,rev,change in c:
            yield (path, rev, _change_map[change])

    def get_annotations(self):
        """Provide detailed backward history for the content of this Node.

        Retrieve an array of revisions parsing `darcs annotate`: the
        result holds one trac revision number for each annotated line.
        """

        from xml.sax import make_parser
        from xml.sax.handler import ContentHandler, ErrorHandler

        c = self.__db.cursor()
        # Bound to a plain local so that the nested handler class can
        # reach it (private names are mangled per class).
        repo_id = self.__repo_id

        class DarcsXMLAnnotateHandler(ContentHandler):
            """Collect the revision of each line from the annotate XML."""

            def __init__(self):
                self.revisions = []
                self.known_hashes = {}

            def startElement(self, name, attributes):
                if name == 'patch':
                    self.current_hash = attributes['hash']

            def endElement(self, name):
                if name == 'normal_line':
                    self.revisions.append(self.findRevision(self.current_hash))
                elif name == 'added_line':
                    self.revisions.append(self.findRevision(self.last_changed_hash))
                elif name == 'modified':
                    self.last_changed_hash = self.current_hash

            def findRevision(self, hash):
                # Return the trac revision for the given patch hash
                try:
                    return self.known_hashes[hash]
                except KeyError:
                    # The same patch may be recorded for several
                    # repositories: restrict the lookup to this one.
                    c.execute('SELECT rev FROM darcs_changesets '
                              'WHERE repo_id = %s AND hash = %s',
                              (repo_id, hash))
                    rev = self.known_hashes[hash] = c.fetchone()[0]
                    return rev

        # Get the hash of the patch
        c.execute('SELECT hash FROM darcs_changesets '
                  'WHERE repo_id = %s AND rev = %s', (repo_id, self.rev))
        hash = c.fetchone()[0]

        # Get darcs annotate output for the given entry and patch hash
        annotate = self.__cmd.annotate(hash, self.path)

        parser = make_parser()
        handler = DarcsXMLAnnotateHandler()
        parser.setContentHandler(handler)
        parser.setErrorHandler(ErrorHandler())

        parser.feed(annotate)
        parser.close()

        return handler.revisions

    def get_properties(self):
        """Return the node properties: always an empty dict."""
        return {}

    def get_content_length(self):
        """Return the size of the content, None for directories."""
        if self.isdir:
            return None

        # first check if the file is already in the cache
        c = self.__db.cursor()
        c.execute('SELECT size FROM darcs_cache '
                  'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                  (self.__repo_id, self.__node_id,self.rev))
        row = c.fetchone()
        if row is not None:
            return row[0]

        # if it's not, get the whole content and count...
        # next time you'll be luckier, promise!
        return len(self.get_content().read())

    def get_content_type(self):
        """Return the MIME type guessed from the path, None for directories."""
        if self.isdir:
            return None
        return mimetypes.guess_type(self.path)[0]

    def get_name(self):
        """Return the last component of the node path."""
        return os.path.split(self.path)[1]

    def get_last_modified(self):
        """Return the time of the revision ``self.rev`` as an UTC datetime.

        The root node yields 0.
        """
        if self.__node_id is None:
            return 0
        c = self.__db.cursor()
        c.execute('SELECT rev FROM darcs_node_changes '
                  'WHERE repo_id = %s AND node_id = %s AND rev = %s',
                  (self.__repo_id,self.__node_id,self.rev))
        rev = c.fetchone()[0]
        # Filter on the `repos` column only where DarcsChangeset does,
        # that is on Trac 0.12 or better.
        if IS_TRAC_0_12_OR_BETTER:
            c.execute('SELECT time FROM revision '
                      'WHERE repos = %s AND rev = %s', (self.__repo_id,rev,))
        else:
            c.execute('SELECT time FROM revision '
                      'WHERE rev = %s', (rev,))
        return datetime.fromtimestamp(c.fetchone()[0], utc)

class DarcsChangeset(Changeset):
    """Trac ``Changeset`` built from ``revision`` and ``darcs_changesets``."""

    def __init__(self, db, repo_id, rev):
        """Load the changeset `rev` of the repository `repo_id` from `db`.

        The changeset message is the patch name followed, when present,
        by the patch comment on the following lines.  Raises
        ``NoSuchChangeset`` when the revision is not in the db.
        """
        self.repo_id = repo_id
        c = db.cursor()
        if IS_TRAC_0_12_OR_BETTER:
            c.execute('SELECT r.author,r.time,c.name,r.message,c.hash '
                      'FROM revision as r, darcs_changesets as c '
                      'WHERE r.repos = %s AND c.repo_id = r.repos '
                      '  AND r.rev = %s AND c.rev = r.rev',
                      (self.repo_id, rev))
        else:
            # NOTE(review): older Trac versions are queried with an
            # empty repo_id rather than with `repo_id` -- presumably
            # the only value stored in that case; confirm.
            c.execute('SELECT r.author,r.time,c.name,r.message,c.hash '
                      'FROM revision as r, darcs_changesets as c '
                      'WHERE r.rev = %s '
                      '  AND c.rev = r.rev AND c.repo_id = %s', (rev,''))
        row = c.fetchone()
        if row is None:
            raise NoSuchChangeset(rev)
        author,date,name,comment,hash = row
        # Trac 0.10.x hack: it wants a plain timestamp, which is what
        # the db already holds.  Going through an UTC datetime and
        # time.mktime() (which interprets its argument as *local* time)
        # would shift the value by the UTC offset of the server.
        if not IS_TRAC_0_10_X:
            date = datetime.fromtimestamp(date, utc)
        msg = name
        if comment:
            msg += '\n' + comment
        Changeset.__init__(self, rev, msg, author, date)
        self.__db = db
        self.__hash = hash

    def get_changes(self):
        """Yield ``(path, kind, change, prev_path, prev_rev)`` for each node
        touched by this changeset.

        `prev_path` and `prev_rev` are None for added nodes.
        """
        c = self.__db.cursor()
        c.execute('SELECT node_id,path,the_change FROM darcs_node_changes '
                   'WHERE repo_id = %s AND rev = %s', (self.repo_id,self.rev,))
        for node_id,path,change in c:
            node_type = get_node_type(self.__db, self.repo_id, node_id)
            kind = _node_type_map[node_type]
            if change == CHANGE_ADDED:
                prev_path = prev_rev = None
            else:
                prev_path,prev_rev = get_prev_path_rev(self.__db, self.repo_id,
                                                       node_id, self.rev)
            change = _change_map[change]
            yield (path,kind,change,prev_path,prev_rev)

    def get_properties(self):
        """Return the changeset properties as a dict.

        ``Hashname`` is always present; ``EqChangesets`` lists the
        ``(repo_id, rev)`` pairs of the changesets with the same hash
        recorded for other repositories, when there are any.
        """
        # omit ending .gz, because under some configuration the Apache
        # web server automatically tags such URLs with something like
        # "Content-Encoding: gzip" that in turn may confuse the browser.
        # Darcs recognizes also extension-stripped hashnames.
        hashname = self.__hash
        if hashname.endswith('.gz'):
            # strip the extension only when it is really there
            hashname = hashname[:-3]
        props = dict(Hashname=hashname)

        c = self.__db.cursor()
        c.execute('SELECT dcs.repo_id, dcs.rev '
                  'FROM darcs_changesets dcs, darcs_changesets dcs2 '
                  'WHERE dcs2.repo_id = %s AND dcs2.rev = %s '
                  '  AND dcs.hash = dcs2.hash '
                  '  AND dcs.repo_id <> dcs2.repo_id', (self.repo_id, self.rev))
        eqcsets = [(repo, rev) for repo,rev in c.fetchall()]
        if eqcsets:
            props['EqChangesets'] = eqcsets

        return props
