import functools
import logging
import xml.etree.ElementTree as ET

import psycopg2

from lib.binary import is_binary_or_large
from lib.db import DB
from lib.db_revision import DBRevision
from lib.git import Git
from lib.history import History
from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.tree_builder import TreeBuilder
from lib.user import User


def _files_hash(hash_alg, dirpath):
    """Return ``(file name, digest)`` pairs for the regular files in *dirpath*.

    ``hash_alg`` is called once per file with the file's path; whatever it
    returns is used as the digest.  Only direct children of ``dirpath`` are
    considered and anything that is not a regular file is skipped.
    """
    # TODO: do it async or multithreaded
    regular_files = []
    for entry in dirpath.iterdir():
        if entry.is_file():
            regular_files.append(entry)
    return [(entry.name, hash_alg(entry)) for entry in regular_files]


# Convenience wrappers around ``_files_hash`` with the hash function
# pre-bound to the ``md5`` / ``sha256`` helpers imported from
# ``lib.proxy_sha256``.  Each one takes only the directory path.
files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)


class Importer:
    def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
        """Set up the OBS client, the git repository and the project list.

        ``projects`` is a list of ``(project, branch, api_url)`` tuples whose
        first entry must be "openSUSE:Factory".  When OBS reports a devel
        project for ``package``, it is placed in front of that list under
        the branch name "devel", using the API URL of the first entry.
        """
        # Import strategy: commits are created in order so that the git
        # graph mirrors the one described by the revision timeline.  This
        # requires fetching all revisions first and sorting them linearly
        # by timestamp.
        #
        # While recreating the commits, a revision that is a request whose
        # target is one of the projects in the "history" becomes a merge
        # commit.
        #
        # Optionally (flag), a common "Initial commit" is searched in a
        # reference branch: the first one in "projects", which is safe to
        # assume to be "openSUSE:Factory".  This is not always a good idea.
        # Normally the "devel" project history is older than "factory" and
        # the tree can be rooted on it, but for some packages the "devel"
        # history was partially lost (the project could have been moved)
        # and "factory" is not the root.

        self.package = package
        self.search_ancestor = search_ancestor
        self.rebase_devel = rebase_devel

        self.obs = OBS()
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)

        self.history = History(self.obs, self.package)

        # Put the "devel" project, if any, in front of the given projects
        factory_project, _, factory_url = projects[0]
        assert factory_project == "openSUSE:Factory"
        self.obs.change_url(factory_url)
        devel_project = self.obs.devel_project(factory_project, package)
        self.projects = (
            [(devel_project, "devel", factory_url)] + projects
            if devel_project
            else projects
        )

        # Map every project name to its (branch, api_url) pair
        info = {}
        for name, branch, api_url in self.projects:
            info[name] = (branch, api_url)
        self.projects_info = info

    def download(self, revision):
        """Bring the git working tree in line with the files of ``revision``.

        The file list of the revision is fetched from OBS.  A file that is
        binary or large (and was not previously detected as text) is added
        as an LFS entry using the SHA-256 obtained through the proxy.  Any
        other file is downloaded and added, unless a file with the same
        (name, size, md5) is already present in the working tree.  Files
        present in the working tree but not in the revision are removed.

        Raises:
            Exception: if a downloaded file does not match the MD5 that
                OBS reported for it.
        """
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        # ".gitattributes" is managed by us and is never part of the
        # comparison.  The name is compared for equality: a membership test
        # against the bare string would be a substring match and would also
        # skip files named e.g. "git" or "attributes".
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue
            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                # Skip the download when an identical file is already there
                if (name, size, file_md5) not in git_files:
                    print(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                    self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            print(f"Remove {name}")
            self.git.remove(name)

    def update_db_package(self, db, project, package):
        """Import the OBS revisions of ``project/package`` that are not in the DB yet.

        Every revision newer than the latest stored one is imported.  Its
        link is then read: a link that fails to parse marks the revision as
        broken, otherwise the link target (defaulting to the current project
        and package) is recorded.  Nothing happens when OBS returns no
        history for the package.
        """
        history_xml = self.obs._history(project, package)
        if history_xml is None:
            return
        latest = DBRevision.latest_revision(db, project, package)
        for node in history_xml.findall("revision"):
            rev = OBSRevision(self.obs, self, project, package).parse(node)
            # Already known revisions are skipped
            if latest and not rev.rev > latest.rev:
                continue
            dbrev = DBRevision.import_obs_rev(db, rev)
            try:
                link = rev.read_link()
            except ET.ParseError:
                dbrev.set_broken(db)
                continue
            if link is None:
                continue
            dbrev.links_to(
                db,
                link.get("project") or project,
                link.get("package") or package,
            )

    def find_linked_revs(self, db):
        """Store the linked revision of every link that has none recorded yet.

        Non-broken revisions that have a row in ``links`` but none in
        ``linked_revs`` are looked at; when the revision resolves to a
        linked revision, the pair is inserted into ``linked_revs``.
        """
        with db.cursor() as cur:
            cur.execute(
                """SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
                            LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
                            WHERE lrevs.id IS NULL) and broken is FALSE;"""
            )
            # fetchall() materialises the rows, so the cursor can be reused
            # for the inserts below
            for row in cur.fetchall():
                source = DBRevision(row)
                target = source.linked_rev(db)
                if target:
                    cur.execute(
                        """INSERT INTO linked_revs (revision_id, linked_id)
                                VALUES (%s,%s)""",
                        (source.dbid, target.dbid),
                    )
                else:
                    logging.debug(f"No link {source}")

    def fetch_all_linked_packages(self, db, project, package):
        """Update the DB for every package that ``project/package`` links to.

        The distinct link targets recorded for the revisions of the given
        package are read from the database and each one is passed to
        ``update_db_package``.
        """
        query = """SELECT DISTINCT l.project, l.package from links l JOIN revisions r
                    on r.id=l.revision_id WHERE r.project=%s AND r.package=%s"""
        with db.cursor() as cur:
            cur.execute(query, (project, package))
            for lproject, lpackage in cur.fetchall():
                self.update_db_package(db, lproject, lpackage)

    def find_fake_revisions(self, db):
        """Run ``_find_fake_revision`` on every link target not yet considered.

        The candidates are the revisions referenced as ``linked_id`` by a
        ``linked_revs`` row whose ``considered`` flag is still FALSE.
        """
        query = "SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
        with db.cursor() as cur:
            cur.execute(query)
            pending_rows = cur.fetchall()
            for row in pending_rows:
                self._find_fake_revision(db, DBRevision(row))

    def _find_fake_revision(self, db, rev):
        """Insert a synthetic revision for a link whose target changed in ``rev``.

        ``rev`` is a revision that other revisions link to.  The revisions
        linking to the commit before ``rev`` are searched for the one that
        was still current when ``rev`` was committed.  If one is found (and
        no fake revision was recorded for this pair before), a new row is
        inserted into ``revisions`` for the linking package, carrying the
        commit time of ``rev``, and connected to ``rev`` via ``linked_revs``.

        In every path the ``linked_revs`` rows pointing at ``rev`` are
        flagged as considered.
        """
        prev = rev.previous_commit(db)
        if not prev:
            # No earlier commit to compare against: just flag the links
            # to this revision as considered.
            with db.cursor() as cur:
                cur.execute(
                    "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                    (rev.dbid,),
                )
            return
        with db.cursor() as cur:
            # Revisions that linked to the previous commit and are not
            # newer than ``rev``, oldest first.
            cur.execute(
                """SELECT * FROM revisions WHERE id IN
                            (SELECT revision_id from linked_revs WHERE linked_id=%s) 
                            AND commit_time <= %s ORDER BY commit_time""",
                (prev.dbid, rev.commit_time),
            )
            last_linked = None
            for linked in cur.fetchall():
                linked = DBRevision(linked)
                nextrev = linked.next_commit(db)
                # Skip linking revisions that were already superseded by a
                # later commit before ``rev`` happened.
                if nextrev and nextrev.commit_time < rev.commit_time:
                    continue
                # Rows are ordered by commit_time, so the last survivor wins
                last_linked = linked
            cur.execute(
                "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                (rev.dbid,),
            )
        if not last_linked:
            return

        with db.cursor() as cur:
            linked = last_linked
            # Do not create the same fake revision twice
            cur.execute(
                "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
                (rev.dbid, linked.dbid),
            )
            if cur.fetchone():
                cur.execute(
                    "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                    (rev.dbid,),
                )
                return
            # The fake revision number is the linking revision number plus
            # the target revision number divided by 1000.
            fake_rev = linked.rev + rev.rev / 1000.0
            comment = f"Updating link to change in {rev.project}/{rev.package} revision {rev.rev}"
            # Project, package and unexpanded srcmd5 come from the linking
            # revision; the commit time comes from ``rev``.
            cur.execute(
                """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5, 
                            commit_time, userid, comment) VALUES(%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
                (
                    linked.project,
                    linked.package,
                    fake_rev,
                    linked.unexpanded_srcmd5,
                    rev.commit_time,
                    "buildservice-autocommit",
                    comment,
                ),
            )
            new_id = cur.fetchone()[0]
            # Connect the new revision to ``rev`` ...
            cur.execute(
                """INSERT INTO linked_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (new_id, rev.dbid),
            )
            # ... and remember that this (rev, linked) pair has been handled
            cur.execute(
                """INSERT INTO fake_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (rev.dbid, linked.dbid),
            )

    def revisions_without_files(self, db):
        """Return the non-broken revisions that have no expanded srcmd5 yet.

        The result is a list of ``DBRevision`` objects, one per matching
        row of the ``revisions`` table.
        """
        query = "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
        with db.cursor() as cur:
            cur.execute(query)
            pending = []
            for row in cur.fetchall():
                pending.append(DBRevision(row))
            return pending

    def import_into_db(self):
        """Mirror the OBS data of the package into the database.

        Steps, in order:

        1. For every configured project, import the package revisions and
           those of the packages it links to, then resolve the links.
        2. Import the users the database reports as missing.
        3. Create the fake revisions for changed link targets.
        4. Fetch the directory listing of every revision that has none,
           store its files hash, or mark the revision as broken when OBS
           returns no listing.
        5. Import the requests the database still needs.

        The transaction is committed at the end and the tree built for the
        package is printed.
        """
        db = DB()
        for project, _, api_url in self.projects:
            self.obs.change_url(api_url)
            self.update_db_package(db, project, self.package)
            self.fetch_all_linked_packages(db, project, self.package)
            # all remaining, no filtering here
            self.find_linked_revs(db)

        for userid in User.missing_users(db):
            missing_user = self.obs.user(userid)
            if missing_user:
                missing_user.import_into_db(db)

        self.find_fake_revisions(db)
        for rev in self.revisions_without_files(db):
            with db.cursor() as cur:
                cur.execute(
                    """SELECT unexpanded_srcmd5 from revisions WHERE
                            id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
                    (rev.dbid,),
                )
                row = cur.fetchone()
            # srcmd5 of the link target, or None when the revision has none
            linked_srcmd5 = row[0] if row else None
            # Local names deliberately avoid ``list`` (builtin) and ``md5``
            # (helper imported at module level).
            dir_list = self.obs.list(
                rev.project, rev.package, rev.unexpanded_srcmd5, linked_srcmd5
            )
            if dir_list:
                rev.import_dir_list(db, dir_list)
                files_hash = rev.calculate_files_hash(db)
                with db.cursor() as cur:
                    cur.execute(
                        "UPDATE revisions SET files_hash=%s WHERE id=%s",
                        (files_hash, rev.dbid),
                    )
            else:
                rev.set_broken(db)

        for number in DBRevision.requests_to_fetch(db):
            self.obs.request(number).import_into_db(db)

        db.conn.commit()
        TreeBuilder(db).build(self.package).print()

    def import_all_revisions(self, gc):
        """Import all revisions of the package into git, in sorted order.

        ``gc`` is the number of revisions to import between two calls to
        ``git gc``; ``0`` disables the periodic garbage collection.
        """
        # All requests are fetched and sorted.  Ideally the graph would be
        # built here to avoid new commits before the merge; for now the
        # revisions are sorted and the commits get invalidated later when
        # "rebase_devel" is set.
        self.history.fetch_all_revisions(self.projects)
        ordered = self.history.sort_all_revisions()

        logging.debug(f"Selected import order for {self.package}")
        for revision in ordered:
            logging.debug(revision)

        countdown = gc
        for revision in ordered:
            countdown -= 1
            if gc and countdown <= 0:
                self.git.gc()
                countdown = gc
            self.import_revision(revision)

    def import_new_revision_with_request(self, revision, request):
        """Start the branch of ``revision`` at the commit submitted by ``request``.

        Returns ``False`` when the request does not point to a known
        revision.  Otherwise the branch is created on the submitted commit,
        checked out, ``revision.commit`` is set to that commit and ``None``
        is returned.
        """
        source_rev = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not source_rev:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False

        # A submitted revision that is not yet part of the git history
        # hints at an ordering problem (one example is "premake4"), so it
        # is imported right now.
        if not source_rev.commit:
            self.import_revision(source_rev)

        assert source_rev.commit is not None

        branch, _ = self.projects_info[revision.project]
        base_commit = source_rev.commit

        # TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
        self.git.branch(branch, base_commit)
        self.git.clean()
        self.git.checkout(branch)

        logging.info(f"Create new branch based on {base_commit}")
        revision.commit = base_commit

    def _rebase_branch_history(self, project, revision):
        """Re-import the committed revisions that follow ``revision`` in ``project``.

        Walking the project history after ``revision``, each revision that
        already has a commit gets that commit cleared and is imported
        again.  The walk stops at the first revision without a commit.
        """
        branch, _ = self.projects_info[project]
        history = self.history[project]
        start = history.index(revision) + 1
        for position in range(start, len(history)):
            follower = history[position]
            # We are done when we have one non-committed revision
            if not follower.commit:
                break
            logging.info(f"Rebasing {follower} from {branch}")
            follower.commit = None
            self.import_revision(follower)

    def import_revision_with_request(self, revision, request):
        """Import ``revision`` as a merge of the revision submitted by ``request``.

        Returns ``True`` when a merge commit was created and stored in
        ``revision.commit``.  Returns ``False`` when the request points to
        no known revision, when the submitted commit is "EMPTY", or when
        the merge itself turns out empty (``revision.commit`` is then set
        to "EMPTY").
        """
        source_rev = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not source_rev:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False
        assert source_rev.commit is not None

        # TODO: detect a revision, case in point
        # Base:System/bash/284 -> rq683701 -> accept O:F/151
        #   -> autocommit Base:System/bash/285
        # Revert lead to openSUSE:Factory/bash/152
        # Base:System/286 restored the reverted code in devel project
        # rq684575 was created and accepted as O:F/153
        # But the 284-285 and the 285-286 changeset is seen as empty
        # as the revert was never in Base:System, so the
        # submitted_revision of 684575 has no commit
        if source_rev.commit == "EMPTY":
            logging.warning("Empty commit submitted?!")
            return False

        # TODO: revision.userid or request.creator?
        author = f"OBS User {revision.userid}"
        email = "null@suse.de"
        message = (
            f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
        )
        commit = self.git.merge(
            author, email, revision.time, message, source_rev.commit
        )

        if commit == "EMPTY":
            logging.warning("Empty merge. Ignoring the revision and the request")
            self.git.merge_abort()
            revision.commit = commit
            return False

        if commit == "CONFLICT":
            # Resolve the conflict by taking the files of the revision as
            # they are in OBS, then finish the merge.
            logging.info("Merge conflict. Downloading revision")
            self.download(revision)
            message = f"CONFLICT {message}"
            commit = self.git.merge(
                author,
                email,
                revision.time,
                message,
                source_rev.commit,
                merged=True,
            )

        assert commit and commit != "CONFLICT"
        logging.info(f"Merge with {source_rev.commit} into {commit}")
        revision.commit = commit

        if not self.rebase_devel:
            return True

        # TODO: There are more checks to do, like for example, the
        # last commit into the non-devel branch should be a merge from
        # the devel branch
        branch, _ = self.projects_info.get(request.source, (None, None))
        if branch == "devel":
            # Move the devel branch to the merge and replay what follows
            self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
            self._rebase_branch_history(request.source, source_rev)

        return True

    def matching_request(self, revision):
        """Return the request of ``revision`` if it is relevant for the import.

        ``None`` is returned when OBS has no such request, or when a submit
        request cannot be represented as a merge between exported projects.
        Requests of any type other than "submit" are returned as they are
        and left to the caller.
        """
        request = self.obs.request(revision.requestid)
        if not request:
            return None

        # to be handled by the caller
        if request.type() != "submit":
            return request

        if request.source not in self.projects_info:
            reason = "Request from a non exported project"
        elif request.target != revision.project:
            # This seems to happen when the devel project gets
            # reinitialized (for example, SR#943593 in 7zip, or
            # SR#437901 in ColorFull)
            reason = "Request target different from current project"
        elif request.source == request.target:
            # this is not a merge, but a different way to do a
            # contribution to the (devel) project - see bindfs's rev 1
            reason = "Request within the same project"
        else:
            return request

        logging.info(reason)
        return None

    def import_revision(self, revision):
        """Import one revision into git, as a merge or as a plain commit.

        The revision is flagged as ignored (and nothing is committed) when
        it cannot be expanded, when it is an automatic commit, when its
        request is a delete request, or when it brings no change to an
        already existing branch.
        """
        branch, api_url = self.projects_info[revision.project]

        logging.info(f"Importing [{revision}] to {branch}")

        self.obs.change_url(api_url)

        # Populate linkrev and replace srcmd5 from the linked
        # revision.  If the expansion fails, the revision will be ignored
        # and not imported.
        if not revision.check_expanded():
            logging.warning("Broken revision")
            revision.ignored = True
            return

        # When doing a SR, we see also a revision in the origin
        # project with the outgoing request, but without changes in
        # the project.  We can ignore them.
        #
        # If there is a request ID, it will be filtered out later,
        # when the target project is different from itself.
        plain_autobuild = revision.userid == "autobuild" and not revision.requestid
        if plain_autobuild or revision.userid == "buildservice-autocommit":
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        # Create the reference if the branch is new.  If so return
        # True.
        new_branch = self.git.checkout(branch)

        request = self.matching_request(revision) if revision.requestid else None
        if request:
            if request.type() == "delete":
                # TODO: after this comes a restore, this should be collapsed
                # before even hitting git
                logging.info("Delete request ignored")
                revision.ignored = True
                return

            logging.debug(f"Found matching request: #{revision.project} #{request}")
            if new_branch:
                # NOTE(review): the result is not checked here, so nothing
                # is imported when the request connects to no known
                # revision - confirm this is intended.
                self.import_new_revision_with_request(revision, request)
                return
            if self.import_revision_with_request(revision, request):
                return

        # Import revision as a single commit (without merging)
        self.download(revision)

        if not (new_branch or self.git.is_dirty()):
            logging.info("Skip empty commit")
            revision.ignored = True
            return

        commit = self.git.commit(
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            # TODO: Normalize better the commit message
            f"{revision.comment}\n\n{revision}",
            # Create an empty commit only if is a new branch
            allow_empty=new_branch,
        )
        revision.commit = commit
        logging.info(f"Commit {commit}")