forked from importers/git-importer

Compare commits

53 Commits

add_old_st ... main
| Author | SHA1 | Date |
|---|---|---|
| | 94e57852a0 | |
| | 5a28f62fb9 | |
| | 17888407df | |
| | d5eb5c0db6 | |
| | 5a6a55868f | |
| | 5d55a8c2fe | |
| | 32d1924a0d | |
| | 7684388193 | |
| | 4ef980d1c8 | |
| | fb80d0c105 | |
| | 44b4d690db | |
| | a69e861614 | |
| | 1da740bd8b | |
| | b3107ba3bf | |
| | 86f82325d8 | |
| | 39ba616226 | |
| | 531dbc7c1b | |
| | 1318f9e0c4 | |
| | d563076d9e | |
| | b11b3f1adb | |
| | 479738d4b2 | |
| | 2d04136ca5 | |
| | 40ad64ddff | |
| | 6bd5d72100 | |
| | 022ae5ab58 | |
| | 2ff8ed76d0 | |
| | 5f228dc046 | |
| | 4e07d8272e | |
| | 2a3475ab6e | |
| | 574bc9aa10 | |
| | 0414b33206 | |
| | b9670821a9 | |
| | 073550825c | |
| | 5a353c98d3 | |
| | 1fc466d15b | |
| | 39fde7744a | |
| | f5ffc83a69 | |
| | d0ccf83684 | |
| | b0ffb01c59 | |
| | 28d5c6e606 | |
| | 1e22c2895a | |
| | 5da7861c2a | |
| | c9e07e536f | |
| | dc0f33354e | |
| | 56cbe0a125 | |
| | 4353f015c8 | |
| | 9cbe0899bc | |
| | 9e80a64fe0 | |
| | 12001b1640 | |
| | 3797ea178a | |
| | 999dcabcfa | |
| | 9962673eff | |
| | 7b20c03256 | |
@@ -1,4 +1,4 @@
sudo zypper in python3-psycopg2
sudo zypper in python3-psycopg
sudo su - postgres
# `createdb -O <LOCAL_USER> imported_git`
@@ -7,8 +7,6 @@ import sys

import osc.core

from lib.db import DB
from lib.db_revision import DBRevision
from lib.git_exporter import GitExporter
from lib.importer import Importer
from lib.test_exporter import TestExporter
@@ -44,8 +42,8 @@ PROJECTS = [
]


def export_package(package, repodir, cachedir, gc):
    exporter = GitExporter(URL_OBS, "openSUSE:Factory", package, repodir, cachedir)
def export_package(project, package, repodir, cachedir, gc):
    exporter = GitExporter(URL_OBS, project, package, repodir, cachedir)
    exporter.set_gc_interval(gc)
    exporter.export_as_git()

@@ -53,6 +51,12 @@ def export_package(package, repodir, cachedir, gc):
def main():
    parser = argparse.ArgumentParser(description="OBS history importer into git")
    parser.add_argument("packages", help="OBS package names", nargs="*")
    parser.add_argument(
        "-p",
        "--project",
        default="openSUSE:Factory",
        help="Project to import/export, default is openSUSE:Factory",
    )
    parser.add_argument(
        "-r",
        "--repodir",
@@ -102,56 +106,6 @@ def main():
            requests_log.setLevel(logging.DEBUG)
            requests_log.propagate = True

    def check_old_package(db: DB, dir: pathlib.Path):
        md5file = dir / "MD5SUMS"
        print(md5file)
        valid_revisions = None
        with open(md5file, "rb") as f:
            for line in f.readlines():
                try:
                    md5, file = line.decode("utf-8").strip().split("  ")
                except UnicodeDecodeError:
                    logging.error(f"Corrupt MD5 file: {md5file}")
                    return
                if file == "ready":
                    continue
                if len(md5) != 32:
                    logging.error(f"Corrupt MD5 file: {md5file}")
                    return
                with db.cursor() as cur:
                    cur.execute(
                        "SELECT revision_id FROM files WHERE md5=%s AND name=%s",
                        (md5, file),
                    )
                    nrevs = set([row[0] for row in cur.fetchall()])
                    if valid_revisions is None:
                        valid_revisions = nrevs
                    else:
                        valid_revisions = valid_revisions & nrevs
                    if not valid_revisions:
                        break

        with db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions WHERE id = ANY(%s) AND project=%s",
                (list(valid_revisions), "openSUSE:Factory"),
            )
            for row in cur.fetchall():
                r = DBRevision(db, row)
                print("Valid", r, r.files_hash)
                return True

    if False:
        import os

        db = DB()
        basedir = pathlib.Path(
            f"/mounts/work/SAVE/oldpackages/stable/{args.packages[0]}"
        )
        for subdir in sorted(os.listdir(basedir)):
            if check_old_package(db, basedir / subdir):
                break

    if args.export:
        if len(args.packages) != 1:
            print("Can only export one package")
@@ -162,10 +116,13 @@ def main():
    if not args.cachedir:
        args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()

    importer = Importer(URL_OBS, "openSUSE:Factory", args.packages)
    importer = Importer(URL_OBS, args.project, args.packages)
    importer.import_into_db()
    for package in args.packages:
        export_package(package, args.repodir, args.cachedir, args.gc)
        if not importer.package_with_scmsync(args.project, package):
            export_package(args.project, package, args.repodir, args.cachedir, args.gc)
        else:
            logging.debug(f"{args.project}/{package} has scmsync links - skipping export")


if __name__ == "__main__":

gone-packages.txt (1355 lines changed, Normal file)
File diff suppressed because it is too large
@@ -14,8 +14,6 @@ def config(filename="database.ini", section="production"):
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception(
            "Section {0} not found in the {1} file".format(section, filename)
        )
        raise Exception(f"Section {section} not found in the {filename} file")

    return db
lib/db.py (19 lines changed)
@@ -1,7 +1,6 @@
import logging

import psycopg2
from psycopg2.extras import LoggingConnection
import psycopg

from lib.config import config
@@ -17,22 +16,20 @@ class DB:
            # read the connection parameters
            params = config(section=self.config_section)
            # connect to the PostgreSQL server
            self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params)
            logger = logging.getLogger(__name__)
            self.conn.initialize(logger)
            self.conn = psycopg.connect(conninfo=f"dbname={params['database']}")
            logging.getLogger("psycopg.pool").setLevel(logging.INFO)

        except (Exception, psycopg2.DatabaseError) as error:
        except (Exception, psycopg.DatabaseError) as error:
            print(error)
            raise error

    def schema_version(self):
        # create a cursor
        with self.conn.cursor() as cur:

            # execute a statement
            try:
                cur.execute("SELECT MAX(version) from scheme")
            except psycopg2.errors.UndefinedTable as error:
            except psycopg.errors.UndefinedTable:
                cur.close()
                self.close()
                self.connect()
@@ -146,9 +143,9 @@ class DB:
        )
        schemes[10] = (
            "ALTER TABLE revisions ADD COLUMN request_id INTEGER",
            """ALTER TABLE revisions 
            """ALTER TABLE revisions
               ADD CONSTRAINT request_id_foreign_key
               FOREIGN KEY (request_id) 
               FOREIGN KEY (request_id)
               REFERENCES requests (id)""",
            "UPDATE scheme SET version=10",
        )
@@ -273,7 +270,7 @@ class DB:
                        cur.execute(command)
            # commit the changes
            self.conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
        except (Exception, psycopg.DatabaseError) as error:
            print(error)
            self.close()
            raise error
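The lib/db.py hunks above move the connection handling from psycopg2 (with its LoggingConnection factory) to psycopg 3, which the first hunk of this compare installs with zypper as python3-psycopg. Below is a minimal standalone sketch of that switch; it only illustrates the psycopg 3 API, it is not the repository's DB class, and the imported_git database name is simply the one the README's createdb line creates.

```python
import logging

import psycopg  # psycopg 3, installed via `zypper in python3-psycopg`


def connect(dbname: str = "imported_git") -> psycopg.Connection:
    """Open a psycopg 3 connection, failing loudly like the DB class above."""
    try:
        # psycopg 3 takes a libpq conninfo string; psycopg2 took keyword params
        conn = psycopg.connect(conninfo=f"dbname={dbname}")
        # statement logging is configured via loggers, not a connection factory
        logging.getLogger("psycopg").setLevel(logging.INFO)
        return conn
    except psycopg.DatabaseError as error:
        logging.error(error)
        raise


if __name__ == "__main__":
    with connect() as conn, conn.cursor() as cur:
        cur.execute("SELECT version()")
        print(cur.fetchone()[0])
```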
@@ -2,7 +2,6 @@ from __future__ import annotations

from hashlib import md5
from pathlib import Path
from typing import Optional

from lib.db import DB
from lib.obs_revision import OBSRevision
@@ -205,8 +204,13 @@ class DBRevision:
                    and self.package == "_project"
                ):
                    continue

                # do not import _service:* files as those are created by OBS on source imports
                if entry.get("name")[0:9] == "_service:":
                    continue

                cur.execute(
                    """INSERT INTO files (name, md5, size, mtime, revision_id) 
                    """INSERT INTO files (name, md5, size, mtime, revision_id)
                            VALUES (%s,%s,%s,%s,%s)""",
                    (
                        entry.get("name"),
@@ -255,7 +259,7 @@ class DBRevision:
        self._files.sort(key=lambda x: x["name"])
        return self._files

    def calc_delta(self, current_rev: Optional[DBRevision]):
    def calc_delta(self, current_rev: DBRevision | None):
        """Calculate the list of files to download and to delete.

        Param current_rev is the revision that's currently checked out.
        If it's None, the repository is empty.
@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):

    def __init__(self, rebase_devel=False) -> None:
        super().__init__()
        self.flats = []
        self.flats:list[FlatNode] = []
        # the rebase_devel won't work as such as rebasing the branch needs an explicit action
        self.rebase_devel = rebase_devel
        # remember the last merge point so we can know the parent of it for the root of the sources
lib/git.py (203 lines changed)
@@ -4,7 +4,6 @@ import os
import pathlib
import subprocess

import pygit2
import requests

from lib.binary import BINARY
@@ -20,11 +19,6 @@ class Git:
        self.committer = committer
        self.committer_email = committer_email

        self.repo = None

    def is_open(self):
        return self.repo is not None

    def exists(self):
        """Check if the path is a valid git repository"""
        return (self.path / ".git").exists()
@@ -34,36 +28,70 @@ class Git:
        self.path.mkdir(parents=True, exist_ok=True)
        self.open()

    def git_run(self, args, **kwargs):
        """Run a git command"""
        if "env" in kwargs:
            envs = kwargs["env"].copy()
            del kwargs["env"]
        else:
            envs = os.environ.copy()
        envs["GIT_LFS_SKIP_SMUDGE"] = "1"
        envs["GIT_CONFIG_GLOBAL"] = "/dev/null"
        return subprocess.run(
            ["git"] + args,
            cwd=self.path,
            check=True,
            env=envs,
            **kwargs,
        )

    def open(self):
        # Convert the path to string, to avoid some limitations in
        # older pygit2
        self.repo = pygit2.init_repository(str(self.path))
        if not self.exists():
            self.git_run(["init", "--object-format=sha256", "-b", "factory"])
        self.git_run(["config", "lfs.allowincompletepush", "true"])

    def is_dirty(self):
        """Check if there is something to commit"""
        assert self.is_open()

        return self.repo.status()
        status_str = self.git_run(
            ["status", "--porcelain=2"],
            stdout=subprocess.PIPE,
        ).stdout.decode("utf-8")
        return len(list(filter(None, status_str.split("\n")))) > 0

    def branches(self):
        return list(self.repo.branches)
        br = (
            self.git_run(
                ["for-each-ref", "--format=%(refname:short)", "refs/heads/"],
                stdout=subprocess.PIPE,
            )
            .stdout.decode("utf-8")
            .split()
        )
        if len(br) == 0:
            br.append("factory")  # unborn branch?
        return br

    def branch(self, branch, commit=None):
        if not commit:
            commit = self.repo.head
        else:
            commit = self.repo.get(commit)
        self.repo.branches.local.create(branch, commit)
    def branch(self, branch, commit="HEAD"):
        commit = (
            self.git_run(
                ["rev-parse", "--verify", "--end-of-options", commit + "^{commit}"],
                stdout=subprocess.PIPE,
            )
            .stdout.decode("utf-8")
            .strip()
        )
        return self.git_run(["branch", branch, commit])

    def checkout(self, branch):
        """Checkout into the branch HEAD"""
        new_branch = False
        ref = f"refs/heads/{branch}"
        if branch not in self.branches():
            self.repo.references["HEAD"].set_target(ref)
            self.git_run(["switch", "-q", "--orphan", branch])
            new_branch = True
        else:
            self.repo.checkout(ref)
            ref = f"refs/heads/{branch}"
            if (self.path / ".git" / ref).exists():
                self.git_run(["switch", "--no-guess", "-q", branch])
        return new_branch

    def commit(
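The @@ -34,36 +28,70 @@ hunk above replaces pygit2 calls with a thin wrapper that shells out to the git CLI with a pinned environment (no LFS smudging, no user gitconfig). Here is a self-contained sketch of that pattern, using illustrative names rather than the repository's Git class:

```python
import os
import pathlib
import subprocess


def git_run(path: pathlib.Path, args: list[str], **kwargs) -> subprocess.CompletedProcess:
    """Run `git <args>` inside `path` with a controlled environment."""
    env = kwargs.pop("env", os.environ).copy()
    env["GIT_LFS_SKIP_SMUDGE"] = "1"        # never smudge LFS blobs on checkout
    env["GIT_CONFIG_GLOBAL"] = "/dev/null"  # ignore the user's ~/.gitconfig
    return subprocess.run(["git"] + args, cwd=path, check=True, env=env, **kwargs)


if __name__ == "__main__":
    repo = pathlib.Path("/tmp/example-repo")
    repo.mkdir(parents=True, exist_ok=True)
    # sha256 object format and a "factory" start branch, as in the hunk above
    git_run(repo, ["init", "--object-format=sha256", "-b", "factory"])
    try:
        head = git_run(repo, ["rev-parse", "--verify", "HEAD"], stdout=subprocess.PIPE)
        print("HEAD:", head.stdout.decode("utf-8").strip())
    except subprocess.CalledProcessError:
        print("unborn branch, nothing committed yet")
```

The env handling mirrors the diff: whatever environment the caller supplies is extended with the two GIT_* overrides rather than silently replaced by the user's global configuration.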
@@ -87,51 +115,85 @@ class Git:
            committer_time = committer_time if committer_time else user_time

        if self.is_dirty():
            self.repo.index.add_all()
            self.git_run(["add", "--all", "."])

        self.repo.index.write()
        author = pygit2.Signature(user, user_email, int(user_time.timestamp()))
        committer = pygit2.Signature(
            committer, committer_email, int(committer_time.timestamp())
        tree_id = (
            self.git_run(["write-tree"], stdout=subprocess.PIPE)
            .stdout.decode("utf-8")
            .strip()
        )

        tree = self.repo.index.write_tree()
        return self.repo.create_commit(
            "HEAD", author, committer, message, tree, parents
        parent_array = []
        if isinstance(parents, list):
            for parent in filter(None, parents):
                parent_array = parent_array + ["-p", parent]
        elif isinstance(parents, str):
            parent_array = ["-p", parents]

        commit_id = (
            self.git_run(
                ["commit-tree"] + parent_array + [tree_id],
                env={
                    "GIT_AUTHOR_NAME": user,
                    "GIT_AUTHOR_EMAIL": user_email,
                    "GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000",
                    "GIT_COMMITTER_NAME": committer,
                    "GIT_COMMITTER_EMAIL": committer_email,
                    "GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000",
                },
                input=message.encode("utf-8"),
                stdout=subprocess.PIPE,
            )
            .stdout.decode("utf-8")
            .rstrip()
        )
        self.git_run(["reset", "--soft", commit_id])
        return commit_id

    def branch_head(self, branch="HEAD"):
        return (
            self.git_run(
                ["rev-parse", "--verify", "--end-of-options", branch],
                stdout=subprocess.PIPE,
            )
            .stdout.decode("utf-8")
            .strip()
        )

    def last_commit(self):
        try:
            return self.repo.head.target
        except:
            return None

    def branch_head(self, branch):
        return self.repo.references["refs/heads/" + branch].target
    def branch_commit(self, branch="HEAD"):
      try:
        return (self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE).stdout.decode("utf-8").strip())
      except:
        return ''

    def set_branch_head(self, branch, commit):
        self.repo.references["refs/heads/" + branch].set_target(commit)
        return self.git_run(["update-ref", f"refs/heads/{branch}", commit])

    def gc(self):
        logging.debug(f"Garbage recollect and repackage {self.path}")
        subprocess.run(
            ["git", "gc", "--auto"],
            cwd=self.path,
        self.git_run(
            ["gc", "--auto"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

    def clean(self):
        for path, _ in self.repo.status().items():
            logging.debug(f"Cleaning {path}")
            try:
                (self.path / path).unlink()
                self.repo.index.remove(path)
            except Exception as e:
                logging.warning(f"Error removing file {path}: {e}")
    #    def clean(self):
    #        for path, _ in self.repo.status().items():
    #            logging.debug(f"Cleaning {path}")
    #            try:
    #                (self.path / path).unlink()
    #                self.repo.index.remove(path)
    #            except Exception as e:
    #                logging.warning(f"Error removing file {path}: {e}")

    def add(self, filename):
        self.repo.index.add(filename)
        self.git_run(["add", ":(literal)" + str(filename)])

    def add_default_gitignore(self):
        if not (self.path / ".gitignore").exists():
            with (self.path / ".gitignore").open("w") as f:
                f.write(".osc\n")
            self.add(".gitignore")

    def add_default_lfs_gitattributes(self, force=False):
        if not (self.path / ".gitattributes").exists() or force:
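In the commit() rewrite above, commits are assembled with git plumbing instead of pygit2: stage everything, `write-tree`, `commit-tree` with author and committer identity and dates passed through the environment, then `reset --soft` to advance the branch. A condensed sketch of that sequence follows; the function and parameter names are illustrative, and a single optional parent stands in for the diff's parent list:

```python
import os
import subprocess


def plumbing_commit(path, message, name, email, timestamp, parent=None):
    """Create a commit from the work tree using write-tree/commit-tree, then move HEAD."""
    def run(args, **kwargs):
        return subprocess.run(["git"] + args, cwd=path, check=True, **kwargs)

    run(["add", "--all", "."])
    tree_id = run(["write-tree"], stdout=subprocess.PIPE).stdout.decode("utf-8").strip()

    cmd = ["commit-tree", tree_id]
    if parent:
        cmd += ["-p", parent]  # the diff appends one -p per parent for merges

    env = os.environ.copy()
    env.update(
        {
            "GIT_AUTHOR_NAME": name,
            "GIT_AUTHOR_EMAIL": email,
            "GIT_AUTHOR_DATE": f"{timestamp} +0000",
            "GIT_COMMITTER_NAME": name,
            "GIT_COMMITTER_EMAIL": email,
            "GIT_COMMITTER_DATE": f"{timestamp} +0000",
        }
    )
    commit_id = run(
        cmd, env=env, input=message.encode("utf-8"), stdout=subprocess.PIPE
    ).stdout.decode("utf-8").strip()

    # point the current branch at the new commit without touching the work tree
    run(["reset", "--soft", commit_id])
    return commit_id
```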
@@ -185,9 +247,9 @@ class Git:
            return any(fnmatch.fnmatch(filename, line) for line in patterns)

    def remove(self, file: pathlib.Path):
        self.repo.index.remove(file.name)
        (self.path / file).unlink()

        self.git_run(
            ["rm", "-q", "-f", "--ignore-unmatch", ":(literal)" + file.name],
        )
        patterns = self.get_specific_lfs_gitattributes()
        if file.name in patterns:
            patterns.remove(file.name)
@@ -195,16 +257,16 @@ class Git:

    def add_gitea_remote(self, package):
        repo_name = package.replace("+", "_")
        org_name = "rpm"
        org_name = "pool"

        if not os.getenv("GITEA_TOKEN"):
            logging.warning("Not adding a remote due to missing $GITEA_TOKEN")
            return

        url = f"https://gitea.opensuse.org/api/v1/org/{org_name}/repos"
        url = f"https://src.opensuse.org/api/v1/org/{org_name}/repos"
        response = requests.post(
            url,
            data={"name": repo_name},
            data={"name": repo_name, "object_format_name": "sha256"},
            headers={"Authorization": f"token {os.getenv('GITEA_TOKEN')}"},
            timeout=10,
        )
@@ -212,16 +274,21 @@ class Git:
        # 201 Created
        if response.status_code not in (201, 409):
            print(response.data)
        url = f"gitea@gitea.opensuse.org:{org_name}/{repo_name}.git"
        self.repo.remotes.create("origin", url)
        url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git"
        self.git_run(
            ["remote", "add", "origin", url],
        )

    def push(self):
        remo = self.repo.remotes["origin"]
    def push(self, force=False):
        if "origin" not in self.git_run(
            ["remote"],
            stdout=subprocess.PIPE,
        ).stdout.decode("utf-8"):
            logging.warning("Not pushing to remote because no 'origin' configured")
            return

        keypair = pygit2.KeypairFromAgent("gitea")
        callbacks = pygit2.RemoteCallbacks(credentials=keypair)

        refspecs = ["refs/heads/factory"]
        if "refs/heads/devel" in self.repo.references:
            refspecs.append("refs/heads/devel")
        remo.push(refspecs, callbacks=callbacks)
        cmd = ["push"]
        if force:
            cmd.append("-f")
        cmd += ["origin", "--all"]
        self.git_run(cmd)
@@ -1,8 +1,12 @@
import logging
import os
from urllib.parse import parse_qs

import psycopg
from urllib3.util import url
import yaml

from lib import db
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
@@ -12,6 +16,12 @@ from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
from lib.user import User

def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

class GitExporter:
    def __init__(self, api_url, project, package, repodir, cachedir):
@@ -30,49 +40,106 @@ class GitExporter:
        else:
            self.git.create()
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        self.gc_interval = gc

    def check_repo_state(self, flats, branch_state):
    def reconstruct_state(self, flats):
        state_data = dict()
        if os.path.exists(self.state_file):
            with open(self.state_file, "r") as f:
                state_data = yaml.safe_load(f)
                if type(state_data) != dict:
                    state_data = {}
        prefix = "OBS-URL: "
        for line in self.git.branch_commit("factory").splitlines():
            if line.startswith(prefix):
                u = url.parse_url(line.strip(prefix))
                if u.path != f"/package/show/openSUSE:Factory/{self.package}" or "rev=" not in u.query:
                    continue
                v = parse_qs(u.query)
                rev = v['rev'][0]
                with self.db.cursor() as cur:
                    try:
                        if is_number(rev):
                            cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", ('openSUSE:Factory', self.package, rev,))
                        else:
                            cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", ('openSUSE:Factory', self.package, rev, rev))
                    except psycopg.Error as e:
                        logging.error(e)
                        self.db.conn.rollback()
                    row = cur.fetchone()
                    if not row:
                        return state_data
                    state_data['factory'] = row[0]
        try:
            print("devel reconstruct")
            d = self.devel_rev(flats)
            if d is not None:
                prj = d.commit.project
                for line in self.git.branch_commit("devel").splitlines():
                    if line.startswith(prefix):
                        u = url.parse_url(line.strip(prefix))
                        if u.path != f"/package/show/{prj}/{self.package}" or u.query is None or "rev=" not in u.query:
                            continue
                        v = parse_qs(u.query)
                        rev = v['rev'][0]
                        try:
                            with self.db.cursor() as cur:
                                logging.debug(f"finding id for ({prj, self.package, rev}")
                                if is_number(rev):
                                    cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", (prj, self.package, rev,))
                                else:
                                    cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", (prj, self.package, rev,))
                                row = cur.fetchone()
                                if not row:
                                    logging.info(" ** cannot find revision for devel branch:", rev)
                                    return state_data
                                state_data['devel'] = row[0]
                        except psycopg.Error as e:
                            logging.error(e)
                            self.db.conn.rollback()
                            if state_data['factory'] is not None:
                                state_data['devel'] = state_data['factory']
        except:
            if state_data['factory'] is not None:
                state_data['devel'] = state_data['factory']
        return state_data

    def check_repo_state(self, flats, branch_state, branch):
        state_data = self.reconstruct_state(flats)

        logging.debug(f"state data: {state_data}")
        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in ["factory", "devel"]:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    left_to_commit = []
                    found_state = True
            if flat.commit.dbid == state_data.get(branch):
                branch_state[branch] = flat.commit
                flat.commit.git_commit = self.git.branch_head(branch)
                logging.debug(
                    f"Found {self.git.path}'s {branch} branch in state {flat}"
                )
                left_to_commit = []
                found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def devel_rev(self, tree):
        for flat in tree:
            if flat.branch == "devel":
                return flat
        return None

    def export_as_git(self):
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        flats = tree.as_flat_list()
        added_commits = False

        branch_state = {"factory": None, "devel": None}
        left_to_commit = self.check_repo_state(flats, branch_state)

        if not left_to_commit:
        if tree == None:  # eg. python-M2Crypto errors
            return

        logging.info(f"Commiting into {self.git.path}")
        flats = tree.as_flat_list()
        branch_state = {"factory": None, "devel": None}
        left_to_commit = self.check_repo_state(flats, branch_state, "factory")
        logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
        self.run_gc()
        users = dict()
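reconstruct_state() above recovers which database revision a branch head corresponds to by reading the `OBS-URL:` trailer that the exporter writes into every commit message, instead of trusting `.git/_flat_state.yaml` alone. Below is a small sketch of just that parsing step; the commit message and the python-foo package name are made-up examples, and the `url` helper is the urllib3 one imported in the hunk:

```python
from urllib.parse import parse_qs

from urllib3.util import url

COMMIT_MESSAGE = """Accepting request 1000000 from devel:languages:python

OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-foo?expand=0&rev=42
"""


def find_obs_rev(message: str, project: str, package: str) -> str | None:
    """Return the rev= value of the OBS-URL trailer matching project/package, if any."""
    prefix = "OBS-URL: "
    for line in message.splitlines():
        if not line.startswith(prefix):
            continue
        u = url.parse_url(line[len(prefix):])
        if u.path != f"/package/show/{project}/{package}" or not u.query:
            continue
        if "rev=" in u.query:
            return parse_qs(u.query)["rev"][0]
    return None


print(find_obs_rev(COMMIT_MESSAGE, "openSUSE:Factory", "python-foo"))  # prints: 42
```

From there the hunk looks the revision up in the revisions table, by rev when the value is numeric and by expanded_srcmd5 otherwise.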
@@ -85,8 +152,40 @@ class GitExporter:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)
            added_commits = True

        self.git.push()
        # export the devel_tree head commits based on the devel branch
        if self.project == "openSUSE:Factory":
            devel_head = self.devel_rev(flats)
            flat_devel = None
            if devel_head is not None:
                logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
                flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
                for f in flat_devel:
                    f.branch = "devel"

            if flat_devel is not None:
                left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
                logging.debug(branch_state)
                logging.debug(f"appending {len(left_to_commit)} items on top of devel")
                for flat in left_to_commit:
                    if flat.commit.userid not in users:
                        users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
                    flat.user = users[flat.commit.userid]
                    self.gc_cnt -= 1
                    if self.gc_cnt <= 0 and self.gc_interval:
                        self.run_gc()
                    logging.debug(f"Committing {flat}")
                    self.commit_flat(flat, branch_state)
                    added_commits = True

        # make sure that we create devel branch
        if not branch_state["devel"]:
            logging.debug("force creating devel")
            self.git.set_branch_head("devel", self.git.branch_head("factory"))

        #if added_commits:
        #    self.git.push(force=True)

    def run_gc(self):
        self.gc_cnt = self.gc_interval
@@ -98,6 +197,10 @@ class GitExporter:
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        # don't export imported _service: files, if any
        if file.name[0:9] == '_service:':
            return

        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
@@ -150,6 +253,7 @@ class GitExporter:

        # create file if not existant
        self.git.add_default_lfs_gitattributes(force=False)
        self.git.add_default_gitignore()

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
@@ -166,10 +270,3 @@ class GitExporter:
        )
        flat.commit.git_commit = commit
        branch_state[flat.branch] = flat.commit
        with open(self.state_file, "w") as f:
            data = {}
            for branch in ["factory", "devel"]:
                commit = branch_state[branch]
                if commit:
                    data[branch] = commit.dbid
            yaml.dump(data, f)
lib/importer.py (103 lines changed)
@@ -1,5 +1,5 @@
import concurrent.futures
import logging
import pathlib
import xml.etree.ElementTree as ET

from lib.db import DB
@@ -26,11 +26,15 @@ class Importer:
        # Import multiple Factory packages into the database
        self.packages = packages
        self.project = project
        self.scmsync_cache = dict()
        self.packages_with_scmsync = set()

        self.db = DB()
        self.obs = OBS(api_url)
        assert project == "openSUSE:Factory"
        assert not self.has_scmsync(project)
        self.refreshed_packages = set()
        self.gone_packages_set = None


    def import_request(self, number):
        self.obs.request(number).import_into_db(self.db)
@@ -38,6 +42,8 @@ class Importer:
    def update_db_package(self, project, package):
        root = self.obs._history(project, package)
        if root is None:
            if self.project == "openSUSE:Factory" and project == self.project:
                exit(10)
            return
        latest = DBRevision.max_rev(self.db, project, package)
        for r in root.findall("revision"):
@@ -105,7 +111,7 @@ class Importer:
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT * FROM revisions WHERE id IN
                            (SELECT revision_id from linked_revs WHERE linked_id=%s) 
                            (SELECT revision_id from linked_revs WHERE linked_id=%s)
                            AND commit_time <= %s ORDER BY commit_time""",
                (prev.dbid, rev.commit_time),
            )
@@ -138,7 +144,7 @@ class Importer:
            fake_rev = linked.rev + rev.rev / 1000.0
            comment = f"Updating link to change in {rev.project}/{rev.package} revision {int(rev.rev)}"
            cur.execute(
                """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5, 
                """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
                            commit_time, userid, comment, api_url) VALUES(%s,%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
                (
                    linked.project,
@@ -161,10 +167,12 @@ class Importer:
                (rev.dbid, linked.dbid),
            )

    def revisions_without_files(self):
    def revisions_without_files(self, package):
        logging.debug(f"revisions_without_files({package})")
        with self.db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
                "SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL",
                (package,),
            )
            return [DBRevision(self.db, row) for row in cur.fetchall()]
@@ -178,11 +186,11 @@ class Importer:
            linked_rev = cur.fetchone()
        if linked_rev:
            linked_rev = linked_rev[0]
        list = self.obs.list(
        obs_dir_list = self.obs.list(
            rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev
        )
        if list:
            rev.import_dir_list(list)
        if obs_dir_list:
            rev.import_dir_list(obs_dir_list)
            md5 = rev.calculate_files_hash()
            with self.db.cursor() as cur:
                cur.execute(
@@ -196,53 +204,47 @@ class Importer:
        self.find_linked_revs()

        self.find_fake_revisions()
        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
            fs = [
                executor.submit(import_rev, self, rev)
                for rev in self.revisions_without_files()
            ]
        concurrent.futures.wait(fs)
        for package in self.packages:
            for rev in self.revisions_without_files(package):
                print(f"rev {rev} is without files")
                self.import_rev(rev)

    def refresh_package(self, project, package):
        key = f"{project}/{package}"
        if key in self.refreshed_packages:
            # refreshing once is good enough
            return
        if self.package_gone(key):
            return
        logging.debug(f"Refresh {project}/{package}")
        self.refreshed_packages.add(key)
        if self.has_scmsync(project) or self.has_scmsync(key):
            self.packages_with_scmsync.add((project, package))
            logging.debug(f"{project}/{package} already in Git - skipping")
            return
        self.update_db_package(project, package)
        self.fetch_all_linked_packages(project, package)

    def import_into_db(self):
        for package in self.packages:
            refresh_package(self, self.project, package)

        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
            fs = [
                executor.submit(refresh_package, self, self.project, package)
                for package in self.packages
            ]
            concurrent.futures.wait(fs)
        self.db.conn.commit()

            self.db.conn.commit()
        for number in DBRevision.requests_to_fetch(self.db):
            self.import_request(number)

            fs = [
                executor.submit(import_request, self, number)
                for number in DBRevision.requests_to_fetch(self.db)
            ]
            concurrent.futures.wait(fs)
        self.db.conn.commit()

            self.db.conn.commit()
        with self.db.cursor() as cur:
            cur.execute(
                """SELECT DISTINCT source_project,source_package FROM requests
                        WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
                (self.project, self.packages),
            )
            for project, package in cur.fetchall():
                self.refresh_package(project, package)

            with self.db.cursor() as cur:
                cur.execute(
                    """SELECT DISTINCT source_project,source_package FROM requests 
                            WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
                    (self.project, self.packages),
                )
                fs = [
                    executor.submit(refresh_package, self, project, package)
                    for project, package in cur.fetchall()
                ]
                concurrent.futures.wait(fs)
        self.db.conn.commit()

        missing_users = User.missing_users(self.db)
@@ -254,3 +256,26 @@ class Importer:

        self.fill_file_lists()
        self.db.conn.commit()

    def package_gone(self, key):
        if not self.gone_packages_set:
            self.gone_packages_set = set()
            with open(pathlib.Path(__file__).parent.parent / "gone-packages.txt") as f:
                for line in f.readlines():
                    self.gone_packages_set.add(line.strip())
        return key in self.gone_packages_set

    def has_scmsync(self, key):
        if key in self.scmsync_cache:
            return self.scmsync_cache[key]

        root = self.obs._meta(key)
        scmsync_exists = False
        if root is not None:
            scmsync_exists = root.find('scmsync') is not None
        self.scmsync_cache[key] = scmsync_exists
        return scmsync_exists

    def package_with_scmsync(self, project, package):
        return (project, package) in self.packages_with_scmsync
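The new has_scmsync() and package_with_scmsync() helpers above decide whether a project or package is already maintained in Git by looking for an <scmsync> element in its OBS _meta document, and the importer then skips refreshing and exporting such packages. A standalone sketch of that check against a raw _meta document, where the XML snippet is a made-up example:

```python
import xml.etree.ElementTree as ET

# Rough shape of a package _meta that already points at a Git repository (illustrative).
META_XML = """<package name="python-foo" project="openSUSE:Factory">
  <title/>
  <description/>
  <scmsync>https://src.opensuse.org/pool/python-foo#factory</scmsync>
</package>
"""


def has_scmsync(meta_xml: str) -> bool:
    """True if the _meta document carries an <scmsync> element."""
    root = ET.fromstring(meta_xml)
    return root.find("scmsync") is not None


print(has_scmsync(META_XML))  # prints: True
```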
@@ -68,7 +68,7 @@ class LFSOid:
            row = cur.fetchone()
            lfs_oid_id = row[0]
            cur.execute(
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id) 
                """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
                        VALUES (%s,%s,%s)""",
                (package, filename, lfs_oid_id),
            )
@@ -83,7 +83,7 @@ class LFSOid:
            self.register()

    def check(self):
        url = f"http://gitea.opensuse.org:9999/check/{self.sha256}/{self.size}"
        url = f"http://localhost:9999/check/{self.sha256}/{self.size}"
        response = requests.get(
            url,
            timeout=10,
@@ -127,12 +127,13 @@ class LFSOid:
            "size": self.size,
        }

        url = "http://gitea.opensuse.org:9999/register"
        url = "http://localhost:9999/register"
        response = requests.post(
            url,
            json=data,
            timeout=10,
        )
        response.raise_for_status()
        logging.info(f"Register LFS returned {response.status_code}")
@@ -167,7 +168,7 @@ if __name__ == "__main__":
        cur.execute(
            """
            CREATE TEMPORARY TABLE lfs_oid_in_revision (
                revision_id INTEGER, 
                revision_id INTEGER,
                lfs_oid_id  INTEGER NOT NULL,
                name        VARCHAR(255) NOT NULL
            )
lib/obs.py (34 lines changed)
							@@ -73,11 +73,11 @@ class OBS:
 | 
			
		||||
        logging.debug(f"GET {url}")
 | 
			
		||||
        return ET.parse(osc.core.http_GET(url)).getroot()
 | 
			
		||||
 | 
			
		||||
    def _meta(self, project, package, **params):
 | 
			
		||||
    def _meta(self, key, **params):
 | 
			
		||||
        try:
 | 
			
		||||
            root = self._xml(f"source/{project}/{package}/_meta", **params)
 | 
			
		||||
            root = self._xml(f"source/{key}/_meta", **params)
 | 
			
		||||
        except HTTPError:
 | 
			
		||||
            logging.error(f"Package [{project}/{package} {params}] has no meta")
 | 
			
		||||
            logging.error(f"Project/Package [{key} {params}] has no meta")
 | 
			
		||||
            return None
 | 
			
		||||
        return root
 | 
			
		||||
 | 
			
		||||
@@ -118,13 +118,13 @@ class OBS:
 | 
			
		||||
        return root
 | 
			
		||||
 | 
			
		||||
    def exists(self, project, package):
 | 
			
		||||
        root = self._meta(project, package)
 | 
			
		||||
        root = self._meta(f"{project}/{package}")
 | 
			
		||||
        if root is None:
 | 
			
		||||
            return False
 | 
			
		||||
        return root.get("project") == project
 | 
			
		||||
 | 
			
		||||
    def devel_project(self, project, package):
 | 
			
		||||
        root = self._meta(project, package)
 | 
			
		||||
        root = self._meta(f"{project}/{package}")
 | 
			
		||||
        devel = root.find("devel")
 | 
			
		||||
        if devel is None:
 | 
			
		||||
            return None
 | 
			
		||||
@@ -148,12 +148,21 @@ class OBS:
        ]

    def _download(self, project, package, name, revision):
        url = osc.core.makeurl(
            self.url,
            ["source", project, package, urllib.parse.quote(name)],
            {"rev": revision, "expand": 1},
        )
        return osc.core.http_GET(url)
        try:
            url = osc.core.makeurl(
                self.url,
                ["source", project, package, name],
                {"rev": revision, "expand": 1},
            )
            return osc.core.http_GET(url)
        except HTTPError as e:
            if e.status == 404:
                url = osc.core.makeurl(
                    self.url,
                    ["source", project, package, name],
                    {"rev": revision, "expand": 1, "deleted": 1},
                )
                return osc.core.http_GET(url)

    def download(
        self,
@@ -165,7 +174,6 @@ class OBS:
        cachedir: str,
        file_md5: str,
    ) -> None:

        cached_file = self._path_from_md5(name, cachedir, file_md5)
        if not self.in_cache(name, cachedir, file_md5):
            with (dirpath / name).open("wb") as f:
@@ -190,7 +198,7 @@ class OBS:
        try:
            root = self._xml(f"source/{project}/{package}", **params)
        except HTTPError as e:
            if e.code == 400:
            if e.code == 400 or e.code == 404:
                logging.error(
                    f"Package [{project}/{package} {params}] can't be expanded: {e}"
                )
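In the @@ -148,12 +148,21 @@ hunk above, _download now falls back to requesting the file with deleted=1 when the plain route answers 404, so sources of packages that were meanwhile removed from the project can still be exported; it also drops the urllib.parse.quote() around the file name. A standalone sketch of the same retry pattern using plain urllib (the importer itself goes through the authenticated osc.core.http_GET); the apiurl parameter and the function name are illustrative:

from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import urlopen

def fetch_source_file(apiurl, project, package, name, revision):
    """Fetch a source file, falling back to deleted=1 if the package is gone.

    Hypothetical standalone equivalent of OBS._download(); real requests need
    authentication, which osc.core.http_GET handles inside the importer.
    """
    base = f"{apiurl}/source/{project}/{package}/{name}"
    try:
        return urlopen(f"{base}?{urlencode({'rev': revision, 'expand': 1})}")
    except HTTPError as e:
        if e.code != 404:
            raise
        return urlopen(f"{base}?{urlencode({'rev': revision, 'expand': 1, 'deleted': 1})}")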
@@ -7,8 +7,6 @@ except:
    print("Install python3-python-magic, not python3-magic")
    raise

import requests

from lib.db import DB
from lib.lfs_oid import LFSOid
from lib.obs import OBS
@@ -43,7 +41,6 @@ class ProxySHA256:
            }

    def put(self, project, package, name, revision, file_md5, size):

        if not self.mime:
            self.mime = magic.Magic(mime=True)
@@ -53,12 +50,12 @@ class ProxySHA256:
        sha = hashlib.sha256()
        while True:
            buffer = fin.read(10000)
            if not buffer:
                break
            sha.update(buffer)
            # only guess from the first 10K
            if not mimetype:
                mimetype = self.mime.from_buffer(buffer)
            if not buffer:
                break
            sha.update(buffer)
        fin.close()
        LFSOid(self.db).add(
            project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
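The put() loop above hashes the file in 10 kB chunks while sniffing the MIME type from the first chunk it sees; this commit moves the end-of-stream check after the MIME guess. A standalone helper that illustrates the same idea (not the importer's own code, just a sketch using the python-magic API the import guard above insists on):

import hashlib

import magic  # python3-python-magic, as required at the top of the module

def sha256_and_mimetype(path, chunk_size=10000):
    """Hash a file in chunks and sniff its MIME type from the first chunk."""
    sha = hashlib.sha256()
    mimetype = None
    sniffer = magic.Magic(mime=True)
    with open(path, "rb") as fin:
        while True:
            buffer = fin.read(chunk_size)
            if not buffer:
                break
            if mimetype is None:
                mimetype = sniffer.from_buffer(buffer)
            sha.update(buffer)
    return sha.hexdigest(), mimetype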
@@ -1,4 +1,3 @@
from typing import Dict
from xmlrpc.client import Boolean

from lib.db_revision import DBRevision
@@ -114,7 +113,7 @@ class TreeBuilder:
                candidates.append(node)
            if node.merged_into:
                # we can't have candidates that are crossing previous merges
                # see https://gitea.opensuse.org/importers/git-importer/issues/14
                # see https://src.opensuse.org/importers/git-importer/issues/14
                candidates = []
            node = node.parent
        if candidates:
@@ -138,7 +137,7 @@ class TreeBuilder:
                self.requests.add(node.revision.request_id)

        class FindMergeWalker(AbstractWalker):
            def __init__(self, builder: TreeBuilder, requests: Dict) -> None:
            def __init__(self, builder: TreeBuilder, requests: dict) -> None:
                super().__init__()
                self.source_revisions = dict()
                self.builder = builder
@@ -217,6 +216,8 @@ class TreeBuilder:
    def build(self, project, package):
        """Create a Factory tree (returning the top)"""
        factory_revisions = self.revisions_chain(project, package)
        if factory_revisions == None:
            return None
        self.add_merge_points(factory_revisions)
        # factory_revisions.print()
        self.prune_loose_end(factory_revisions)

61 opensuse-monitor.py (Executable file)
@@ -0,0 +1,61 @@
#!/usr/bin/python3
import json
from pathlib import Path
import pika
import random
import time

MY_TASKS_DIR = Path(__file__).parent / "tasks"


def listen_events():
    connection = pika.BlockingConnection(
        pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org")
    )
    channel = connection.channel()

    channel.exchange_declare(
        exchange="pubsub", exchange_type="topic", passive=True, durable=False
    )

    result = channel.queue_declare("", exclusive=True)
    queue_name = result.method.queue

    channel.queue_bind(
        exchange="pubsub", queue=queue_name, routing_key="opensuse.obs.package.commit"
    )

    print(" [*] Waiting for logs. To exit press CTRL+C")

    def callback(ch, method, properties, body):
        if method.routing_key not in ("opensuse.obs.package.commit",):
            return
        body = json.loads(body)
        if (
            "project" in body
            and "package" in body
            and body["project"] == "openSUSE:Factory"
        ):
            # Strip multibuild flavors
            package = body["package"].partition(':')[0]
            if "/" in package:
                return

            (MY_TASKS_DIR / package).touch()
            print(" [x] %r:%r" % (method.routing_key, body["package"]))

    channel.basic_consume(queue_name, callback, auto_ack=True)

    channel.start_consuming()


def main():
    while True:
        try:
            listen_events()
        except (pika.exceptions.ConnectionClosed, pika.exceptions.AMQPHeartbeatTimeout):
            time.sleep(random.randint(10, 100))


if __name__ == "__main__":
    main()
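
opensuse-monitor.py subscribes to the public rabbit.opensuse.org AMQP bus and, for every commit message concerning openSUSE:Factory, touches an empty file named after the package under tasks/. The message bodies are JSON; a minimal sketch of the shape the callback expects, useful for exercising the filtering logic without a live AMQP connection (the payload below is hypothetical and only carries the fields the script reads):

import json

# Hypothetical payload; real bus messages contain more fields.
message = {"project": "openSUSE:Factory", "package": "curl:mini"}
body = json.dumps(message).encode()

# The callback strips multibuild flavors, so "curl:mini" becomes "curl"
# and tasks/curl is touched for the importer to pick up.
package = json.loads(body)["package"].partition(":")[0]
print(package)  # -> curl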

1 tasks/.gitignore (vendored, Normal file)
@@ -0,0 +1 @@
*

19 update-tasks.sh (Executable file)
@@ -0,0 +1,19 @@
#!/bin/bash
#
cd /space/dmueller/git-importer

source credentials.sh

while true; do
   for i in $PWD/tasks/*; do
       if test -f "$i"; then
            echo "$(date): Importing $(basename $i)"
            if ! python3 ./git-importer.py -c repos/.cache $(basename $i); then
                mkdir -p $PWD/failed-tasks
                mv -f $i $PWD/failed-tasks
            fi
            rm -f $i
        fi
   done
   inotifywait -q -e create $PWD/tasks
done
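
update-tasks.sh is the consumer half of the monitor: it drains the tasks/ directory that opensuse-monitor.py fills, runs the importer once per package, moves failures aside, and then blocks on inotifywait until new task files appear. A rough Python equivalent of the same loop, for illustration only (it polls instead of using inotify, the paths and importer invocation are assumed to match the script above, and the credentials.sh setup is omitted):

import subprocess
import time
from pathlib import Path

BASE = Path("/space/dmueller/git-importer")   # assumed, as in the shell script
TASKS = BASE / "tasks"
FAILED = BASE / "failed-tasks"

while True:
    for task in sorted(TASKS.iterdir()):
        if not task.is_file():
            continue
        print(f"Importing {task.name}")
        result = subprocess.run(
            ["python3", "./git-importer.py", "-c", "repos/.cache", task.name],
            cwd=BASE,
        )
        if result.returncode != 0:
            FAILED.mkdir(exist_ok=True)
            task.replace(FAILED / task.name)  # keep failed tasks for inspection
        else:
            task.unlink()
    time.sleep(10)  # the shell script waits on inotifywait instead of polling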