forked from importers/git-importer
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			use_git_fo
			...
			add_old_st
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 716db10adf | 
| @@ -1,4 +1,4 @@ | ||||
| sudo zypper in python3-psycopg | ||||
| sudo zypper in python3-psycopg2 | ||||
| sudo su - postgres | ||||
| # `createdb -O <LOCAL_USER> imported_git` | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,8 @@ import sys | ||||
|  | ||||
| import osc.core | ||||
|  | ||||
| from lib.db import DB | ||||
| from lib.db_revision import DBRevision | ||||
| from lib.git_exporter import GitExporter | ||||
| from lib.importer import Importer | ||||
| from lib.test_exporter import TestExporter | ||||
| @@ -100,6 +102,56 @@ def main(): | ||||
|             requests_log.setLevel(logging.DEBUG) | ||||
|             requests_log.propagate = True | ||||
|  | ||||
|     def check_old_package(db: DB, dir: pathlib.Path): | ||||
|         md5file = dir / "MD5SUMS" | ||||
|         print(md5file) | ||||
|         valid_revisions = None | ||||
|         with open(md5file, "rb") as f: | ||||
|             for line in f.readlines(): | ||||
|                 try: | ||||
|                     md5, file = line.decode("utf-8").strip().split("  ") | ||||
|                 except UnicodeDecodeError: | ||||
|                     logging.error(f"Corrupt MD5 file: {md5file}") | ||||
|                     return | ||||
|                 if file == "ready": | ||||
|                     continue | ||||
|                 if len(md5) != 32: | ||||
|                     logging.error(f"Corrupt MD5 file: {md5file}") | ||||
|                     return | ||||
|                 with db.cursor() as cur: | ||||
|                     cur.execute( | ||||
|                         "SELECT revision_id FROM files WHERE md5=%s AND name=%s", | ||||
|                         (md5, file), | ||||
|                     ) | ||||
|                     nrevs = set([row[0] for row in cur.fetchall()]) | ||||
|                     if valid_revisions is None: | ||||
|                         valid_revisions = nrevs | ||||
|                     else: | ||||
|                         valid_revisions = valid_revisions & nrevs | ||||
|                     if not valid_revisions: | ||||
|                         break | ||||
|  | ||||
|         with db.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 "SELECT * FROM revisions WHERE id = ANY(%s) AND project=%s", | ||||
|                 (list(valid_revisions), "openSUSE:Factory"), | ||||
|             ) | ||||
|             for row in cur.fetchall(): | ||||
|                 r = DBRevision(db, row) | ||||
|                 print("Valid", r, r.files_hash) | ||||
|                 return True | ||||
|  | ||||
|     if False: | ||||
|         import os | ||||
|  | ||||
|         db = DB() | ||||
|         basedir = pathlib.Path( | ||||
|             f"/mounts/work/SAVE/oldpackages/stable/{args.packages[0]}" | ||||
|         ) | ||||
|         for subdir in sorted(os.listdir(basedir)): | ||||
|             if check_old_package(db, basedir / subdir): | ||||
|                 break | ||||
|  | ||||
|     if args.export: | ||||
|         if len(args.packages) != 1: | ||||
|             print("Can only export one package") | ||||
|   | ||||
							
								
								
									
										1355
									
								
								gone-packages.txt
									
									
									
									
									
								
							
							
						
						
									
										1355
									
								
								gone-packages.txt
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -14,6 +14,8 @@ def config(filename="database.ini", section="production"): | ||||
|         for param in params: | ||||
|             db[param[0]] = param[1] | ||||
|     else: | ||||
|         raise Exception(f"Section {section} not found in the {filename} file") | ||||
|         raise Exception( | ||||
|             "Section {0} not found in the {1} file".format(section, filename) | ||||
|         ) | ||||
|  | ||||
|     return db | ||||
|   | ||||
							
								
								
									
										19
									
								
								lib/db.py
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								lib/db.py
									
									
									
									
									
								
							| @@ -1,6 +1,7 @@ | ||||
| import logging | ||||
|  | ||||
| import psycopg | ||||
| import psycopg2 | ||||
| from psycopg2.extras import LoggingConnection | ||||
|  | ||||
| from lib.config import config | ||||
|  | ||||
| @@ -16,20 +17,22 @@ class DB: | ||||
|             # read the connection parameters | ||||
|             params = config(section=self.config_section) | ||||
|             # connect to the PostgreSQL server | ||||
|             self.conn = psycopg.connect(conninfo=f"dbname={params['database']}") | ||||
|             logging.getLogger("psycopg.pool").setLevel(logging.INFO) | ||||
|             self.conn = psycopg2.connect(connection_factory=LoggingConnection, **params) | ||||
|             logger = logging.getLogger(__name__) | ||||
|             self.conn.initialize(logger) | ||||
|  | ||||
|         except (Exception, psycopg.DatabaseError) as error: | ||||
|         except (Exception, psycopg2.DatabaseError) as error: | ||||
|             print(error) | ||||
|             raise error | ||||
|  | ||||
|     def schema_version(self): | ||||
|         # create a cursor | ||||
|         with self.conn.cursor() as cur: | ||||
|  | ||||
|             # execute a statement | ||||
|             try: | ||||
|                 cur.execute("SELECT MAX(version) from scheme") | ||||
|             except psycopg.errors.UndefinedTable: | ||||
|             except psycopg2.errors.UndefinedTable as error: | ||||
|                 cur.close() | ||||
|                 self.close() | ||||
|                 self.connect() | ||||
| @@ -143,9 +146,9 @@ class DB: | ||||
|         ) | ||||
|         schemes[10] = ( | ||||
|             "ALTER TABLE revisions ADD COLUMN request_id INTEGER", | ||||
|             """ALTER TABLE revisions | ||||
|             """ALTER TABLE revisions  | ||||
|                ADD CONSTRAINT request_id_foreign_key | ||||
|                FOREIGN KEY (request_id) | ||||
|                FOREIGN KEY (request_id)  | ||||
|                REFERENCES requests (id)""", | ||||
|             "UPDATE scheme SET version=10", | ||||
|         ) | ||||
| @@ -270,7 +273,7 @@ class DB: | ||||
|                         cur.execute(command) | ||||
|             # commit the changes | ||||
|             self.conn.commit() | ||||
|         except (Exception, psycopg.DatabaseError) as error: | ||||
|         except (Exception, psycopg2.DatabaseError) as error: | ||||
|             print(error) | ||||
|             self.close() | ||||
|             raise error | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from __future__ import annotations | ||||
|  | ||||
| from hashlib import md5 | ||||
| from pathlib import Path | ||||
| from typing import Optional | ||||
|  | ||||
| from lib.db import DB | ||||
| from lib.obs_revision import OBSRevision | ||||
| @@ -205,7 +206,7 @@ class DBRevision: | ||||
|                 ): | ||||
|                     continue | ||||
|                 cur.execute( | ||||
|                     """INSERT INTO files (name, md5, size, mtime, revision_id) | ||||
|                     """INSERT INTO files (name, md5, size, mtime, revision_id)  | ||||
|                             VALUES (%s,%s,%s,%s,%s)""", | ||||
|                     ( | ||||
|                         entry.get("name"), | ||||
| @@ -254,7 +255,7 @@ class DBRevision: | ||||
|         self._files.sort(key=lambda x: x["name"]) | ||||
|         return self._files | ||||
|  | ||||
|     def calc_delta(self, current_rev: DBRevision | None): | ||||
|     def calc_delta(self, current_rev: Optional[DBRevision]): | ||||
|         """Calculate the list of files to download and to delete. | ||||
|         Param current_rev is the revision that's currently checked out. | ||||
|         If it's None, the repository is empty. | ||||
|   | ||||
							
								
								
									
										188
									
								
								lib/git.py
									
									
									
									
									
								
							
							
						
						
									
										188
									
								
								lib/git.py
									
									
									
									
									
								
							| @@ -4,6 +4,7 @@ import os | ||||
| import pathlib | ||||
| import subprocess | ||||
|  | ||||
| import pygit2 | ||||
| import requests | ||||
|  | ||||
| from lib.binary import BINARY | ||||
| @@ -19,6 +20,11 @@ class Git: | ||||
|         self.committer = committer | ||||
|         self.committer_email = committer_email | ||||
|  | ||||
|         self.repo = None | ||||
|  | ||||
|     def is_open(self): | ||||
|         return self.repo is not None | ||||
|  | ||||
|     def exists(self): | ||||
|         """Check if the path is a valid git repository""" | ||||
|         return (self.path / ".git").exists() | ||||
| @@ -28,69 +34,36 @@ class Git: | ||||
|         self.path.mkdir(parents=True, exist_ok=True) | ||||
|         self.open() | ||||
|  | ||||
|     def git_run(self, args, **kwargs): | ||||
|         """Run a git command""" | ||||
|         if "env" in kwargs: | ||||
|             envs = kwargs["env"].copy() | ||||
|             del kwargs["env"] | ||||
|         else: | ||||
|             envs = os.environ.copy() | ||||
|         envs["GIT_LFS_SKIP_SMUDGE"] = "1" | ||||
|         envs["GIT_CONFIG_GLOBAL"] = "/dev/null" | ||||
|         return subprocess.run( | ||||
|             ["git"] + args, | ||||
|             cwd=self.path, | ||||
|             check=True, | ||||
|             env=envs, | ||||
|             **kwargs, | ||||
|         ) | ||||
|  | ||||
|     def open(self): | ||||
|         if not self.exists(): | ||||
|             self.git_run(["init", "--object-format=sha256", "-b", "factory"]) | ||||
|         # Convert the path to string, to avoid some limitations in | ||||
|         # older pygit2 | ||||
|         self.repo = pygit2.init_repository(str(self.path)) | ||||
|  | ||||
|     def is_dirty(self): | ||||
|         """Check if there is something to commit""" | ||||
|         status_str = self.git_run( | ||||
|             ["status", "--porcelain=2"], | ||||
|             stdout=subprocess.PIPE, | ||||
|         ).stdout.decode("utf-8") | ||||
|         return len(list(filter(None, status_str.split("\n")))) > 0 | ||||
|         assert self.is_open() | ||||
|  | ||||
|         return self.repo.status() | ||||
|  | ||||
|     def branches(self): | ||||
|         br = ( | ||||
|             self.git_run( | ||||
|                 ["for-each-ref", "--format=%(refname:short)", "refs/heads/"], | ||||
|                 stdout=subprocess.PIPE, | ||||
|             ) | ||||
|             .stdout.decode("utf-8") | ||||
|             .split() | ||||
|         ) | ||||
|         if len(br) == 0: | ||||
|             br.append("factory")  # unborn branch? | ||||
|         return br | ||||
|         return list(self.repo.branches) | ||||
|  | ||||
|     def branch(self, branch, commit="HEAD"): | ||||
|         commit = ( | ||||
|             self.git_run( | ||||
|                 ["rev-parse", "--verify", "--end-of-options", commit + "^{commit}"], | ||||
|                 stdout=subprocess.PIPE, | ||||
|             ) | ||||
|             .stdout.decode("utf-8") | ||||
|             .strip() | ||||
|         ) | ||||
|         return self.git_run(["branch", branch, commit]) | ||||
|     def branch(self, branch, commit=None): | ||||
|         if not commit: | ||||
|             commit = self.repo.head | ||||
|         else: | ||||
|             commit = self.repo.get(commit) | ||||
|         self.repo.branches.local.create(branch, commit) | ||||
|  | ||||
|     def checkout(self, branch): | ||||
|         """Checkout into the branch HEAD""" | ||||
|         new_branch = False | ||||
|         ref = f"refs/heads/{branch}" | ||||
|         if branch not in self.branches(): | ||||
|             self.git_run(["branch", "-q", branch, "HEAD"]) | ||||
|             self.repo.references["HEAD"].set_target(ref) | ||||
|             new_branch = True | ||||
|         else: | ||||
|             ref = f"refs/heads/{branch}" | ||||
|             if (self.path / ".git" / ref).exists(): | ||||
|                 self.git_run(["switch", "--no-guess", "-q", branch]) | ||||
|             self.repo.checkout(ref) | ||||
|         return new_branch | ||||
|  | ||||
|     def commit( | ||||
| @@ -114,73 +87,51 @@ class Git: | ||||
|             committer_time = committer_time if committer_time else user_time | ||||
|  | ||||
|         if self.is_dirty(): | ||||
|             self.git_run(["add", "--all", "."]) | ||||
|             self.repo.index.add_all() | ||||
|  | ||||
|         tree_id = ( | ||||
|             self.git_run(["write-tree"], stdout=subprocess.PIPE) | ||||
|             .stdout.decode("utf-8") | ||||
|             .strip() | ||||
|         self.repo.index.write() | ||||
|         author = pygit2.Signature(user, user_email, int(user_time.timestamp())) | ||||
|         committer = pygit2.Signature( | ||||
|             committer, committer_email, int(committer_time.timestamp()) | ||||
|         ) | ||||
|  | ||||
|         parent_array = [] | ||||
|         if isinstance(parents, list): | ||||
|             for parent in filter(None, parents): | ||||
|                 parent_array = parent_array + ["-p", parent] | ||||
|         elif isinstance(parents, str): | ||||
|             parent_array = ["-p", parents] | ||||
|  | ||||
|         commit_id = ( | ||||
|             self.git_run( | ||||
|                 ["commit-tree"] + parent_array + [tree_id], | ||||
|                 env={ | ||||
|                     "GIT_AUTHOR_NAME": user, | ||||
|                     "GIT_AUTHOR_EMAIL": user_email, | ||||
|                     "GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000", | ||||
|                     "GIT_COMMITTER_NAME": committer, | ||||
|                     "GIT_COMMITTER_EMAIL": committer_email, | ||||
|                     "GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000", | ||||
|                 }, | ||||
|                 input=message.encode("utf-8"), | ||||
|                 stdout=subprocess.PIPE, | ||||
|             ) | ||||
|             .stdout.decode("utf-8") | ||||
|             .rstrip() | ||||
|         tree = self.repo.index.write_tree() | ||||
|         return self.repo.create_commit( | ||||
|             "HEAD", author, committer, message, tree, parents | ||||
|         ) | ||||
|         self.git_run(["reset", "--soft", commit_id]) | ||||
|         return commit_id | ||||
|  | ||||
|     def branch_head(self, branch="HEAD"): | ||||
|         return ( | ||||
|             self.git_run( | ||||
|                 ["rev-parse", "--verify", "--end-of-options", branch], | ||||
|                 stdout=subprocess.PIPE, | ||||
|             ) | ||||
|             .stdout.decode("utf-8") | ||||
|             .strip() | ||||
|         ) | ||||
|     def last_commit(self): | ||||
|         try: | ||||
|             return self.repo.head.target | ||||
|         except: | ||||
|             return None | ||||
|  | ||||
|     def branch_head(self, branch): | ||||
|         return self.repo.references["refs/heads/" + branch].target | ||||
|  | ||||
|     def set_branch_head(self, branch, commit): | ||||
|         return self.git_run(["update-ref", f"refs/heads/{branch}", commit]) | ||||
|         self.repo.references["refs/heads/" + branch].set_target(commit) | ||||
|  | ||||
|     def gc(self): | ||||
|         logging.debug(f"Garbage recollect and repackage {self.path}") | ||||
|         self.git_run( | ||||
|             ["gc", "--auto"], | ||||
|         subprocess.run( | ||||
|             ["git", "gc", "--auto"], | ||||
|             cwd=self.path, | ||||
|             stdout=subprocess.PIPE, | ||||
|             stderr=subprocess.STDOUT, | ||||
|         ) | ||||
|  | ||||
|     #    def clean(self): | ||||
|     #        for path, _ in self.repo.status().items(): | ||||
|     #            logging.debug(f"Cleaning {path}") | ||||
|     #            try: | ||||
|     #                (self.path / path).unlink() | ||||
|     #                self.repo.index.remove(path) | ||||
|     #            except Exception as e: | ||||
|     #                logging.warning(f"Error removing file {path}: {e}") | ||||
|     def clean(self): | ||||
|         for path, _ in self.repo.status().items(): | ||||
|             logging.debug(f"Cleaning {path}") | ||||
|             try: | ||||
|                 (self.path / path).unlink() | ||||
|                 self.repo.index.remove(path) | ||||
|             except Exception as e: | ||||
|                 logging.warning(f"Error removing file {path}: {e}") | ||||
|  | ||||
|     def add(self, filename): | ||||
|         self.git_run(["add", filename]) | ||||
|         self.repo.index.add(filename) | ||||
|  | ||||
|     def add_default_lfs_gitattributes(self, force=False): | ||||
|         if not (self.path / ".gitattributes").exists() or force: | ||||
| @@ -234,9 +185,9 @@ class Git: | ||||
|             return any(fnmatch.fnmatch(filename, line) for line in patterns) | ||||
|  | ||||
|     def remove(self, file: pathlib.Path): | ||||
|         self.git_run( | ||||
|             ["rm", "-q", "-f", "--ignore-unmatch", file.name], | ||||
|         ) | ||||
|         self.repo.index.remove(file.name) | ||||
|         (self.path / file).unlink() | ||||
|  | ||||
|         patterns = self.get_specific_lfs_gitattributes() | ||||
|         if file.name in patterns: | ||||
|             patterns.remove(file.name) | ||||
| @@ -250,7 +201,7 @@ class Git: | ||||
|             logging.warning("Not adding a remote due to missing $GITEA_TOKEN") | ||||
|             return | ||||
|  | ||||
|         url = f"https://src.opensuse.org/api/v1/org/{org_name}/repos" | ||||
|         url = f"https://gitea.opensuse.org/api/v1/org/{org_name}/repos" | ||||
|         response = requests.post( | ||||
|             url, | ||||
|             data={"name": repo_name}, | ||||
| @@ -261,23 +212,16 @@ class Git: | ||||
|         # 201 Created | ||||
|         if response.status_code not in (201, 409): | ||||
|             print(response.data) | ||||
|         url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git" | ||||
|         self.git_run( | ||||
|             ["remote", "add", "origin", url], | ||||
|         ) | ||||
|         url = f"gitea@gitea.opensuse.org:{org_name}/{repo_name}.git" | ||||
|         self.repo.remotes.create("origin", url) | ||||
|  | ||||
|     def push(self, force=False): | ||||
|         if "origin" not in self.git_run( | ||||
|             ["remote"], | ||||
|             stdout=subprocess.PIPE, | ||||
|         ).stdout.decode("utf-8"): | ||||
|             logger.warning("Not pushing to remote because no 'origin' configured") | ||||
|             return | ||||
|     def push(self): | ||||
|         remo = self.repo.remotes["origin"] | ||||
|  | ||||
|         cmd = ["push"] | ||||
|         if force: | ||||
|             cmd.append("-f") | ||||
|         cmd.append("origin") | ||||
|         cmd.append("refs/heads/factory") | ||||
|         cmd.append("refs/heads/devel") | ||||
|         self.git_run(cmd) | ||||
|         keypair = pygit2.KeypairFromAgent("gitea") | ||||
|         callbacks = pygit2.RemoteCallbacks(credentials=keypair) | ||||
|  | ||||
|         refspecs = ["refs/heads/factory"] | ||||
|         if "refs/heads/devel" in self.repo.references: | ||||
|             refspecs.append("refs/heads/devel") | ||||
|         remo.push(refspecs, callbacks=callbacks) | ||||
|   | ||||
| @@ -2,8 +2,6 @@ import logging | ||||
| import os | ||||
|  | ||||
| import yaml | ||||
| from hashlib import md5 | ||||
| from pathlib import Path | ||||
|  | ||||
| from lib.binary import is_binary_or_large | ||||
| from lib.db import DB | ||||
| @@ -42,9 +40,9 @@ class GitExporter: | ||||
|     def check_repo_state(self, flats, branch_state): | ||||
|         state_data = dict() | ||||
|         if os.path.exists(self.state_file): | ||||
|             with open(self.state_file) as f: | ||||
|             with open(self.state_file, "r") as f: | ||||
|                 state_data = yaml.safe_load(f) | ||||
|                 if not isinstance(state_data, dict): | ||||
|                 if type(state_data) != dict: | ||||
|                     state_data = {} | ||||
|         left_to_commit = [] | ||||
|         for flat in reversed(flats): | ||||
| @@ -88,7 +86,7 @@ class GitExporter: | ||||
|             logging.debug(f"Committing {flat}") | ||||
|             self.commit_flat(flat, branch_state) | ||||
|  | ||||
|         self.git.push(force=True) | ||||
|         self.git.push() | ||||
|  | ||||
|     def run_gc(self): | ||||
|         self.gc_cnt = self.gc_interval | ||||
| @@ -135,12 +133,6 @@ class GitExporter: | ||||
|             return True | ||||
|         return flat.parent1 == branch_state[flat.branch] | ||||
|  | ||||
|     def file_md5(self, file): | ||||
|         m = md5() | ||||
|         with open(file, 'rb') as f: | ||||
|             m.update(f.read()) | ||||
|         return m.hexdigest() | ||||
|  | ||||
|     def commit_flat(self, flat, branch_state): | ||||
|         parents = [] | ||||
|         self.git.checkout(flat.branch) | ||||
| @@ -159,40 +151,11 @@ class GitExporter: | ||||
|         # create file if not existant | ||||
|         self.git.add_default_lfs_gitattributes(force=False) | ||||
|  | ||||
|         new_files = flat.commit.files_list() | ||||
|         cur_files = os.listdir(self.git.path) | ||||
|         for cf in cur_files: | ||||
|             if cf[0] == '.': | ||||
|                 continue | ||||
|             found = False | ||||
|             for nf in new_files: | ||||
|                 if nf['name'] == cf: | ||||
|                     found = True | ||||
|                     break | ||||
|  | ||||
|             if found: | ||||
|                 # check if file is modified | ||||
|                 file_path = self.git.path.joinpath(cf) | ||||
|                 stat = file_path.stat() | ||||
|                 if stat.st_size != nf['size'] or self.file_md5(file_path) != nf['md5']: | ||||
|                     logging.debug(f"updating {file_path.name}") | ||||
|                     self.commit_file(flat, Path(cf), nf['size'], nf['md5']) | ||||
|                 else: | ||||
|                     logging.debug(f"leaving  {file_path.name}") | ||||
|             else: | ||||
|                 # file not exist in new commit | ||||
|                 self.git.remove(Path(cf)) | ||||
|  | ||||
|  | ||||
|         # new files? | ||||
|         for file in new_files: | ||||
|             found = False | ||||
|             for cf in cur_files: | ||||
|                 if file['name'] == cf: | ||||
|                     found = True | ||||
|                     break | ||||
|             if not found: | ||||
|                 self.commit_file(flat, Path(file['name']), file['size'], file['md5']) | ||||
|         to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch]) | ||||
|         for file in to_delete: | ||||
|             self.git.remove(file) | ||||
|         for file, size, md5 in to_download: | ||||
|             self.commit_file(flat, file, size, md5) | ||||
|  | ||||
|         commit = self.git.commit( | ||||
|             flat.user.realname, | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import concurrent.futures | ||||
| import logging | ||||
| import pathlib | ||||
| import xml.etree.ElementTree as ET | ||||
|  | ||||
| from lib.db import DB | ||||
| @@ -31,7 +31,6 @@ class Importer: | ||||
|         self.obs = OBS(api_url) | ||||
|         assert project == "openSUSE:Factory" | ||||
|         self.refreshed_packages = set() | ||||
|         self.gone_packages_set = None | ||||
|  | ||||
|     def import_request(self, number): | ||||
|         self.obs.request(number).import_into_db(self.db) | ||||
| @@ -106,7 +105,7 @@ class Importer: | ||||
|         with self.db.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 """SELECT * FROM revisions WHERE id IN | ||||
|                             (SELECT revision_id from linked_revs WHERE linked_id=%s) | ||||
|                             (SELECT revision_id from linked_revs WHERE linked_id=%s)  | ||||
|                             AND commit_time <= %s ORDER BY commit_time""", | ||||
|                 (prev.dbid, rev.commit_time), | ||||
|             ) | ||||
| @@ -139,7 +138,7 @@ class Importer: | ||||
|             fake_rev = linked.rev + rev.rev / 1000.0 | ||||
|             comment = f"Updating link to change in {rev.project}/{rev.package} revision {int(rev.rev)}" | ||||
|             cur.execute( | ||||
|                 """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5, | ||||
|                 """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,  | ||||
|                             commit_time, userid, comment, api_url) VALUES(%s,%s,%s,%s,%s,%s,%s,%s) RETURNING id""", | ||||
|                 ( | ||||
|                     linked.project, | ||||
| @@ -162,12 +161,10 @@ class Importer: | ||||
|                 (rev.dbid, linked.dbid), | ||||
|             ) | ||||
|  | ||||
|     def revisions_without_files(self, package): | ||||
|         logging.debug(f"revisions_without_files({package})") | ||||
|     def revisions_without_files(self): | ||||
|         with self.db.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 "SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL", | ||||
|                 (package,), | ||||
|                 "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL" | ||||
|             ) | ||||
|             return [DBRevision(self.db, row) for row in cur.fetchall()] | ||||
|  | ||||
| @@ -181,11 +178,11 @@ class Importer: | ||||
|             linked_rev = cur.fetchone() | ||||
|         if linked_rev: | ||||
|             linked_rev = linked_rev[0] | ||||
|         obs_dir_list = self.obs.list( | ||||
|         list = self.obs.list( | ||||
|             rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev | ||||
|         ) | ||||
|         if obs_dir_list: | ||||
|             rev.import_dir_list(obs_dir_list) | ||||
|         if list: | ||||
|             rev.import_dir_list(list) | ||||
|             md5 = rev.calculate_files_hash() | ||||
|             with self.db.cursor() as cur: | ||||
|                 cur.execute( | ||||
| @@ -199,43 +196,53 @@ class Importer: | ||||
|         self.find_linked_revs() | ||||
|  | ||||
|         self.find_fake_revisions() | ||||
|         for package in self.packages: | ||||
|             for rev in self.revisions_without_files(package): | ||||
|                 print(f"rev {rev} is without files") | ||||
|                 self.import_rev(rev) | ||||
|         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: | ||||
|             fs = [ | ||||
|                 executor.submit(import_rev, self, rev) | ||||
|                 for rev in self.revisions_without_files() | ||||
|             ] | ||||
|         concurrent.futures.wait(fs) | ||||
|  | ||||
|     def refresh_package(self, project, package): | ||||
|         key = f"{project}/{package}" | ||||
|         if key in self.refreshed_packages: | ||||
|             # refreshing once is good enough | ||||
|             return | ||||
|         if self.package_gone(key): | ||||
|             return | ||||
|         logging.debug(f"Refresh {project}/{package}") | ||||
|         self.refreshed_packages.add(key) | ||||
|         self.update_db_package(project, package) | ||||
|         self.fetch_all_linked_packages(project, package) | ||||
|  | ||||
|     def import_into_db(self): | ||||
|         for package in self.packages: | ||||
|             refresh_package(self, self.project, package) | ||||
|  | ||||
|         self.db.conn.commit() | ||||
|         with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: | ||||
|             fs = [ | ||||
|                 executor.submit(refresh_package, self, self.project, package) | ||||
|                 for package in self.packages | ||||
|             ] | ||||
|             concurrent.futures.wait(fs) | ||||
|  | ||||
|         for number in DBRevision.requests_to_fetch(self.db): | ||||
|             self.import_request(number) | ||||
|             self.db.conn.commit() | ||||
|  | ||||
|         self.db.conn.commit() | ||||
|             fs = [ | ||||
|                 executor.submit(import_request, self, number) | ||||
|                 for number in DBRevision.requests_to_fetch(self.db) | ||||
|             ] | ||||
|             concurrent.futures.wait(fs) | ||||
|  | ||||
|         with self.db.cursor() as cur: | ||||
|             cur.execute( | ||||
|                 """SELECT DISTINCT source_project,source_package FROM requests | ||||
|                         WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""", | ||||
|                 (self.project, self.packages), | ||||
|             ) | ||||
|             for project, package in cur.fetchall(): | ||||
|                 self.refresh_package(project, package) | ||||
|             self.db.conn.commit() | ||||
|  | ||||
|             with self.db.cursor() as cur: | ||||
|                 cur.execute( | ||||
|                     """SELECT DISTINCT source_project,source_package FROM requests  | ||||
|                             WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""", | ||||
|                     (self.project, self.packages), | ||||
|                 ) | ||||
|                 fs = [ | ||||
|                     executor.submit(refresh_package, self, project, package) | ||||
|                     for project, package in cur.fetchall() | ||||
|                 ] | ||||
|                 concurrent.futures.wait(fs) | ||||
|         self.db.conn.commit() | ||||
|  | ||||
|         missing_users = User.missing_users(self.db) | ||||
| @@ -247,11 +254,3 @@ class Importer: | ||||
|  | ||||
|         self.fill_file_lists() | ||||
|         self.db.conn.commit() | ||||
|  | ||||
|     def package_gone(self, key): | ||||
|         if not self.gone_packages_set: | ||||
|             self.gone_packages_set = set() | ||||
|             with open(pathlib.Path(__file__).parent.parent / "gone-packages.txt") as f: | ||||
|                 for line in f.readlines(): | ||||
|                     self.gone_packages_set.add(line.strip()) | ||||
|         return key in self.gone_packages_set | ||||
|   | ||||
| @@ -68,7 +68,7 @@ class LFSOid: | ||||
|             row = cur.fetchone() | ||||
|             lfs_oid_id = row[0] | ||||
|             cur.execute( | ||||
|                 """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id) | ||||
|                 """INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)  | ||||
|                         VALUES (%s,%s,%s)""", | ||||
|                 (package, filename, lfs_oid_id), | ||||
|             ) | ||||
| @@ -83,7 +83,7 @@ class LFSOid: | ||||
|             self.register() | ||||
|  | ||||
|     def check(self): | ||||
|         url = f"http://localhost:9999/check/{self.sha256}/{self.size}" | ||||
|         url = f"http://gitea.opensuse.org:9999/check/{self.sha256}/{self.size}" | ||||
|         response = requests.get( | ||||
|             url, | ||||
|             timeout=10, | ||||
| @@ -127,13 +127,12 @@ class LFSOid: | ||||
|             "size": self.size, | ||||
|         } | ||||
|  | ||||
|         url = "http://localhost:9999/register" | ||||
|         url = "http://gitea.opensuse.org:9999/register" | ||||
|         response = requests.post( | ||||
|             url, | ||||
|             json=data, | ||||
|             timeout=10, | ||||
|         ) | ||||
|         response.raise_for_status() | ||||
|         logging.info(f"Register LFS returned {response.status_code}") | ||||
|  | ||||
|  | ||||
| @@ -168,7 +167,7 @@ if __name__ == "__main__": | ||||
|         cur.execute( | ||||
|             """ | ||||
|             CREATE TEMPORARY TABLE lfs_oid_in_revision ( | ||||
|                 revision_id INTEGER, | ||||
|                 revision_id INTEGER,  | ||||
|                 lfs_oid_id  INTEGER NOT NULL, | ||||
|                 name        VARCHAR(255) NOT NULL | ||||
|             ) | ||||
|   | ||||
| @@ -150,7 +150,7 @@ class OBS: | ||||
|     def _download(self, project, package, name, revision): | ||||
|         url = osc.core.makeurl( | ||||
|             self.url, | ||||
|             ["source", project, package, name], | ||||
|             ["source", project, package, urllib.parse.quote(name)], | ||||
|             {"rev": revision, "expand": 1}, | ||||
|         ) | ||||
|         return osc.core.http_GET(url) | ||||
| @@ -165,6 +165,7 @@ class OBS: | ||||
|         cachedir: str, | ||||
|         file_md5: str, | ||||
|     ) -> None: | ||||
|  | ||||
|         cached_file = self._path_from_md5(name, cachedir, file_md5) | ||||
|         if not self.in_cache(name, cachedir, file_md5): | ||||
|             with (dirpath / name).open("wb") as f: | ||||
|   | ||||
| @@ -7,6 +7,8 @@ except: | ||||
|     print("Install python3-python-magic, not python3-magic") | ||||
|     raise | ||||
|  | ||||
| import requests | ||||
|  | ||||
| from lib.db import DB | ||||
| from lib.lfs_oid import LFSOid | ||||
| from lib.obs import OBS | ||||
| @@ -41,6 +43,7 @@ class ProxySHA256: | ||||
|             } | ||||
|  | ||||
|     def put(self, project, package, name, revision, file_md5, size): | ||||
|  | ||||
|         if not self.mime: | ||||
|             self.mime = magic.Magic(mime=True) | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| from typing import Dict | ||||
| from xmlrpc.client import Boolean | ||||
|  | ||||
| from lib.db_revision import DBRevision | ||||
| @@ -113,7 +114,7 @@ class TreeBuilder: | ||||
|                 candidates.append(node) | ||||
|             if node.merged_into: | ||||
|                 # we can't have candidates that are crossing previous merges | ||||
|                 # see https://src.opensuse.org/importers/git-importer/issues/14 | ||||
|                 # see https://gitea.opensuse.org/importers/git-importer/issues/14 | ||||
|                 candidates = [] | ||||
|             node = node.parent | ||||
|         if candidates: | ||||
| @@ -137,7 +138,7 @@ class TreeBuilder: | ||||
|                 self.requests.add(node.revision.request_id) | ||||
|  | ||||
|         class FindMergeWalker(AbstractWalker): | ||||
|             def __init__(self, builder: TreeBuilder, requests: dict) -> None: | ||||
|             def __init__(self, builder: TreeBuilder, requests: Dict) -> None: | ||||
|                 super().__init__() | ||||
|                 self.source_revisions = dict() | ||||
|                 self.builder = builder | ||||
|   | ||||
| @@ -1,37 +0,0 @@ | ||||
| #!/usr/bin/python3 | ||||
| import json | ||||
| from pathlib import Path | ||||
| import pika | ||||
| import sys | ||||
|  | ||||
| MY_TASKS_DIR = Path(__file__).parent / "tasks" | ||||
|  | ||||
| connection = pika.BlockingConnection(pika.URLParameters("amqps://opensuse:opensuse@rabbit.opensuse.org")) | ||||
| channel = connection.channel() | ||||
|  | ||||
| channel.exchange_declare(exchange='pubsub', exchange_type='topic', passive=True, durable=True) | ||||
|  | ||||
| result = channel.queue_declare("", exclusive=True) | ||||
| queue_name = result.method.queue | ||||
|  | ||||
| channel.queue_bind(exchange='pubsub', | ||||
|                    queue=queue_name,routing_key='#') | ||||
|  | ||||
| print(' [*] Waiting for logs. To exit press CTRL+C') | ||||
|  | ||||
| def callback(ch, method, properties, body): | ||||
|     if method.routing_key not in ("opensuse.obs.package.commit",): | ||||
|         return | ||||
|     body = json.loads(body) | ||||
|     if 'project' in body and 'package' in body and body['project'] == 'openSUSE:Factory': | ||||
|         if '/' in body['package']: | ||||
|             return | ||||
|  | ||||
|         (MY_TASKS_DIR / body['package']).touch() | ||||
|         print(" [x] %r:%r" % (method.routing_key, body['package'])) | ||||
|  | ||||
| channel.basic_consume(queue_name, | ||||
|                       callback, | ||||
|                       auto_ack=True) | ||||
|  | ||||
| channel.start_consuming() | ||||
							
								
								
									
										1
									
								
								tasks/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								tasks/.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1 +0,0 @@ | ||||
| * | ||||
		Reference in New Issue
	
	Block a user