forked from importers/git-importer
Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
5a28f62fb9 | |||
17888407df | |||
d5eb5c0db6 | |||
5a6a55868f | |||
5d55a8c2fe | |||
32d1924a0d | |||
7684388193 | |||
4ef980d1c8 | |||
fb80d0c105 | |||
|
44b4d690db | ||
|
a69e861614 | ||
|
1da740bd8b | ||
|
b3107ba3bf |
@@ -119,7 +119,7 @@ def main():
|
|||||||
importer = Importer(URL_OBS, args.project, args.packages)
|
importer = Importer(URL_OBS, args.project, args.packages)
|
||||||
importer.import_into_db()
|
importer.import_into_db()
|
||||||
for package in args.packages:
|
for package in args.packages:
|
||||||
if not importer.package_with_scmsync(package):
|
if not importer.package_with_scmsync(args.project, package):
|
||||||
export_package(args.project, package, args.repodir, args.cachedir, args.gc)
|
export_package(args.project, package, args.repodir, args.cachedir, args.gc)
|
||||||
else:
|
else:
|
||||||
logging.debug(f"{args.project}/{package} has scmsync links - skipping export")
|
logging.debug(f"{args.project}/{package} has scmsync links - skipping export")
|
||||||
|
@@ -204,6 +204,11 @@ class DBRevision:
|
|||||||
and self.package == "_project"
|
and self.package == "_project"
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# do not import _service:* files as those are created by OBS on source imports
|
||||||
|
if entry.get("name")[0:9] == "_service:":
|
||||||
|
continue
|
||||||
|
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""INSERT INTO files (name, md5, size, mtime, revision_id)
|
"""INSERT INTO files (name, md5, size, mtime, revision_id)
|
||||||
VALUES (%s,%s,%s,%s,%s)""",
|
VALUES (%s,%s,%s,%s,%s)""",
|
||||||
|
@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):
|
|||||||
|
|
||||||
def __init__(self, rebase_devel=False) -> None:
|
def __init__(self, rebase_devel=False) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.flats = []
|
self.flats:list[FlatNode] = []
|
||||||
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
||||||
self.rebase_devel = rebase_devel
|
self.rebase_devel = rebase_devel
|
||||||
# remember the last merge point so we can know the parent of it for the root of the sources
|
# remember the last merge point so we can know the parent of it for the root of the sources
|
||||||
|
@@ -160,6 +160,12 @@ class Git:
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def branch_commit(self, branch="HEAD"):
|
||||||
|
try:
|
||||||
|
return (self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE).stdout.decode("utf-8").strip())
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
def set_branch_head(self, branch, commit):
|
def set_branch_head(self, branch, commit):
|
||||||
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
|
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
|
||||||
|
|
||||||
@@ -251,7 +257,7 @@ class Git:
|
|||||||
|
|
||||||
def add_gitea_remote(self, package):
|
def add_gitea_remote(self, package):
|
||||||
repo_name = package.replace("+", "_")
|
repo_name = package.replace("+", "_")
|
||||||
org_name = "rpm"
|
org_name = "pool"
|
||||||
|
|
||||||
if not os.getenv("GITEA_TOKEN"):
|
if not os.getenv("GITEA_TOKEN"):
|
||||||
logging.warning("Not adding a remote due to missing $GITEA_TOKEN")
|
logging.warning("Not adding a remote due to missing $GITEA_TOKEN")
|
||||||
|
@@ -1,8 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import parse_qs
|
||||||
|
|
||||||
|
import psycopg
|
||||||
|
from urllib3.util import url
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
from lib import db
|
||||||
from lib.binary import is_binary_or_large
|
from lib.binary import is_binary_or_large
|
||||||
from lib.db import DB
|
from lib.db import DB
|
||||||
from lib.git import Git
|
from lib.git import Git
|
||||||
@@ -12,6 +16,12 @@ from lib.proxy_sha256 import ProxySHA256
|
|||||||
from lib.tree_builder import TreeBuilder
|
from lib.tree_builder import TreeBuilder
|
||||||
from lib.user import User
|
from lib.user import User
|
||||||
|
|
||||||
|
def is_number(s):
|
||||||
|
try:
|
||||||
|
float(s)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
class GitExporter:
|
class GitExporter:
|
||||||
def __init__(self, api_url, project, package, repodir, cachedir):
|
def __init__(self, api_url, project, package, repodir, cachedir):
|
||||||
@@ -37,42 +47,107 @@ class GitExporter:
|
|||||||
def set_gc_interval(self, gc):
|
def set_gc_interval(self, gc):
|
||||||
self.gc_interval = gc
|
self.gc_interval = gc
|
||||||
|
|
||||||
def check_repo_state(self, flats, branch_state):
|
def reconstruct_state(self, flats):
|
||||||
|
state_data = dict()
|
||||||
|
prefix = "OBS-URL: "
|
||||||
|
for line in self.git.branch_commit("factory").splitlines():
|
||||||
|
if line.startswith(prefix):
|
||||||
|
u = url.parse_url(line.strip(prefix))
|
||||||
|
if u.path != f"/package/show/openSUSE:Factory/{self.package}" or "rev=" not in u.query:
|
||||||
|
continue
|
||||||
|
v = parse_qs(u.query)
|
||||||
|
rev = v['rev'][0]
|
||||||
|
with self.db.cursor() as cur:
|
||||||
|
try:
|
||||||
|
if is_number(rev):
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", ('openSUSE:Factory', self.package, rev,))
|
||||||
|
else:
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", ('openSUSE:Factory', self.package, rev, rev))
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logging.error(e)
|
||||||
|
self.db.conn.rollback()
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return state_data
|
||||||
|
state_data['factory'] = row[0]
|
||||||
|
try:
|
||||||
|
print("devel reconstruct")
|
||||||
|
d = self.devel_rev(flats)
|
||||||
|
if d is not None:
|
||||||
|
prj = d.commit.project
|
||||||
|
for line in self.git.branch_commit("devel").splitlines():
|
||||||
|
if line.startswith(prefix):
|
||||||
|
u = url.parse_url(line.strip(prefix))
|
||||||
|
if u.path != f"/package/show/{prj}/{self.package}" or u.query is None or "rev=" not in u.query:
|
||||||
|
continue
|
||||||
|
v = parse_qs(u.query)
|
||||||
|
rev = v['rev'][0]
|
||||||
|
try:
|
||||||
|
with self.db.cursor() as cur:
|
||||||
|
logging.debug(f"finding id for ({prj, self.package, rev}")
|
||||||
|
if is_number(rev):
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", (prj, self.package, rev,))
|
||||||
|
else:
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", (prj, self.package, rev,))
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
logging.info(" ** cannot find revision for devel branch:", rev)
|
||||||
|
return state_data
|
||||||
|
state_data['devel'] = row[0]
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logging.error(e)
|
||||||
|
self.db.conn.rollback()
|
||||||
|
if state_data['factory'] is not None:
|
||||||
|
state_data['devel'] = state_data['factory']
|
||||||
|
except:
|
||||||
|
if state_data['factory'] is not None:
|
||||||
|
state_data['devel'] = state_data['factory']
|
||||||
|
return state_data
|
||||||
|
|
||||||
|
def check_repo_state(self, flats, branch_state, branch):
|
||||||
state_data = dict()
|
state_data = dict()
|
||||||
if os.path.exists(self.state_file):
|
if os.path.exists(self.state_file):
|
||||||
with open(self.state_file) as f:
|
with open(self.state_file) as f:
|
||||||
state_data = yaml.safe_load(f)
|
state_data = yaml.safe_load(f)
|
||||||
if not isinstance(state_data, dict):
|
if not isinstance(state_data, dict):
|
||||||
state_data = {}
|
state_data = {}
|
||||||
|
else:
|
||||||
|
state_data = self.reconstruct_state(flats)
|
||||||
|
|
||||||
|
logging.debug(f"state data: {state_data}")
|
||||||
left_to_commit = []
|
left_to_commit = []
|
||||||
for flat in reversed(flats):
|
for flat in reversed(flats):
|
||||||
found_state = False
|
found_state = False
|
||||||
for branch in ["factory"]:
|
if flat.commit.dbid == state_data.get(branch):
|
||||||
if flat.commit.dbid == state_data.get(branch):
|
branch_state[branch] = flat.commit
|
||||||
branch_state[branch] = flat.commit
|
flat.commit.git_commit = self.git.branch_head(branch)
|
||||||
flat.commit.git_commit = self.git.branch_head(branch)
|
logging.debug(
|
||||||
logging.debug(
|
f"Found {self.git.path}'s {branch} branch in state {flat}"
|
||||||
f"Found {self.git.path}'s {branch} branch in state {flat}"
|
)
|
||||||
)
|
left_to_commit = []
|
||||||
left_to_commit = []
|
found_state = True
|
||||||
found_state = True
|
|
||||||
if not found_state:
|
if not found_state:
|
||||||
left_to_commit.append(flat)
|
left_to_commit.append(flat)
|
||||||
return left_to_commit
|
return left_to_commit
|
||||||
|
|
||||||
|
def devel_rev(self, tree):
|
||||||
|
for flat in tree:
|
||||||
|
if flat.branch == "devel":
|
||||||
|
return flat
|
||||||
|
return None
|
||||||
|
|
||||||
def export_as_git(self):
|
def export_as_git(self):
|
||||||
if os.getenv("CHECK_ALL_LFS"):
|
if os.getenv("CHECK_ALL_LFS"):
|
||||||
LFSOid.check_all(self.db, self.package)
|
LFSOid.check_all(self.db, self.package)
|
||||||
tree = TreeBuilder(self.db).build(self.project, self.package)
|
tree = TreeBuilder(self.db).build(self.project, self.package)
|
||||||
flats = tree.as_flat_list()
|
added_commits = False
|
||||||
|
|
||||||
branch_state = {"factory": None, "devel": None}
|
if tree == None: # eg. python-M2Crypto errors
|
||||||
left_to_commit = self.check_repo_state(flats, branch_state)
|
|
||||||
|
|
||||||
if not left_to_commit:
|
|
||||||
return
|
return
|
||||||
|
flats = tree.as_flat_list()
|
||||||
logging.info(f"Commiting into {self.git.path}")
|
branch_state = {"factory": None, "devel": None}
|
||||||
|
left_to_commit = self.check_repo_state(flats, branch_state, "factory")
|
||||||
|
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
|
||||||
self.run_gc()
|
self.run_gc()
|
||||||
users = dict()
|
users = dict()
|
||||||
|
|
||||||
@@ -85,13 +160,40 @@ class GitExporter:
|
|||||||
self.run_gc()
|
self.run_gc()
|
||||||
logging.debug(f"Committing {flat}")
|
logging.debug(f"Committing {flat}")
|
||||||
self.commit_flat(flat, branch_state)
|
self.commit_flat(flat, branch_state)
|
||||||
|
added_commits = True
|
||||||
|
|
||||||
|
# export the devel_tree head commits based on the devel branch
|
||||||
|
if self.project == "openSUSE:Factory":
|
||||||
|
devel_head = self.devel_rev(flats)
|
||||||
|
flat_devel = None
|
||||||
|
if devel_head is not None:
|
||||||
|
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
|
||||||
|
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
|
||||||
|
for f in flat_devel:
|
||||||
|
f.branch = "devel"
|
||||||
|
|
||||||
|
if flat_devel is not None:
|
||||||
|
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
|
||||||
|
logging.debug(branch_state)
|
||||||
|
logging.debug(f"appending {len(left_to_commit)} items on top of devel")
|
||||||
|
for flat in left_to_commit:
|
||||||
|
if flat.commit.userid not in users:
|
||||||
|
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
|
||||||
|
flat.user = users[flat.commit.userid]
|
||||||
|
self.gc_cnt -= 1
|
||||||
|
if self.gc_cnt <= 0 and self.gc_interval:
|
||||||
|
self.run_gc()
|
||||||
|
logging.debug(f"Committing {flat}")
|
||||||
|
self.commit_flat(flat, branch_state)
|
||||||
|
added_commits = True
|
||||||
|
|
||||||
# make sure that we create devel branch
|
# make sure that we create devel branch
|
||||||
if not branch_state["devel"]:
|
if not branch_state["devel"]:
|
||||||
logging.debug("force creating devel")
|
logging.debug("force creating devel")
|
||||||
self.git.set_branch_head("devel", self.git.branch_head("factory"))
|
self.git.set_branch_head("devel", self.git.branch_head("factory"))
|
||||||
|
|
||||||
self.git.push(force=True)
|
#if added_commits:
|
||||||
|
# self.git.push(force=True)
|
||||||
|
|
||||||
def run_gc(self):
|
def run_gc(self):
|
||||||
self.gc_cnt = self.gc_interval
|
self.gc_cnt = self.gc_interval
|
||||||
@@ -103,6 +205,10 @@ class GitExporter:
|
|||||||
return not self.proxy_sha256.is_text(package, filename)
|
return not self.proxy_sha256.is_text(package, filename)
|
||||||
|
|
||||||
def commit_file(self, flat, file, size, md5):
|
def commit_file(self, flat, file, size, md5):
|
||||||
|
# don't export imported _service: files, if any
|
||||||
|
if file.name[0:9] == '_service:':
|
||||||
|
return
|
||||||
|
|
||||||
# have such files been detected as text mimetype before?
|
# have such files been detected as text mimetype before?
|
||||||
if self.is_lfs_file(flat.commit.package, file.name, size):
|
if self.is_lfs_file(flat.commit.package, file.name, size):
|
||||||
file_sha256 = self.proxy_sha256.get_or_put(
|
file_sha256 = self.proxy_sha256.get_or_put(
|
||||||
|
@@ -42,6 +42,8 @@ class Importer:
|
|||||||
def update_db_package(self, project, package):
|
def update_db_package(self, project, package):
|
||||||
root = self.obs._history(project, package)
|
root = self.obs._history(project, package)
|
||||||
if root is None:
|
if root is None:
|
||||||
|
if self.project == "openSUSE:Factory" and project == self.project:
|
||||||
|
exit(10)
|
||||||
return
|
return
|
||||||
latest = DBRevision.max_rev(self.db, project, package)
|
latest = DBRevision.max_rev(self.db, project, package)
|
||||||
for r in root.findall("revision"):
|
for r in root.findall("revision"):
|
||||||
@@ -217,7 +219,7 @@ class Importer:
|
|||||||
logging.debug(f"Refresh {project}/{package}")
|
logging.debug(f"Refresh {project}/{package}")
|
||||||
self.refreshed_packages.add(key)
|
self.refreshed_packages.add(key)
|
||||||
if self.has_scmsync(project) or self.has_scmsync(key):
|
if self.has_scmsync(project) or self.has_scmsync(key):
|
||||||
self.packages_with_scmsync.add(package)
|
self.packages_with_scmsync.add((project, package))
|
||||||
logging.debug(f"{project}/{package} already in Git - skipping")
|
logging.debug(f"{project}/{package} already in Git - skipping")
|
||||||
return
|
return
|
||||||
self.update_db_package(project, package)
|
self.update_db_package(project, package)
|
||||||
@@ -274,6 +276,6 @@ class Importer:
|
|||||||
self.scmsync_cache[key] = scmsync_exists
|
self.scmsync_cache[key] = scmsync_exists
|
||||||
return scmsync_exists
|
return scmsync_exists
|
||||||
|
|
||||||
def package_with_scmsync(self, package):
|
def package_with_scmsync(self, project, package):
|
||||||
return package in self.packages_with_scmsync
|
return (project, package) in self.packages_with_scmsync
|
||||||
|
|
||||||
|
23
lib/obs.py
23
lib/obs.py
@@ -148,12 +148,21 @@ class OBS:
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _download(self, project, package, name, revision):
|
def _download(self, project, package, name, revision):
|
||||||
url = osc.core.makeurl(
|
try:
|
||||||
self.url,
|
url = osc.core.makeurl(
|
||||||
["source", project, package, name],
|
self.url,
|
||||||
{"rev": revision, "expand": 1},
|
["source", project, package, name],
|
||||||
)
|
{"rev": revision, "expand": 1},
|
||||||
return osc.core.http_GET(url)
|
)
|
||||||
|
return osc.core.http_GET(url)
|
||||||
|
except HTTPError as e:
|
||||||
|
if e.status == 404:
|
||||||
|
url = osc.core.makeurl(
|
||||||
|
self.url,
|
||||||
|
["source", project, package, name],
|
||||||
|
{"rev": revision, "expand": 1, "deleted": 1},
|
||||||
|
)
|
||||||
|
return osc.core.http_GET(url)
|
||||||
|
|
||||||
def download(
|
def download(
|
||||||
self,
|
self,
|
||||||
@@ -189,7 +198,7 @@ class OBS:
|
|||||||
try:
|
try:
|
||||||
root = self._xml(f"source/{project}/{package}", **params)
|
root = self._xml(f"source/{project}/{package}", **params)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
if e.code == 400:
|
if e.code == 400 or e.code == 404:
|
||||||
logging.error(
|
logging.error(
|
||||||
f"Package [{project}/{package} {params}] can't be expanded: {e}"
|
f"Package [{project}/{package} {params}] can't be expanded: {e}"
|
||||||
)
|
)
|
||||||
|
@@ -50,12 +50,12 @@ class ProxySHA256:
|
|||||||
sha = hashlib.sha256()
|
sha = hashlib.sha256()
|
||||||
while True:
|
while True:
|
||||||
buffer = fin.read(10000)
|
buffer = fin.read(10000)
|
||||||
if not buffer:
|
|
||||||
break
|
|
||||||
sha.update(buffer)
|
|
||||||
# only guess from the first 10K
|
# only guess from the first 10K
|
||||||
if not mimetype:
|
if not mimetype:
|
||||||
mimetype = self.mime.from_buffer(buffer)
|
mimetype = self.mime.from_buffer(buffer)
|
||||||
|
if not buffer:
|
||||||
|
break
|
||||||
|
sha.update(buffer)
|
||||||
fin.close()
|
fin.close()
|
||||||
LFSOid(self.db).add(
|
LFSOid(self.db).add(
|
||||||
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
|
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
|
||||||
|
@@ -216,6 +216,8 @@ class TreeBuilder:
|
|||||||
def build(self, project, package):
|
def build(self, project, package):
|
||||||
"""Create a Factory tree (returning the top)"""
|
"""Create a Factory tree (returning the top)"""
|
||||||
factory_revisions = self.revisions_chain(project, package)
|
factory_revisions = self.revisions_chain(project, package)
|
||||||
|
if factory_revisions == None:
|
||||||
|
return None
|
||||||
self.add_merge_points(factory_revisions)
|
self.add_merge_points(factory_revisions)
|
||||||
# factory_revisions.print()
|
# factory_revisions.print()
|
||||||
self.prune_loose_end(factory_revisions)
|
self.prune_loose_end(factory_revisions)
|
||||||
|
@@ -36,10 +36,12 @@ def listen_events():
|
|||||||
and "package" in body
|
and "package" in body
|
||||||
and body["project"] == "openSUSE:Factory"
|
and body["project"] == "openSUSE:Factory"
|
||||||
):
|
):
|
||||||
if "/" in body["package"]:
|
# Strip multibuild flavors
|
||||||
|
package = body["package"].partition(':')[0]
|
||||||
|
if "/" in package:
|
||||||
return
|
return
|
||||||
|
|
||||||
(MY_TASKS_DIR / body["package"]).touch()
|
(MY_TASKS_DIR / package).touch()
|
||||||
print(" [x] %r:%r" % (method.routing_key, body["package"]))
|
print(" [x] %r:%r" % (method.routing_key, body["package"]))
|
||||||
|
|
||||||
channel.basic_consume(queue_name, callback, auto_ack=True)
|
channel.basic_consume(queue_name, callback, auto_ack=True)
|
||||||
|
Reference in New Issue
Block a user