main #36
@@ -119,7 +119,7 @@ def main():
|
|||||||
importer = Importer(URL_OBS, args.project, args.packages)
|
importer = Importer(URL_OBS, args.project, args.packages)
|
||||||
importer.import_into_db()
|
importer.import_into_db()
|
||||||
for package in args.packages:
|
for package in args.packages:
|
||||||
if not importer.package_with_scmsync(package):
|
if not importer.package_with_scmsync(args.project, package):
|
||||||
export_package(args.project, package, args.repodir, args.cachedir, args.gc)
|
export_package(args.project, package, args.repodir, args.cachedir, args.gc)
|
||||||
else:
|
else:
|
||||||
logging.debug(f"{args.project}/{package} has scmsync links - skipping export")
|
logging.debug(f"{args.project}/{package} has scmsync links - skipping export")
|
||||||
|
@@ -204,6 +204,11 @@ class DBRevision:
|
|||||||
and self.package == "_project"
|
and self.package == "_project"
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# do not import _service:* files as those are created by OBS on source imports
|
||||||
|
if entry.get("name")[0:9] == "_service:":
|
||||||
|
continue
|
||||||
|
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""INSERT INTO files (name, md5, size, mtime, revision_id)
|
"""INSERT INTO files (name, md5, size, mtime, revision_id)
|
||||||
VALUES (%s,%s,%s,%s,%s)""",
|
VALUES (%s,%s,%s,%s,%s)""",
|
||||||
|
@@ -20,7 +20,7 @@ class FlatTreeWalker(AbstractWalker):
|
|||||||
|
|
||||||
def __init__(self, rebase_devel=False) -> None:
|
def __init__(self, rebase_devel=False) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.flats = []
|
self.flats:list[FlatNode] = []
|
||||||
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
||||||
self.rebase_devel = rebase_devel
|
self.rebase_devel = rebase_devel
|
||||||
# remember the last merge point so we can know the parent of it for the root of the sources
|
# remember the last merge point so we can know the parent of it for the root of the sources
|
||||||
|
@@ -160,6 +160,12 @@ class Git:
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def branch_commit(self, branch="HEAD"):
|
||||||
|
try:
|
||||||
|
|||||||
|
return (self.git_run(["cat-file", "commit", branch], stdout=subprocess.PIPE).stdout.decode("utf-8").strip())
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
def set_branch_head(self, branch, commit):
|
def set_branch_head(self, branch, commit):
|
||||||
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
|
return self.git_run(["update-ref", f"refs/heads/{branch}", commit])
|
||||||
|
|
||||||
|
@@ -1,8 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import parse_qs
|
||||||
|
|
||||||
|
import psycopg
|
||||||
|
from urllib3.util import url
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
from lib import db
|
||||||
from lib.binary import is_binary_or_large
|
from lib.binary import is_binary_or_large
|
||||||
from lib.db import DB
|
from lib.db import DB
|
||||||
from lib.git import Git
|
from lib.git import Git
|
||||||
@@ -12,6 +16,12 @@ from lib.proxy_sha256 import ProxySHA256
|
|||||||
from lib.tree_builder import TreeBuilder
|
from lib.tree_builder import TreeBuilder
|
||||||
from lib.user import User
|
from lib.user import User
|
||||||
|
|
||||||
|
def is_number(s):
|
||||||
|
try:
|
||||||
|
float(s)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
class GitExporter:
|
class GitExporter:
|
||||||
def __init__(self, api_url, project, package, repodir, cachedir):
|
def __init__(self, api_url, project, package, repodir, cachedir):
|
||||||
@@ -30,49 +40,106 @@ class GitExporter:
|
|||||||
else:
|
else:
|
||||||
self.git.create()
|
self.git.create()
|
||||||
self.git.add_gitea_remote(package)
|
self.git.add_gitea_remote(package)
|
||||||
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
|
|
||||||
self.gc_interval = 200
|
self.gc_interval = 200
|
||||||
self.cachedir = cachedir
|
self.cachedir = cachedir
|
||||||
|
|
||||||
def set_gc_interval(self, gc):
|
def set_gc_interval(self, gc):
|
||||||
self.gc_interval = gc
|
self.gc_interval = gc
|
||||||
|
|
||||||
def check_repo_state(self, flats, branch_state):
|
def reconstruct_state(self, flats):
|
||||||
state_data = dict()
|
state_data = dict()
|
||||||
if os.path.exists(self.state_file):
|
prefix = "OBS-URL: "
|
||||||
with open(self.state_file) as f:
|
for line in self.git.branch_commit("factory").splitlines():
|
||||||
state_data = yaml.safe_load(f)
|
if line.startswith(prefix):
|
||||||
if not isinstance(state_data, dict):
|
u = url.parse_url(line.strip(prefix))
|
||||||
state_data = {}
|
if u.path != f"/package/show/openSUSE:Factory/{self.package}" or "rev=" not in u.query:
|
||||||
|
continue
|
||||||
|
v = parse_qs(u.query)
|
||||||
|
rev = v['rev'][0]
|
||||||
|
with self.db.cursor() as cur:
|
||||||
|
try:
|
||||||
|
if is_number(rev):
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", ('openSUSE:Factory', self.package, rev,))
|
||||||
|
else:
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", ('openSUSE:Factory', self.package, rev, rev))
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logging.error(e)
|
||||||
|
self.db.conn.rollback()
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return state_data
|
||||||
|
state_data['factory'] = row[0]
|
||||||
|
try:
|
||||||
|
print("devel reconstruct")
|
||||||
|
d = self.devel_rev(flats)
|
||||||
|
if d is not None:
|
||||||
|
prj = d.commit.project
|
||||||
|
for line in self.git.branch_commit("devel").splitlines():
|
||||||
|
if line.startswith(prefix):
|
||||||
|
u = url.parse_url(line.strip(prefix))
|
||||||
|
if u.path != f"/package/show/{prj}/{self.package}" or u.query is None or "rev=" not in u.query:
|
||||||
|
continue
|
||||||
|
v = parse_qs(u.query)
|
||||||
|
rev = v['rev'][0]
|
||||||
|
try:
|
||||||
|
with self.db.cursor() as cur:
|
||||||
|
logging.debug(f"finding id for ({prj, self.package, rev}")
|
||||||
|
if is_number(rev):
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND rev=%s", (prj, self.package, rev,))
|
||||||
|
else:
|
||||||
|
cur.execute("SELECT id FROM revisions WHERE project=%s AND package=%s AND expanded_srcmd5=%s", (prj, self.package, rev,))
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
logging.info(" ** cannot find revision for devel branch:", rev)
|
||||||
|
return state_data
|
||||||
|
state_data['devel'] = row[0]
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logging.error(e)
|
||||||
|
self.db.conn.rollback()
|
||||||
|
if state_data['factory'] is not None:
|
||||||
|
state_data['devel'] = state_data['factory']
|
||||||
|
except:
|
||||||
|
if state_data['factory'] is not None:
|
||||||
|
state_data['devel'] = state_data['factory']
|
||||||
|
return state_data
|
||||||
|
|
||||||
|
def check_repo_state(self, flats, branch_state, branch):
|
||||||
|
state_data = self.reconstruct_state(flats)
|
||||||
|
|
||||||
|
logging.debug(f"state data: {state_data}")
|
||||||
left_to_commit = []
|
left_to_commit = []
|
||||||
for flat in reversed(flats):
|
for flat in reversed(flats):
|
||||||
found_state = False
|
found_state = False
|
||||||
for branch in ["factory"]:
|
if flat.commit.dbid == state_data.get(branch):
|
||||||
if flat.commit.dbid == state_data.get(branch):
|
branch_state[branch] = flat.commit
|
||||||
branch_state[branch] = flat.commit
|
flat.commit.git_commit = self.git.branch_head(branch)
|
||||||
flat.commit.git_commit = self.git.branch_head(branch)
|
logging.debug(
|
||||||
logging.debug(
|
f"Found {self.git.path}'s {branch} branch in state {flat}"
|
||||||
f"Found {self.git.path}'s {branch} branch in state {flat}"
|
)
|
||||||
)
|
left_to_commit = []
|
||||||
left_to_commit = []
|
found_state = True
|
||||||
found_state = True
|
|
||||||
if not found_state:
|
if not found_state:
|
||||||
left_to_commit.append(flat)
|
left_to_commit.append(flat)
|
||||||
return left_to_commit
|
return left_to_commit
|
||||||
|
|
||||||
|
def devel_rev(self, tree):
|
||||||
|
for flat in tree:
|
||||||
|
if flat.branch == "devel":
|
||||||
|
return flat
|
||||||
|
return None
|
||||||
|
|
||||||
def export_as_git(self):
|
def export_as_git(self):
|
||||||
if os.getenv("CHECK_ALL_LFS"):
|
if os.getenv("CHECK_ALL_LFS"):
|
||||||
LFSOid.check_all(self.db, self.package)
|
LFSOid.check_all(self.db, self.package)
|
||||||
tree = TreeBuilder(self.db).build(self.project, self.package)
|
tree = TreeBuilder(self.db).build(self.project, self.package)
|
||||||
flats = tree.as_flat_list()
|
added_commits = False
|
||||||
|
|
||||||
branch_state = {"factory": None, "devel": None}
|
if tree == None: # eg. python-M2Crypto errors
|
||||||
mcepl
commented
??? As a maintainer of M2Crypto (both upstream and in openSUSE), I am curious about this comment? What’s the problem with it? ??? As a maintainer of M2Crypto (both upstream and in openSUSE), I am curious about this comment? What’s the problem with it?
dirkmueller
commented
thats when the ssl connection terminates, because the server is overloaded. no issue in m2crypto. thats when the ssl connection terminates, because the server is overloaded. no issue in m2crypto.
|
|||||||
left_to_commit = self.check_repo_state(flats, branch_state)
|
|
||||||
|
|
||||||
if not left_to_commit:
|
|
||||||
return
|
return
|
||||||
|
flats = tree.as_flat_list()
|
||||||
logging.info(f"Commiting into {self.git.path}")
|
branch_state = {"factory": None, "devel": None}
|
||||||
|
left_to_commit = self.check_repo_state(flats, branch_state, "factory")
|
||||||
|
logging.info(f"Commiting into {self.git.path} {len(left_to_commit)} into factory")
|
||||||
self.run_gc()
|
self.run_gc()
|
||||||
users = dict()
|
users = dict()
|
||||||
|
|
||||||
@@ -85,13 +152,40 @@ class GitExporter:
|
|||||||
self.run_gc()
|
self.run_gc()
|
||||||
logging.debug(f"Committing {flat}")
|
logging.debug(f"Committing {flat}")
|
||||||
self.commit_flat(flat, branch_state)
|
self.commit_flat(flat, branch_state)
|
||||||
|
added_commits = True
|
||||||
|
|
||||||
|
# export the devel_tree head commits based on the devel branch
|
||||||
|
if self.project == "openSUSE:Factory":
|
||||||
|
devel_head = self.devel_rev(flats)
|
||||||
|
flat_devel = None
|
||||||
|
if devel_head is not None:
|
||||||
|
logging.debug(f"building devel revisions chain for {devel_head.commit.project} / {self.package}")
|
||||||
|
flat_devel = TreeBuilder(self.db).revisions_chain(devel_head.commit.project, self.package).as_flat_list()
|
||||||
|
for f in flat_devel:
|
||||||
|
f.branch = "devel"
|
||||||
|
|
||||||
|
if flat_devel is not None:
|
||||||
|
left_to_commit = self.check_repo_state(flat_devel, branch_state, "devel")
|
||||||
|
logging.debug(branch_state)
|
||||||
|
logging.debug(f"appending {len(left_to_commit)} items on top of devel")
|
||||||
|
for flat in left_to_commit:
|
||||||
|
if flat.commit.userid not in users:
|
||||||
|
users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
|
||||||
|
flat.user = users[flat.commit.userid]
|
||||||
|
self.gc_cnt -= 1
|
||||||
|
if self.gc_cnt <= 0 and self.gc_interval:
|
||||||
|
self.run_gc()
|
||||||
|
logging.debug(f"Committing {flat}")
|
||||||
|
self.commit_flat(flat, branch_state)
|
||||||
|
added_commits = True
|
||||||
|
|
||||||
# make sure that we create devel branch
|
# make sure that we create devel branch
|
||||||
if not branch_state["devel"]:
|
if not branch_state["devel"]:
|
||||||
logging.debug("force creating devel")
|
logging.debug("force creating devel")
|
||||||
self.git.set_branch_head("devel", self.git.branch_head("factory"))
|
self.git.set_branch_head("devel", self.git.branch_head("factory"))
|
||||||
|
|
||||||
self.git.push(force=True)
|
#if added_commits:
|
||||||
|
# self.git.push(force=True)
|
||||||
|
|
||||||
def run_gc(self):
|
def run_gc(self):
|
||||||
self.gc_cnt = self.gc_interval
|
self.gc_cnt = self.gc_interval
|
||||||
@@ -103,6 +197,10 @@ class GitExporter:
|
|||||||
return not self.proxy_sha256.is_text(package, filename)
|
return not self.proxy_sha256.is_text(package, filename)
|
||||||
|
|
||||||
def commit_file(self, flat, file, size, md5):
|
def commit_file(self, flat, file, size, md5):
|
||||||
|
# don't export imported _service: files, if any
|
||||||
|
if file.name[0:9] == '_service:':
|
||||||
|
return
|
||||||
|
|
||||||
# have such files been detected as text mimetype before?
|
# have such files been detected as text mimetype before?
|
||||||
if self.is_lfs_file(flat.commit.package, file.name, size):
|
if self.is_lfs_file(flat.commit.package, file.name, size):
|
||||||
file_sha256 = self.proxy_sha256.get_or_put(
|
file_sha256 = self.proxy_sha256.get_or_put(
|
||||||
@@ -172,10 +270,3 @@ class GitExporter:
|
|||||||
)
|
)
|
||||||
flat.commit.git_commit = commit
|
flat.commit.git_commit = commit
|
||||||
branch_state[flat.branch] = flat.commit
|
branch_state[flat.branch] = flat.commit
|
||||||
with open(self.state_file, "w") as f:
|
|
||||||
data = {}
|
|
||||||
for branch in ["factory", "devel"]:
|
|
||||||
commit = branch_state[branch]
|
|
||||||
if commit:
|
|
||||||
data[branch] = commit.dbid
|
|
||||||
yaml.dump(data, f)
|
|
||||||
|
@@ -42,6 +42,8 @@ class Importer:
|
|||||||
def update_db_package(self, project, package):
|
def update_db_package(self, project, package):
|
||||||
root = self.obs._history(project, package)
|
root = self.obs._history(project, package)
|
||||||
if root is None:
|
if root is None:
|
||||||
|
if self.project == "openSUSE:Factory" and project == self.project:
|
||||||
|
exit(10)
|
||||||
return
|
return
|
||||||
latest = DBRevision.max_rev(self.db, project, package)
|
latest = DBRevision.max_rev(self.db, project, package)
|
||||||
for r in root.findall("revision"):
|
for r in root.findall("revision"):
|
||||||
@@ -217,7 +219,7 @@ class Importer:
|
|||||||
logging.debug(f"Refresh {project}/{package}")
|
logging.debug(f"Refresh {project}/{package}")
|
||||||
self.refreshed_packages.add(key)
|
self.refreshed_packages.add(key)
|
||||||
if self.has_scmsync(project) or self.has_scmsync(key):
|
if self.has_scmsync(project) or self.has_scmsync(key):
|
||||||
self.packages_with_scmsync.add(package)
|
self.packages_with_scmsync.add((project, package))
|
||||||
logging.debug(f"{project}/{package} already in Git - skipping")
|
logging.debug(f"{project}/{package} already in Git - skipping")
|
||||||
return
|
return
|
||||||
self.update_db_package(project, package)
|
self.update_db_package(project, package)
|
||||||
@@ -268,15 +270,12 @@ class Importer:
|
|||||||
return self.scmsync_cache[key]
|
return self.scmsync_cache[key]
|
||||||
|
|
||||||
root = self.obs._meta(key)
|
root = self.obs._meta(key)
|
||||||
scmsync = None
|
|
||||||
scmsync_exists = False
|
scmsync_exists = False
|
||||||
if root and root.find('scmsync') is not None:
|
if root is not None:
|
||||||
scmsync = root.find('scmsync').text
|
scmsync_exists = root.find('scmsync') is not None
|
||||||
if scmsync:
|
|
||||||
scmsync_exists = scmsync.startswith('https://src.opensuse.org/pool/')
|
|
||||||
self.scmsync_cache[key] = scmsync_exists
|
self.scmsync_cache[key] = scmsync_exists
|
||||||
return scmsync_exists
|
return scmsync_exists
|
||||||
|
|
||||||
def package_with_scmsync(self, package):
|
def package_with_scmsync(self, project, package):
|
||||||
return package in self.packages_with_scmsync
|
return (project, package) in self.packages_with_scmsync
|
||||||
|
|
||||||
|
23
lib/obs.py
23
lib/obs.py
@@ -148,12 +148,21 @@ class OBS:
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _download(self, project, package, name, revision):
|
def _download(self, project, package, name, revision):
|
||||||
url = osc.core.makeurl(
|
try:
|
||||||
self.url,
|
url = osc.core.makeurl(
|
||||||
["source", project, package, name],
|
self.url,
|
||||||
{"rev": revision, "expand": 1},
|
["source", project, package, name],
|
||||||
)
|
{"rev": revision, "expand": 1},
|
||||||
return osc.core.http_GET(url)
|
)
|
||||||
|
return osc.core.http_GET(url)
|
||||||
|
except HTTPError as e:
|
||||||
|
if e.status == 404:
|
||||||
|
url = osc.core.makeurl(
|
||||||
|
self.url,
|
||||||
|
["source", project, package, name],
|
||||||
|
{"rev": revision, "expand": 1, "deleted": 1},
|
||||||
|
)
|
||||||
|
return osc.core.http_GET(url)
|
||||||
|
|
||||||
def download(
|
def download(
|
||||||
self,
|
self,
|
||||||
@@ -189,7 +198,7 @@ class OBS:
|
|||||||
try:
|
try:
|
||||||
root = self._xml(f"source/{project}/{package}", **params)
|
root = self._xml(f"source/{project}/{package}", **params)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
if e.code == 400:
|
if e.code == 400 or e.code == 404:
|
||||||
logging.error(
|
logging.error(
|
||||||
f"Package [{project}/{package} {params}] can't be expanded: {e}"
|
f"Package [{project}/{package} {params}] can't be expanded: {e}"
|
||||||
)
|
)
|
||||||
|
@@ -50,12 +50,12 @@ class ProxySHA256:
|
|||||||
sha = hashlib.sha256()
|
sha = hashlib.sha256()
|
||||||
while True:
|
while True:
|
||||||
buffer = fin.read(10000)
|
buffer = fin.read(10000)
|
||||||
if not buffer:
|
|
||||||
break
|
|
||||||
sha.update(buffer)
|
|
||||||
# only guess from the first 10K
|
# only guess from the first 10K
|
||||||
if not mimetype:
|
if not mimetype:
|
||||||
mimetype = self.mime.from_buffer(buffer)
|
mimetype = self.mime.from_buffer(buffer)
|
||||||
|
if not buffer:
|
||||||
|
break
|
||||||
|
sha.update(buffer)
|
||||||
fin.close()
|
fin.close()
|
||||||
LFSOid(self.db).add(
|
LFSOid(self.db).add(
|
||||||
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
|
project, package, name, revision, sha.hexdigest(), size, mimetype, file_md5
|
||||||
|
@@ -216,6 +216,8 @@ class TreeBuilder:
|
|||||||
def build(self, project, package):
|
def build(self, project, package):
|
||||||
"""Create a Factory tree (returning the top)"""
|
"""Create a Factory tree (returning the top)"""
|
||||||
factory_revisions = self.revisions_chain(project, package)
|
factory_revisions = self.revisions_chain(project, package)
|
||||||
|
if factory_revisions == None:
|
||||||
|
return None
|
||||||
self.add_merge_points(factory_revisions)
|
self.add_merge_points(factory_revisions)
|
||||||
# factory_revisions.print()
|
# factory_revisions.print()
|
||||||
self.prune_loose_end(factory_revisions)
|
self.prune_loose_end(factory_revisions)
|
||||||
|
Reference in New Issue
Block a user
Using
try … except
as a substitute forif … then … else
is always a code smell. Couldn’t you find some civilized way how to decide when to run that command without just trying it blindly?