forked from adamm/git-importer
Compare commits
11 Commits
Author | SHA1 | Date | |
---|---|---|---|
5da7861c2a | |||
c9e07e536f | |||
dc0f33354e | |||
56cbe0a125 | |||
4353f015c8 | |||
9cbe0899bc | |||
9e80a64fe0 | |||
12001b1640 | |||
3797ea178a | |||
999dcabcfa | |||
9962673eff |
1355
gone-packages.txt
Normal file
1355
gone-packages.txt
Normal file
File diff suppressed because it is too large
Load Diff
196
lib/git.py
196
lib/git.py
@ -4,7 +4,6 @@ import os
|
||||
import pathlib
|
||||
import subprocess
|
||||
|
||||
import pygit2
|
||||
import requests
|
||||
|
||||
from lib.binary import BINARY
|
||||
@ -20,11 +19,6 @@ class Git:
|
||||
self.committer = committer
|
||||
self.committer_email = committer_email
|
||||
|
||||
self.repo = None
|
||||
|
||||
def is_open(self):
|
||||
return self.repo is not None
|
||||
|
||||
def exists(self):
|
||||
"""Check if the path is a valid git repository"""
|
||||
return (self.path / ".git").exists()
|
||||
@ -35,35 +29,60 @@ class Git:
|
||||
self.open()
|
||||
|
||||
def open(self):
|
||||
# Convert the path to string, to avoid some limitations in
|
||||
# older pygit2
|
||||
self.repo = pygit2.init_repository(str(self.path))
|
||||
subprocess.run(
|
||||
['git', 'init', '--object-format=sha256', '-b', 'factory'],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def is_dirty(self):
|
||||
"""Check if there is something to commit"""
|
||||
assert self.is_open()
|
||||
|
||||
return self.repo.status()
|
||||
status_str = subprocess.run(
|
||||
['git', 'status', '--porcelain=2'],
|
||||
cwd=self.path,
|
||||
stdout=subprocess.PIPE,
|
||||
check=True
|
||||
).stdout.decode('utf-8')
|
||||
return len(list(filter(None, status_str.split('\n')))) > 0
|
||||
|
||||
def branches(self):
|
||||
return list(self.repo.branches)
|
||||
br=subprocess.run(
|
||||
['git', 'for-each-ref', '--format=%(refname:short)', 'refs/heads/'],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
stdout=subprocess.PIPE
|
||||
).stdout.decode('utf-8').split()
|
||||
if len(br) == 0:
|
||||
br.append('factory') # unborn branch?
|
||||
return br
|
||||
|
||||
def branch(self, branch, commit=None):
|
||||
if not commit:
|
||||
commit = self.repo.head
|
||||
else:
|
||||
commit = self.repo.get(commit)
|
||||
self.repo.branches.local.create(branch, commit)
|
||||
def branch(self, branch, commit='HEAD'):
|
||||
commit = subprocess.run(
|
||||
['git', 'rev-parse', '--verify', '--end-of-options', commit + '^{commit}'],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
stdout=subprocess.PIPE
|
||||
).stdout.decode('utf-8').strip()
|
||||
return subprocess.run(['git', 'branch', branch, commit], check=True)
|
||||
|
||||
def checkout(self, branch):
|
||||
"""Checkout into the branch HEAD"""
|
||||
new_branch = False
|
||||
ref = f"refs/heads/{branch}"
|
||||
if branch not in self.branches():
|
||||
self.repo.references["HEAD"].set_target(ref)
|
||||
subprocess.run(
|
||||
['git', 'branch', '-q', branch, 'HEAD'],
|
||||
cwd=self.path,
|
||||
check=True
|
||||
)
|
||||
new_branch = True
|
||||
else:
|
||||
self.repo.checkout(ref)
|
||||
ref = f"refs/heads/{branch}"
|
||||
if (self.path/'.git'/ref).exists():
|
||||
subprocess.run(
|
||||
['git', 'checkout', '-q', branch],
|
||||
cwd=self.path,
|
||||
check=True
|
||||
)
|
||||
return new_branch
|
||||
|
||||
def commit(
|
||||
@ -87,30 +106,62 @@ class Git:
|
||||
committer_time = committer_time if committer_time else user_time
|
||||
|
||||
if self.is_dirty():
|
||||
self.repo.index.add_all()
|
||||
subprocess.run(
|
||||
["git", "add", "--all", "."],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
self.repo.index.write()
|
||||
author = pygit2.Signature(user, user_email, int(user_time.timestamp()))
|
||||
committer = pygit2.Signature(
|
||||
committer, committer_email, int(committer_time.timestamp())
|
||||
tree_id = subprocess.run(
|
||||
['git', 'write-tree'],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
stdout=subprocess.PIPE
|
||||
).stdout.decode('utf-8').strip()
|
||||
|
||||
parent_array = []
|
||||
if isinstance(parents, list):
|
||||
for parent in filter(None, parents):
|
||||
parent_array = parent_array + ['-p', parent]
|
||||
elif isinstance(parents, str):
|
||||
parents_array = ['-p', parents]
|
||||
|
||||
commit_id = subprocess.run(
|
||||
['git', 'commit-tree'] + parent_array + [tree_id],
|
||||
cwd=self.path,
|
||||
env={
|
||||
"GIT_AUTHOR_NAME": user,
|
||||
"GIT_AUTHOR_EMAIL": user_email,
|
||||
"GIT_AUTHOR_DATE": f"{int(user_time.timestamp())} +0000",
|
||||
"GIT_COMMITTER_NAME": committer,
|
||||
"GIT_COMMITTER_EMAIL": committer_email,
|
||||
"GIT_COMMITTER_DATE": f"{int(committer_time.timestamp())} +0000",
|
||||
},
|
||||
input=message.encode('utf-8'),
|
||||
check=True,
|
||||
stdout=subprocess.PIPE
|
||||
).stdout.decode('utf-8').rstrip()
|
||||
subprocess.run(
|
||||
['git', 'reset', '--soft', commit_id],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
return commit_id
|
||||
|
||||
tree = self.repo.index.write_tree()
|
||||
return self.repo.create_commit(
|
||||
"HEAD", author, committer, message, tree, parents
|
||||
)
|
||||
|
||||
def last_commit(self):
|
||||
try:
|
||||
return self.repo.head.target
|
||||
except:
|
||||
return None
|
||||
|
||||
def branch_head(self, branch):
|
||||
return self.repo.references["refs/heads/" + branch].target
|
||||
def branch_head(self, branch='HEAD'):
|
||||
return subprocess.run(
|
||||
['git', 'rev-parse', '--verify', '--end-of-options', branch],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
stdout=subprocess.PIPE
|
||||
).stdout.decode('utf-8').strip()
|
||||
|
||||
def set_branch_head(self, branch, commit):
|
||||
self.repo.references["refs/heads/" + branch].set_target(commit)
|
||||
return subprocess.run(
|
||||
['git', 'branch', '-f', branch, commit],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def gc(self):
|
||||
logging.debug(f"Garbage recollect and repackage {self.path}")
|
||||
@ -121,17 +172,21 @@ class Git:
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
|
||||
def clean(self):
|
||||
for path, _ in self.repo.status().items():
|
||||
logging.debug(f"Cleaning {path}")
|
||||
try:
|
||||
(self.path / path).unlink()
|
||||
self.repo.index.remove(path)
|
||||
except Exception as e:
|
||||
logging.warning(f"Error removing file {path}: {e}")
|
||||
# def clean(self):
|
||||
# for path, _ in self.repo.status().items():
|
||||
# logging.debug(f"Cleaning {path}")
|
||||
# try:
|
||||
# (self.path / path).unlink()
|
||||
# self.repo.index.remove(path)
|
||||
# except Exception as e:
|
||||
# logging.warning(f"Error removing file {path}: {e}")
|
||||
|
||||
def add(self, filename):
|
||||
self.repo.index.add(filename)
|
||||
subprocess.run(
|
||||
['git', 'add', filename],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def add_default_lfs_gitattributes(self, force=False):
|
||||
if not (self.path / ".gitattributes").exists() or force:
|
||||
@ -185,9 +240,11 @@ class Git:
|
||||
return any(fnmatch.fnmatch(filename, line) for line in patterns)
|
||||
|
||||
def remove(self, file: pathlib.Path):
|
||||
self.repo.index.remove(file.name)
|
||||
(self.path / file).unlink()
|
||||
|
||||
subprocess.run(
|
||||
['git', 'rm', '-q', '--ignore-unmatch', file.name],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
patterns = self.get_specific_lfs_gitattributes()
|
||||
if file.name in patterns:
|
||||
patterns.remove(file.name)
|
||||
@ -201,7 +258,7 @@ class Git:
|
||||
logging.warning("Not adding a remote due to missing $GITEA_TOKEN")
|
||||
return
|
||||
|
||||
url = f"https://gitea.opensuse.org/api/v1/org/{org_name}/repos"
|
||||
url = f"https://src.opensuse.org/api/v1/org/{org_name}/repos"
|
||||
response = requests.post(
|
||||
url,
|
||||
data={"name": repo_name},
|
||||
@ -212,20 +269,23 @@ class Git:
|
||||
# 201 Created
|
||||
if response.status_code not in (201, 409):
|
||||
print(response.data)
|
||||
url = f"gitea@gitea.opensuse.org:{org_name}/{repo_name}.git"
|
||||
self.repo.remotes.create("origin", url)
|
||||
url = f"gitea@src.opensuse.org:{org_name}/{repo_name}.git"
|
||||
subprocess.run(
|
||||
['git', 'remote', 'add', 'origin', url],
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
def push(self, force=False):
|
||||
remo = self.repo.remotes["origin"]
|
||||
cmd = ['git', 'push'];
|
||||
if force:
|
||||
cmd.append('-f')
|
||||
cmd.append('origin')
|
||||
cmd.append('refs/heads/factory');
|
||||
cmd.append('refs/heads/devel');
|
||||
subprocess.run(
|
||||
cmd,
|
||||
cwd=self.path,
|
||||
check=True,
|
||||
)
|
||||
|
||||
keypair = pygit2.KeypairFromAgent("gitea")
|
||||
callbacks = pygit2.RemoteCallbacks(credentials=keypair)
|
||||
|
||||
refspecs = ["refs/heads/factory"]
|
||||
develspec = "refs/heads/devel"
|
||||
if develspec in self.repo.references:
|
||||
if force:
|
||||
refspecs.append(f"+{develspec}:{develspec}")
|
||||
else:
|
||||
refspecs.append("{develspec}:{develspec}")
|
||||
remo.push(refspecs, callbacks=callbacks)
|
||||
|
@ -1,5 +1,5 @@
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import pathlib
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from lib.db import DB
|
||||
@ -31,6 +31,7 @@ class Importer:
|
||||
self.obs = OBS(api_url)
|
||||
assert project == "openSUSE:Factory"
|
||||
self.refreshed_packages = set()
|
||||
self.gone_packages_set = None
|
||||
|
||||
def import_request(self, number):
|
||||
self.obs.request(number).import_into_db(self.db)
|
||||
@ -105,7 +106,7 @@ class Importer:
|
||||
with self.db.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT * FROM revisions WHERE id IN
|
||||
(SELECT revision_id from linked_revs WHERE linked_id=%s)
|
||||
(SELECT revision_id from linked_revs WHERE linked_id=%s)
|
||||
AND commit_time <= %s ORDER BY commit_time""",
|
||||
(prev.dbid, rev.commit_time),
|
||||
)
|
||||
@ -138,7 +139,7 @@ class Importer:
|
||||
fake_rev = linked.rev + rev.rev / 1000.0
|
||||
comment = f"Updating link to change in {rev.project}/{rev.package} revision {int(rev.rev)}"
|
||||
cur.execute(
|
||||
"""INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
|
||||
"""INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
|
||||
commit_time, userid, comment, api_url) VALUES(%s,%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
|
||||
(
|
||||
linked.project,
|
||||
@ -161,10 +162,12 @@ class Importer:
|
||||
(rev.dbid, linked.dbid),
|
||||
)
|
||||
|
||||
def revisions_without_files(self):
|
||||
def revisions_without_files(self, package):
|
||||
logging.debug(f"revisions_without_files({package})")
|
||||
with self.db.cursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
|
||||
"SELECT * FROM revisions WHERE package=%s AND broken=FALSE AND expanded_srcmd5 IS NULL",
|
||||
(package, )
|
||||
)
|
||||
return [DBRevision(self.db, row) for row in cur.fetchall()]
|
||||
|
||||
@ -178,11 +181,11 @@ class Importer:
|
||||
linked_rev = cur.fetchone()
|
||||
if linked_rev:
|
||||
linked_rev = linked_rev[0]
|
||||
list = self.obs.list(
|
||||
obs_dir_list = self.obs.list(
|
||||
rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev
|
||||
)
|
||||
if list:
|
||||
rev.import_dir_list(list)
|
||||
if obs_dir_list:
|
||||
rev.import_dir_list(obs_dir_list)
|
||||
md5 = rev.calculate_files_hash()
|
||||
with self.db.cursor() as cur:
|
||||
cur.execute(
|
||||
@ -196,53 +199,43 @@ class Importer:
|
||||
self.find_linked_revs()
|
||||
|
||||
self.find_fake_revisions()
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
|
||||
fs = [
|
||||
executor.submit(import_rev, self, rev)
|
||||
for rev in self.revisions_without_files()
|
||||
]
|
||||
concurrent.futures.wait(fs)
|
||||
for package in self.packages:
|
||||
for rev in self.revisions_without_files(package):
|
||||
print(f"rev {rev} is without files")
|
||||
self.import_rev(rev)
|
||||
|
||||
def refresh_package(self, project, package):
|
||||
key = f"{project}/{package}"
|
||||
if key in self.refreshed_packages:
|
||||
# refreshing once is good enough
|
||||
return
|
||||
if self.package_gone(key):
|
||||
return
|
||||
logging.debug(f"Refresh {project}/{package}")
|
||||
self.refreshed_packages.add(key)
|
||||
self.update_db_package(project, package)
|
||||
self.fetch_all_linked_packages(project, package)
|
||||
|
||||
def import_into_db(self):
|
||||
for package in self.packages:
|
||||
refresh_package(self, self.project, package)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
|
||||
fs = [
|
||||
executor.submit(refresh_package, self, self.project, package)
|
||||
for package in self.packages
|
||||
]
|
||||
concurrent.futures.wait(fs)
|
||||
self.db.conn.commit()
|
||||
|
||||
self.db.conn.commit()
|
||||
for number in DBRevision.requests_to_fetch(self.db):
|
||||
self.import_request(number)
|
||||
|
||||
fs = [
|
||||
executor.submit(import_request, self, number)
|
||||
for number in DBRevision.requests_to_fetch(self.db)
|
||||
]
|
||||
concurrent.futures.wait(fs)
|
||||
self.db.conn.commit()
|
||||
|
||||
self.db.conn.commit()
|
||||
with self.db.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT DISTINCT source_project,source_package FROM requests
|
||||
WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
|
||||
(self.project, self.packages),
|
||||
)
|
||||
for project, package in cur.fetchall():
|
||||
self.refresh_package(project, package)
|
||||
|
||||
with self.db.cursor() as cur:
|
||||
cur.execute(
|
||||
"""SELECT DISTINCT source_project,source_package FROM requests
|
||||
WHERE id IN (SELECT request_id FROM revisions WHERE project=%s and package = ANY(%s));""",
|
||||
(self.project, self.packages),
|
||||
)
|
||||
fs = [
|
||||
executor.submit(refresh_package, self, project, package)
|
||||
for project, package in cur.fetchall()
|
||||
]
|
||||
concurrent.futures.wait(fs)
|
||||
self.db.conn.commit()
|
||||
|
||||
missing_users = User.missing_users(self.db)
|
||||
@ -254,3 +247,11 @@ class Importer:
|
||||
|
||||
self.fill_file_lists()
|
||||
self.db.conn.commit()
|
||||
|
||||
def package_gone(self, key):
|
||||
if not self.gone_packages_set:
|
||||
self.gone_packages_set = set()
|
||||
with open(pathlib.Path(__file__).parent.parent / "gone-packages.txt") as f:
|
||||
for line in f.readlines():
|
||||
self.gone_packages_set.add(line.strip())
|
||||
return key in self.gone_packages_set
|
||||
|
@ -68,7 +68,7 @@ class LFSOid:
|
||||
row = cur.fetchone()
|
||||
lfs_oid_id = row[0]
|
||||
cur.execute(
|
||||
"""INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
|
||||
"""INSERT INTO lfs_oid_in_package (package,filename,lfs_oid_id)
|
||||
VALUES (%s,%s,%s)""",
|
||||
(package, filename, lfs_oid_id),
|
||||
)
|
||||
@ -83,7 +83,7 @@ class LFSOid:
|
||||
self.register()
|
||||
|
||||
def check(self):
|
||||
url = f"http://gitea.opensuse.org:9999/check/{self.sha256}/{self.size}"
|
||||
url = f"http://localhost:9999/check/{self.sha256}/{self.size}"
|
||||
response = requests.get(
|
||||
url,
|
||||
timeout=10,
|
||||
@ -127,12 +127,13 @@ class LFSOid:
|
||||
"size": self.size,
|
||||
}
|
||||
|
||||
url = "http://gitea.opensuse.org:9999/register"
|
||||
url = "http://localhost:9999/register"
|
||||
response = requests.post(
|
||||
url,
|
||||
json=data,
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
logging.info(f"Register LFS returned {response.status_code}")
|
||||
|
||||
|
||||
@ -167,7 +168,7 @@ if __name__ == "__main__":
|
||||
cur.execute(
|
||||
"""
|
||||
CREATE TEMPORARY TABLE lfs_oid_in_revision (
|
||||
revision_id INTEGER,
|
||||
revision_id INTEGER,
|
||||
lfs_oid_id INTEGER NOT NULL,
|
||||
name VARCHAR(255) NOT NULL
|
||||
)
|
||||
|
26
lib/obs.py
26
lib/obs.py
@ -148,12 +148,28 @@ class OBS:
|
||||
]
|
||||
|
||||
def _download(self, project, package, name, revision):
|
||||
# the object might be deleted but we can only pass deleted=1
|
||||
# if it is actually deleted
|
||||
deleted = 0
|
||||
while deleted < 2:
|
||||
url = osc.core.makeurl(
|
||||
self.url,
|
||||
["source", project, package, urllib.parse.quote(name)],
|
||||
{"rev": revision, "expand": 1, "deleted": deleted if deleted else ()},
|
||||
)
|
||||
try:
|
||||
osc.core.http_request("HEAD", url)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
deleted += 1
|
||||
|
||||
url = osc.core.makeurl(
|
||||
self.url,
|
||||
["source", project, package, urllib.parse.quote(name)],
|
||||
{"rev": revision, "expand": 1},
|
||||
)
|
||||
return osc.core.http_GET(url)
|
||||
self.url,
|
||||
["source", project, package, urllib.parse.quote(name)],
|
||||
{"rev": revision, "expand": 1, "deleted": 1 if deleted else ()},
|
||||
)
|
||||
return osc.core.http_request("GET", url)
|
||||
|
||||
def download(
|
||||
self,
|
||||
|
@ -7,8 +7,6 @@ except:
|
||||
print("Install python3-python-magic, not python3-magic")
|
||||
raise
|
||||
|
||||
import requests
|
||||
|
||||
from lib.db import DB
|
||||
from lib.lfs_oid import LFSOid
|
||||
from lib.obs import OBS
|
||||
|
@ -114,7 +114,7 @@ class TreeBuilder:
|
||||
candidates.append(node)
|
||||
if node.merged_into:
|
||||
# we can't have candidates that are crossing previous merges
|
||||
# see https://gitea.opensuse.org/importers/git-importer/issues/14
|
||||
# see https://src.opensuse.org/importers/git-importer/issues/14
|
||||
candidates = []
|
||||
node = node.parent
|
||||
if candidates:
|
||||
|
Reference in New Issue
Block a user