forked from importers/git-importer
Split out History class
This commit is contained in:
parent
13f49f59c9
commit
bdc1dc0cc9
86
binary.py
Normal file
86
binary.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import pathlib
|
||||||
|
|
||||||
|
# File suffixes that is_binary_or_large() always reports as binary,
# regardless of the file size.
BINARY = {
    ".7z",
    ".bsp",
    ".bz2",
    ".gem",
    ".gz",
    ".jar",
    ".lz",
    ".lzma",
    ".obscpio",
    ".oxt",
    ".pdf",
    ".png",
    ".rpm",
    ".tbz",
    ".tbz2",
    ".tgz",
    ".ttf",
    ".txz",
    ".whl",
    ".xz",
    ".zip",
    ".zst",
}

# File suffixes that is_binary_or_large() always reports as non-binary,
# regardless of the file size.  Built once at import time instead of on
# every call.
_NON_BINARY = frozenset(
    {
        ".1",
        ".8",
        ".SUSE",
        ".asc",
        ".c",
        ".cabal",
        ".cfg",
        ".changes",
        ".conf",
        ".desktop",
        ".dif",
        ".diff",
        ".dsc",
        ".el",
        ".html",
        ".in",
        ".init",
        ".install",
        ".keyring",
        ".kiwi",
        ".logrotate",
        ".macros",
        ".md",
        ".obsinfo",
        ".pamd",
        ".patch",
        ".pl",
        ".pom",
        ".py",
        ".rpmlintrc",
        ".rules",
        ".script",
        ".service",
        ".sh",
        ".sig",
        ".sign",
        ".spec",
        ".sysconfig",
        ".test",
        ".txt",
        ".xml",
        ".yml",
    }
)

# Files with an unrecognised suffix count as "large" from this size upwards.
_LARGE_FILE_SIZE = 6 * 1024


def is_binary_or_large(filename, size):
    """Decide whether a file is binary or large, based on its suffix and size.

    Only the last suffix of ``filename`` is considered (``"a.tar.gz"`` is
    looked up as ``".gz"``) and the comparison is case-sensitive.

    Args:
        filename: Name or path of the file; only its suffix is inspected.
        size: Size of the file, compared against ``6 * 1024``
            (presumably bytes -- confirm with the callers).

    Returns:
        True if the suffix is listed in ``BINARY``; False if it is a known
        text suffix; otherwise True exactly when ``size >= 6 * 1024``.
    """
    suffix = pathlib.Path(filename).suffix
    if suffix in BINARY:
        return True
    if suffix in _NON_BINARY:
        return False
    # Unknown suffix: fall back to the size heuristic.
    return size >= _LARGE_FILE_SIZE
|
||||||
|
|
191
git-importer.py
191
git-importer.py
@ -4,10 +4,8 @@ import argparse
|
|||||||
import errno
|
import errno
|
||||||
import functools
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
|
||||||
import logging
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@ -19,7 +17,9 @@ import osc.core
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from request import Request
|
from request import Request
|
||||||
from revision import Revision
|
from git import Git
|
||||||
|
from history import History
|
||||||
|
from binary import is_binary_or_large
|
||||||
|
|
||||||
# Add a retry wrapper for some of the HTTP actions.
|
# Add a retry wrapper for some of the HTTP actions.
|
||||||
def retry(func):
|
def retry(func):
|
||||||
@ -62,32 +62,6 @@ def retry(func):
|
|||||||
osc.core.http_GET = retry(osc.core.http_GET)
|
osc.core.http_GET = retry(osc.core.http_GET)
|
||||||
|
|
||||||
|
|
||||||
BINARY = {
|
|
||||||
".7z",
|
|
||||||
".bsp",
|
|
||||||
".bz2",
|
|
||||||
".gem",
|
|
||||||
".gz",
|
|
||||||
".jar",
|
|
||||||
".lz",
|
|
||||||
".lzma",
|
|
||||||
".obscpio",
|
|
||||||
".oxt",
|
|
||||||
".pdf",
|
|
||||||
".png",
|
|
||||||
".rpm",
|
|
||||||
".tbz",
|
|
||||||
".tbz2",
|
|
||||||
".tgz",
|
|
||||||
".ttf",
|
|
||||||
".txz",
|
|
||||||
".whl",
|
|
||||||
".xz",
|
|
||||||
".zip",
|
|
||||||
".zst",
|
|
||||||
}
|
|
||||||
|
|
||||||
LFS_SUFFIX = "filter=lfs diff=lfs merge=lfs -text"
|
|
||||||
|
|
||||||
URL_OBS = "https://api.opensuse.org"
|
URL_OBS = "https://api.opensuse.org"
|
||||||
URL_IBS = "https://api.suse.de"
|
URL_IBS = "https://api.suse.de"
|
||||||
@ -120,66 +94,6 @@ PROJECTS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
# File suffixes that is_binary_or_large() always reports as non-binary,
# regardless of the file size.  Built once at import time instead of on
# every call.
_NON_BINARY = frozenset(
    {
        ".1",
        ".8",
        ".SUSE",
        ".asc",
        ".c",
        ".cabal",
        ".cfg",
        ".changes",
        ".conf",
        ".desktop",
        ".dif",
        ".diff",
        ".dsc",
        ".el",
        ".html",
        ".in",
        ".init",
        ".install",
        ".keyring",
        ".kiwi",
        ".logrotate",
        ".macros",
        ".md",
        ".obsinfo",
        ".pamd",
        ".patch",
        ".pl",
        ".pom",
        ".py",
        ".rpmlintrc",
        ".rules",
        ".script",
        ".service",
        ".sh",
        ".sig",
        ".sign",
        ".spec",
        ".sysconfig",
        ".test",
        ".txt",
        ".xml",
        ".yml",
    }
)

# Files with an unrecognised suffix count as "large" from this size upwards.
_LARGE_FILE_SIZE = 6 * 1024


def is_binary_or_large(filename, size):
    """Decide whether a file is binary or large, based on its suffix and size.

    Only the last suffix of ``filename`` is considered (``"a.tar.gz"`` is
    looked up as ``".gz"``) and the comparison is case-sensitive.

    Args:
        filename: Name or path of the file; only its suffix is inspected.
        size: Size of the file, compared against ``6 * 1024``
            (presumably bytes -- confirm with the callers).

    Returns:
        True if the suffix is listed in the module-level ``BINARY`` set;
        False if it is a known text suffix; otherwise True exactly when
        ``size >= 6 * 1024``.
    """
    suffix = pathlib.Path(filename).suffix
    if suffix in BINARY:
        return True
    if suffix in _NON_BINARY:
        return False
    # Unknown suffix: fall back to the size heuristic.
    return size >= _LARGE_FILE_SIZE
|
|
||||||
|
|
||||||
|
|
||||||
def _hash(hash_alg, file_or_path):
|
def _hash(hash_alg, file_or_path):
|
||||||
h = hash_alg()
|
h = hash_alg()
|
||||||
|
|
||||||
@ -380,105 +294,6 @@ class ProxySHA256:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
class History:
    """Store the history of revisions of a package in different
    projects.

    ``revisions`` maps a project name to whatever ``_fetch_revisions``
    returned for it: a list of parsed revisions, or ``None`` when the
    backend produced no history document.
    """

    def __init__(self, obs, package):
        """Create an empty history for ``package``, fetched through ``obs``."""
        self.obs = obs
        self.package = package

        # project name -> list of revisions (or None, see class docstring)
        self.revisions = {}

    def __contains__(self, project):
        """Return True if the history of ``project`` was already fetched."""
        return project in self.revisions

    def __getitem__(self, project):
        """Return the stored revisions of ``project`` (KeyError if unknown)."""
        return self.revisions[project]

    def _project_revisions(self, project):
        """Return the revisions of ``project`` as a list, never ``None``."""
        # Unknown projects and projects whose fetch stored None both
        # behave like an empty history.
        return self.revisions.get(project) or []

    def _extract_copypac(self, comment):
        """Return the source project named in an ``osc copypac`` comment.

        Returns ``None`` when ``comment`` does not contain the marker.
        """
        match = re.search(r"osc copypac from project:(.*) package:", comment)
        return match.group(1) if match else None

    def _fetch_revisions(self, project, **params):
        """Fetch and parse the revisions of the package in ``project``.

        ``params`` are passed through to ``self.obs._history``.  Returns a
        list with one parsed ``Revision`` per ``revision`` element, or
        ``None`` when ``self.obs._history`` returned ``None``.
        """
        root = self.obs._history(project, self.package, **params)
        if root is None:
            return None
        return [
            Revision(self.obs, self, project, self.package).parse(r)
            for r in root.findall("revision")
        ]

    def fetch_revisions(self, project, follow_copypac=False):
        """Get the revision history of a package

        Does nothing if ``project`` was already fetched.
        """
        if project in self:
            return

        self.revisions[project] = self._fetch_revisions(project)
        # TODO: ``follow_copypac`` is currently ignored.  Prepending the
        # pre-copypac history (found via _extract_copypac on the first
        # revision's comment and fetched with deleted=1) is disabled
        # because the old project name is still missing.

    def fetch_all_revisions(self, projects):
        """Pre-populate the history

        ``projects`` is an iterable of 3-tuples whose first item is the
        project name and whose last item is the API URL to switch to
        before fetching; the middle item is ignored.
        """
        for project, _, api_url in projects:
            self.obs.change_url(api_url)
            self.fetch_revisions(project)

    def sort_all_revisions(self):
        """Sort revisions for all projects, from older to newer"""
        # Skip projects whose fetch stored None (or an empty list) so a
        # single missing history does not break the whole sort.
        fetched = (revs for revs in self.revisions.values() if revs)
        return sorted(itertools.chain.from_iterable(fetched), key=lambda x: x.time)

    def find_revision(self, project, revisionid, accepted_at):
        """Find the revision of ``project`` identified by ``revisionid``.

        ``revisionid`` is matched against the revision number (compared
        as strings) or against ``srcmd5``.  If the matching revision is
        ignored, the last revision with a commit seen before it is
        returned instead.  If a revision newer than ``accepted_at`` is
        reached before any match, the last committed revision seen so far
        is returned as a workaround.  Returns ``None`` when nothing fits.
        """
        last_commited_revision = None
        for r in self._project_revisions(project):
            logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}")
            if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid:
                if r.ignored:
                    logging.debug(
                        f"{r} fits but is ignored, returning {last_commited_revision}"
                    )
                    return last_commited_revision
                logging.debug(f"{r} fits")
                return r
            if r.time > accepted_at:
                # if we can't find the right revision, we take the last
                # commit. Before ~2012 the data was tracked really loosely
                # (e.g. using different timezones and the state field was
                # only introduced in 2016...)
                logging.warning(
                    f"Deploying workaround for missing request revision - returning {last_commited_revision}"
                )
                return last_commited_revision
            if r.commit:
                last_commited_revision = r
        logging.info("No commited revision found, returning None")
        return None

    def find_last_rev_after_time(self, project, time):
        """Return the latest revision of ``project`` not newer than ``time``.

        Walks the stored revisions in order and returns the first one
        whose time equals ``time``, or the revision preceding the first
        one that is newer than ``time``.  Returns ``None`` if there is no
        such revision.  NOTE(review): this presumably relies on the
        revisions being stored from oldest to newest -- confirm.
        """
        prev = None
        for rev in self._project_revisions(project):
            if rev.time > time:
                return prev
            if rev.time == time:
                return rev
            prev = rev
        return prev
|
|
||||||
|
|
||||||
|
|
||||||
class Importer:
|
class Importer:
|
||||||
|
3
git.py
3
git.py
@ -4,6 +4,9 @@ import logging
|
|||||||
import subprocess
|
import subprocess
|
||||||
import fnmatch
|
import fnmatch
|
||||||
|
|
||||||
|
from binary import BINARY
|
||||||
|
|
||||||
|
LFS_SUFFIX = "filter=lfs diff=lfs merge=lfs -text"
|
||||||
|
|
||||||
class Git:
|
class Git:
|
||||||
"""Local git repository"""
|
"""Local git repository"""
|
||||||
|
106
history.py
Normal file
106
history.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
import re
|
||||||
|
import logging
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from revision import Revision
|
||||||
|
|
||||||
|
|
||||||
|
class History:
    """Store the history of revisions of a package in different
    projects.

    ``revisions`` maps a project name to whatever ``_fetch_revisions``
    returned for it: a list of parsed revisions, or ``None`` when the
    backend produced no history document.
    """

    def __init__(self, obs, package):
        """Create an empty history for ``package``, fetched through ``obs``."""
        self.obs = obs
        self.package = package

        # project name -> list of revisions (or None, see class docstring)
        self.revisions = {}

    def __contains__(self, project):
        """Return True if the history of ``project`` was already fetched."""
        return project in self.revisions

    def __getitem__(self, project):
        """Return the stored revisions of ``project`` (KeyError if unknown)."""
        return self.revisions[project]

    def _project_revisions(self, project):
        """Return the revisions of ``project`` as a list, never ``None``."""
        # Unknown projects and projects whose fetch stored None both
        # behave like an empty history.
        return self.revisions.get(project) or []

    def _extract_copypac(self, comment):
        """Return the source project named in an ``osc copypac`` comment.

        Returns ``None`` when ``comment`` does not contain the marker.
        """
        match = re.search(r"osc copypac from project:(.*) package:", comment)
        return match.group(1) if match else None

    def _fetch_revisions(self, project, **params):
        """Fetch and parse the revisions of the package in ``project``.

        ``params`` are passed through to ``self.obs._history``.  Returns a
        list with one parsed ``Revision`` per ``revision`` element, or
        ``None`` when ``self.obs._history`` returned ``None``.
        """
        root = self.obs._history(project, self.package, **params)
        if root is None:
            return None
        return [
            Revision(self.obs, self, project, self.package).parse(r)
            for r in root.findall("revision")
        ]

    def fetch_revisions(self, project, follow_copypac=False):
        """Get the revision history of a package

        Does nothing if ``project`` was already fetched.
        """
        if project in self:
            return

        self.revisions[project] = self._fetch_revisions(project)
        # TODO: ``follow_copypac`` is currently ignored.  Prepending the
        # pre-copypac history (found via _extract_copypac on the first
        # revision's comment and fetched with deleted=1) is disabled
        # because the old project name is still missing.

    def fetch_all_revisions(self, projects):
        """Pre-populate the history

        ``projects`` is an iterable of 3-tuples whose first item is the
        project name and whose last item is the API URL to switch to
        before fetching; the middle item is ignored.
        """
        for project, _, api_url in projects:
            self.obs.change_url(api_url)
            self.fetch_revisions(project)

    def sort_all_revisions(self):
        """Sort revisions for all projects, from older to newer"""
        # Skip projects whose fetch stored None (or an empty list) so a
        # single missing history does not break the whole sort.
        fetched = (revs for revs in self.revisions.values() if revs)
        return sorted(itertools.chain.from_iterable(fetched), key=lambda x: x.time)

    def find_revision(self, project, revisionid, accepted_at):
        """Find the revision of ``project`` identified by ``revisionid``.

        ``revisionid`` is matched against the revision number (compared
        as strings) or against ``srcmd5``.  If the matching revision is
        ignored, the last revision with a commit seen before it is
        returned instead.  If a revision newer than ``accepted_at`` is
        reached before any match, the last committed revision seen so far
        is returned as a workaround.  Returns ``None`` when nothing fits.
        """
        last_commited_revision = None
        for r in self._project_revisions(project):
            logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}")
            if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid:
                if r.ignored:
                    logging.debug(
                        f"{r} fits but is ignored, returning {last_commited_revision}"
                    )
                    return last_commited_revision
                logging.debug(f"{r} fits")
                return r
            if r.time > accepted_at:
                # if we can't find the right revision, we take the last
                # commit. Before ~2012 the data was tracked really loosely
                # (e.g. using different timezones and the state field was
                # only introduced in 2016...)
                logging.warning(
                    f"Deploying workaround for missing request revision - returning {last_commited_revision}"
                )
                return last_commited_revision
            if r.commit:
                last_commited_revision = r
        logging.info("No commited revision found, returning None")
        return None

    def find_last_rev_after_time(self, project, time):
        """Return the latest revision of ``project`` not newer than ``time``.

        Walks the stored revisions in order and returns the first one
        whose time equals ``time``, or the revision preceding the first
        one that is newer than ``time``.  Returns ``None`` if there is no
        such revision.  NOTE(review): this presumably relies on the
        revisions being stored from oldest to newest -- confirm.
        """
        prev = None
        for rev in self._project_revisions(project):
            if rev.time > time:
                return prev
            if rev.time == time:
                return rev
            prev = rev
        return prev
|
Loading…
Reference in New Issue
Block a user