optimize cached file locations and add option for cache directory

This commit is contained in:
Nico Krapp 2022-11-03 13:17:49 +01:00
parent 7678967ae0
commit 639096b548
No known key found for this signature in database
GPG Key ID: AC35CFFF55212BC7
3 changed files with 30 additions and 18 deletions

View File

@@ -52,6 +52,13 @@ def main():
type=pathlib.Path,
help="Local git repository directory",
)
parser.add_argument(
"-c",
"--cachedir",
required=False,
type=pathlib.Path,
help="Local cache directory",
)
parser.add_argument(
"-g",
"--gc",
@@ -93,9 +100,12 @@ def main():
if not args.repodir:
args.repodir = pathlib.Path("repos") / args.package
if not args.cachedir:
args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
importer.import_into_db()
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
exporter.set_gc_interval(args.gc)
exporter.export_as_git()

View File

@@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
class GitExporter:
def __init__(self, api_url, project, package, repodir):
def __init__(self, api_url, project, package, repodir, cachedir):
self.obs = OBS()
self.project = project
self.package = package
@@ -26,6 +26,7 @@ class GitExporter:
).create()
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
self.gc_interval = 200
self.cachedir = cachedir
def download(self, revision):
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
@@ -40,10 +41,7 @@ class GitExporter:
# Download each file in OBS if it is not a binary (or large)
# file
for (name, size, file_md5) in obs_files:
# Validate the MD5 of the downloaded file
if md5(self.git.path / name) != file_md5:
raise Exception(f"Download error in {name}")
for name in obs_files:
self.git.add(name)
def set_gc_interval(self, gc):
@@ -121,6 +119,7 @@ class GitExporter:
file.name,
flat.commit.expanded_srcmd5,
self.git.path,
self.cachedir,
file_md5=md5,
)
self.git.add(file)

View File

@@ -6,6 +6,7 @@ import urllib.parse
import xml.etree.ElementTree as ET
from urllib.error import HTTPError
from pathlib import Path
from lib.proxy_sha256 import md5
import osc.core
@@ -160,16 +161,22 @@ class OBS:
name: str,
revision: str,
dirpath: str,
cachedir: str,
file_md5: str,
) -> None:
cached_file = self._path_from_md5(name, dirpath, file_md5)
if not self.in_cache(name, dirpath, file_md5):
cached_file = self._path_from_md5(name, cachedir, file_md5)
if not self.in_cache(name, cachedir, file_md5):
with (dirpath / name).open("wb") as f:
f.write(self._download(project, package, name, revision).read())
shutil.copy(dirpath / name, cached_file)
else:
shutil.copy(cached_file, dirpath / name)
# Validate the MD5 of the downloaded file
if md5(dirpath / name) != file_md5:
raise Exception(f"Download error in {name}")
def list(self, project, package, srcmd5, linkrev):
params = {"rev": srcmd5, "expand": "1"}
if linkrev:
@@ -187,15 +194,11 @@ class OBS:
return root
def _path_from_md5(self, name, dirpath, md5):
cache = dirpath.joinpath(".cache/")
if not Path(cache).exists():
cache.mkdir()
filepath = cache.joinpath(f"{md5[0:3]}/{md5[3:6]}/{md5[6:9]}/")
def _path_from_md5(self, name, cachedir, md5):
filepath = cachedir / md5[:3]
cached_file = f"{md5[3:]}-{name}"
filepath.mkdir(parents=True, exist_ok=True)
return filepath.joinpath(f"{md5[9:]}-{name}")
return filepath / cached_file
def in_cache(self, name, dirpath, md5):
if self._path_from_md5(name, dirpath, md5).is_file():
return True
return False
def in_cache(self, name, cachedir, md5):
return self._path_from_md5(name, cachedir, md5).exists()