diff --git a/git-importer.py b/git-importer.py
index eabadbf..12b1910 100755
--- a/git-importer.py
+++ b/git-importer.py
@@ -52,6 +52,13 @@ def main():
         type=pathlib.Path,
         help="Local git repository directory",
     )
+    parser.add_argument(
+        "-c",
+        "--cachedir",
+        required=False,
+        type=pathlib.Path,
+        help="Local cache directory",
+    )
     parser.add_argument(
         "-g",
         "--gc",
@@ -93,9 +100,12 @@ def main():
     if not args.repodir:
         args.repodir = pathlib.Path("repos") / args.package
 
+    if not args.cachedir:
+        args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
+
     importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
     importer.import_into_db()
-    exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
+    exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
     exporter.set_gc_interval(args.gc)
     exporter.export_as_git()
 
diff --git a/lib/git_exporter.py b/lib/git_exporter.py
index 4b14720..88cfc23 100644
--- a/lib/git_exporter.py
+++ b/lib/git_exporter.py
@@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
 
 
 class GitExporter:
-    def __init__(self, api_url, project, package, repodir):
+    def __init__(self, api_url, project, package, repodir, cachedir):
         self.obs = OBS()
         self.project = project
         self.package = package
@@ -26,6 +26,7 @@ class GitExporter:
         ).create()
         self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
         self.gc_interval = 200
+        self.cachedir = cachedir
 
     def download(self, revision):
         obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
@@ -40,10 +41,7 @@ class GitExporter:
 
         # Download each file in OBS if it is not a binary (or large)
         # file
-        for (name, size, file_md5) in obs_files:
-            # Validate the MD5 of the downloaded file
-            if md5(self.git.path / name) != file_md5:
-                raise Exception(f"Download error in {name}")
+        for name in obs_files:
             self.git.add(name)
 
     def set_gc_interval(self, gc):
@@ -121,6 +119,7 @@ class GitExporter:
                 file.name,
                 flat.commit.expanded_srcmd5,
                 self.git.path,
+                self.cachedir,
                 file_md5=md5,
             )
             self.git.add(file)
diff --git a/lib/obs.py b/lib/obs.py
index 05af330..73e6d8e 100644
--- a/lib/obs.py
+++ b/lib/obs.py
@@ -1,9 +1,12 @@
 import errno
 import logging
+import shutil
 import time
 import urllib.parse
 import xml.etree.ElementTree as ET
 from urllib.error import HTTPError
+from pathlib import Path
+from lib.proxy_sha256 import md5
 
 import osc.core
 
@@ -158,10 +161,36 @@ class OBS:
         name: str,
         revision: str,
         dirpath: str,
+        cachedir: Path,
         file_md5: str,
     ) -> None:
-        with (dirpath / name).open("wb") as f:
-            f.write(self._download(project, package, name, revision).read())
+        """Fetch one file into dirpath, using the MD5-keyed cache.
+
+        The file is copied from cachedir when an entry for file_md5 exists
+        and fetched with self._download otherwise.  Raises Exception when
+        the MD5 of the resulting file differs from file_md5.
+        """
+        target = dirpath / name
+        cached_file = self._path_from_md5(name, cachedir, file_md5)
+        from_cache = self.in_cache(name, cachedir, file_md5)
+        if from_cache:
+            shutil.copy(cached_file, target)
+        else:
+            with target.open("wb") as f:
+                f.write(self._download(project, package, name, revision).read())
+
+        # Validate the MD5 before the file is trusted or cached, so a
+        # corrupt download can never poison the cache
+        if md5(target) != file_md5:
+            raise Exception(f"Download error in {name}")
+
+        if not from_cache:
+            # Populate the cache through a temporary name and an atomic
+            # rename, so an interrupted copy never leaves a truncated entry
+            cached_file.parent.mkdir(parents=True, exist_ok=True)
+            partial = cached_file.with_name(cached_file.name + ".part")
+            shutil.copy(target, partial)
+            partial.replace(cached_file)
 
     def list(self, project, package, srcmd5, linkrev):
         params = {"rev": srcmd5, "expand": "1"}
@@ -179,3 +208,13 @@ class OBS:
             raise e
 
         return root
+
+    def _path_from_md5(self, name, cachedir, md5):
+        """Return the cache location for a file with the given MD5."""
+        # Entries are sharded by the first three characters of the MD5;
+        # `name` is accepted for symmetry with in_cache() but not used
+        return cachedir / md5[:3] / md5[3:]
+
+    def in_cache(self, name, cachedir, md5):
+        """Tell whether a cache entry exists for the given MD5."""
+        return self._path_from_md5(name, cachedir, md5).exists()