implement file caching #11

Merged
Ghost merged 3 commits from file-cache into main 2022-11-03 14:24:22 +01:00
3 changed files with 39 additions and 8 deletions

View File

@@ -52,6 +52,13 @@ def main():
         type=pathlib.Path,
         help="Local git repository directory",
     )
+    parser.add_argument(
+        "-c",
+        "--cachedir",
+        required=False,
+        type=pathlib.Path,
+        help="Local cache directory",
+    )
     parser.add_argument(
         "-g",
         "--gc",
@@ -93,9 +100,12 @@ def main():
     if not args.repodir:
         args.repodir = pathlib.Path("repos") / args.package
+    if not args.cachedir:
+        args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
     importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
     importer.import_into_db()
-    exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
+    exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
     exporter.set_gc_interval(args.gc)
     exporter.export_as_git()

View File

@@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
 class GitExporter:
-    def __init__(self, api_url, project, package, repodir):
+    def __init__(self, api_url, project, package, repodir, cachedir):
         self.obs = OBS()
         self.project = project
         self.package = package
@@ -26,6 +26,7 @@ class GitExporter:
         ).create()
         self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
         self.gc_interval = 200
+        self.cachedir = cachedir

     def download(self, revision):
         obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
@@ -40,10 +41,7 @@
         # Download each file in OBS if it is not a binary (or large)
         # file
         for (name, size, file_md5) in obs_files:
-            # Validate the MD5 of the downloaded file
-            if md5(self.git.path / name) != file_md5:
-                raise Exception(f"Download error in {name}")

         for name in obs_files:
             self.git.add(name)

     def set_gc_interval(self, gc):
@@ -121,6 +119,7 @@ class GitExporter:
                 file.name,
                 flat.commit.expanded_srcmd5,
                 self.git.path,
+                self.cachedir,
                 file_md5=md5,
             )
             self.git.add(file)

View File

@@ -1,9 +1,12 @@
 import errno
 import logging
+import shutil
 import time
 import urllib.parse
 import xml.etree.ElementTree as ET
 from urllib.error import HTTPError
+from pathlib import Path
+from lib.proxy_sha256 import md5

 import osc.core
@@ -158,10 +161,21 @@ class OBS:
         name: str,
         revision: str,
         dirpath: str,
+        cachedir: str,
+        file_md5: str,
     ) -> None:
-        with (dirpath / name).open("wb") as f:
-            f.write(self._download(project, package, name, revision).read())
+        cached_file = self._path_from_md5(name, cachedir, file_md5)
+        if not self.in_cache(name, cachedir, file_md5):
Ghost marked this conversation as resolved

Try first a hardlink to save space and speed up the cache:

```python
try:
    cached_file.hardlink_to(dirpath / name)
except OSError:
    # Maybe a different mount point
    # TODO use bindmount for files?
    shutil.copy(dirpath / name, cached_file)
```

You can use the same later.

Uhm, there is the risk of changing the cached version after the next update. Maybe it is better to unlink the file first if present (IIRC there is a parameter that suppresses the error if the file does not exist, removing the need to call .exists() before the unlink)

So we want to unlink the normal file (not the file in cache) every time at the start and then download or link again depending on whether it is already in cache?

I think so, right? If we unlink we should be keeping the cached version (copy or link) without risk of changing the content. What do you think?

I agree it's the better solution, I just wanted to make sure that I understand your logic correctly
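A minimal sketch of the unlink-first flow agreed on here (illustrative names; assumes Python 3.8+ for `Path.unlink(missing_ok=True)` and 3.10+ for `Path.hardlink_to`; note the thread below ultimately drops the hardlink in favor of plain copies):

```python
import shutil
from pathlib import Path

def refresh_from_cache(workfile: Path, cached_file: Path) -> None:
    # Unlink the working-copy file first; missing_ok=True avoids the
    # .exists() check mentioned above.
    workfile.unlink(missing_ok=True)
    try:
        # On a cache hit, hardlink the working copy to the cache entry
        workfile.hardlink_to(cached_file)
    except OSError:
        # Maybe a different mount point: fall back to a copy
        shutil.copy(cached_file, workfile)
```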

If you are afraid of files getting corrupted, why hardlink at all?

Do we know how git handles files in its working copy when you e.g. checkout a different branch?

Can copying 1.3M files be a bit expensive? (I did not measure it, but my crappy HD is killing me every morning)

> Do we know how git handles files in its working copy when you e.g. checkout a different branch?

When possible, hardlinks: https://git-scm.com/docs/git-clone/

It only talks about objects cloned from a local repository. And 1.3M files is for all the repos - and right now I'm downloading them again and again from OBS, which is actually worse :)

But I asked @nkrapp to check how git checkout treats files in the WC and then we can decide.

The git documentation is very ambiguous here and doesn't say outright how checkout treats files, but they used the wording `updating the index and the files in the working tree` (https://git-scm.com/docs/git-checkout/en) so I'd expect it to actually edit the files

It could be fun to check if this is true: create a repo, commit a file, create a branch, update the file in the branch, and create a hard link outside of the repo. If a checkout into master also updates the external link, then we have only one option (do the copy and forget)

Just tested it and changing a file in a branch also updated the hardlinked file outside of the repo. I guess I'll leave the code as it was then
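An illustrative reconstruction of that test (not necessarily the exact commands used; assumes git 2.28+ for `init -b`): it hardlinks a committed file outside the repo, edits and commits it on a branch, switches back, then checks whether the external link still shares the inode and what content it sees:

```python
import os
import subprocess
import tempfile
from pathlib import Path

def git(*args, cwd):
    # -c flags supply a throwaway identity so commit works anywhere
    subprocess.run(
        ["git", "-c", "user.name=test", "-c", "user.email=test@example.com", *args],
        cwd=cwd, check=True,
    )

with tempfile.TemporaryDirectory() as tmp:
    repo, outside = Path(tmp, "repo"), Path(tmp, "outside.txt")
    repo.mkdir()
    git("init", "-b", "main", cwd=repo)
    (repo / "file.txt").write_text("original\n")
    git("add", "file.txt", cwd=repo)
    git("commit", "-m", "initial", cwd=repo)
    os.link(repo / "file.txt", outside)           # hard link outside the repo
    git("switch", "-c", "branch", cwd=repo)
    (repo / "file.txt").write_text("changed\n")   # edit in the working tree
    git("commit", "-am", "update", cwd=repo)
    git("switch", "main", cwd=repo)
    print("same inode:", os.path.samefile(repo / "file.txt", outside))
    print("outside sees:", outside.read_text().strip())
```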
+            with (dirpath / name).open("wb") as f:
+                f.write(self._download(project, package, name, revision).read())
+            shutil.copy(dirpath / name, cached_file)
+        else:
+            shutil.copy(cached_file, dirpath / name)
+        # Validate the MD5 of the downloaded file
+        if md5(dirpath / name) != file_md5:
+            raise Exception(f"Download error in {name}")

     def list(self, project, package, srcmd5, linkrev):
         params = {"rev": srcmd5, "expand": "1"}
@@ -179,3 +193,11 @@ class OBS:
             raise e
Ghost marked this conversation as resolved

I propose to use the same model as git (also drop joinpath): `filepath = cache / md5[:2] / md5[2:]`

As a datapoint: we're talking about 1.3M files - that would still be ~5000 files per directory. The git objects are packed for a reason - there are not supposed to be 1.3M of them.

This implies of course we do the .cache independent of the repo path - which I agree is a bad idea
And the .cache is actually committed: https://gitea.opensuse.org/coolo/rpm/src/branch/factory/.cache

> As a datapoint: we're talking about 1.3M files - that would still be ~5000 files per directory. The git objects are packed for a reason - there are not supposed to be 1.3M of them.

In that case `filepath = cache / md5[:3] / md5[3:]` will create 4096 entries in the first level and ~300 files in each one.

That sounds like a good compromise
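For scale, the arithmetic behind the compromise: three hex digits give 16^3 = 4096 first-level directories, so 1.3M objects work out to roughly 1,300,000 / 4096 ≈ 317 files per directory. An illustrative sketch of the resulting layout, using the md5 of the empty string:

```python
from pathlib import Path

def cache_path(cachedir: Path, md5: str) -> Path:
    # e.g. <cachedir>/d41/d8cd98f00b204e9800998ecf8427e
    return cachedir / md5[:3] / md5[3:]

print(cache_path(Path("~/.cache/git-import"), "d41d8cd98f00b204e9800998ecf8427e"))
# ~/.cache/git-import/d41/d8cd98f00b204e9800998ecf8427e
print(16 ** 3, 1_300_000 // 16 ** 3)  # 4096 buckets, ~317 files per bucket
```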
         return root
Ghost marked this conversation as resolved

you create the directories here, so you can skip lines 192 and 193
+    def _path_from_md5(self, name, cachedir, md5):
+        filepath = cachedir / md5[:3]
+        filepath.mkdir(parents=True, exist_ok=True)
Ghost marked this conversation as resolved

`return self._path_from_md5(name, dirpath, md5).exists()`

I liked it more when it did not contain the {name}? That makes it a pure object access. Now if two files have the same content but a different name (LICENSE / license, README / README.txt) it will create two files.

I added this mainly so I could differentiate between the files but I know this isn't really needed outside of tests so I don't mind taking it out
+        return filepath / md5[3:]
+
+    def in_cache(self, name, cachedir, md5):
+        return self._path_from_md5(name, cachedir, md5).exists()