Merge pull request 'implement file caching' (#11) from file-cache into main

Reviewed-on: https://gitea.opensuse.org/importers/git-importer/pulls/11
This commit is contained in:
Nico Krapp 2022-11-03 14:24:22 +01:00
commit 6dd3cf3eba
3 changed files with 39 additions and 8 deletions

View File

@ -52,6 +52,13 @@ def main():
type=pathlib.Path,
help="Local git repository directory",
)
parser.add_argument(
"-c",
"--cachedir",
required=False,
type=pathlib.Path,
help="Local cache directory",
)
parser.add_argument(
"-g",
"--gc",
@ -93,9 +100,12 @@ def main():
if not args.repodir:
args.repodir = pathlib.Path("repos") / args.package
if not args.cachedir:
args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
importer.import_into_db()
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
exporter.set_gc_interval(args.gc)
exporter.export_as_git()

View File

@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
class GitExporter:
def __init__(self, api_url, project, package, repodir):
def __init__(self, api_url, project, package, repodir, cachedir):
self.obs = OBS()
self.project = project
self.package = package
@ -26,6 +26,7 @@ class GitExporter:
).create()
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
self.gc_interval = 200
self.cachedir = cachedir
def download(self, revision):
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
@ -40,10 +41,7 @@ class GitExporter:
# Download each file in OBS if it is not a binary (or large)
# file
for (name, size, file_md5) in obs_files:
# Validate the MD5 of the downloaded file
if md5(self.git.path / name) != file_md5:
raise Exception(f"Download error in {name}")
for name in obs_files:
self.git.add(name)
def set_gc_interval(self, gc):
@ -121,6 +119,7 @@ class GitExporter:
file.name,
flat.commit.expanded_srcmd5,
self.git.path,
self.cachedir,
file_md5=md5,
)
self.git.add(file)

View File

@ -1,9 +1,12 @@
import errno
import logging
import shutil
import time
import urllib.parse
import xml.etree.ElementTree as ET
from urllib.error import HTTPError
from pathlib import Path
from lib.proxy_sha256 import md5
import osc.core
@ -158,10 +161,21 @@ class OBS:
name: str,
revision: str,
dirpath: str,
cachedir: str,
file_md5: str,
) -> None:
with (dirpath / name).open("wb") as f:
f.write(self._download(project, package, name, revision).read())
cached_file = self._path_from_md5(name, cachedir, file_md5)
if not self.in_cache(name, cachedir, file_md5):
with (dirpath / name).open("wb") as f:
f.write(self._download(project, package, name, revision).read())
shutil.copy(dirpath / name, cached_file)
else:
shutil.copy(cached_file, dirpath / name)
# Validate the MD5 of the downloaded file
if md5(dirpath / name) != file_md5:
raise Exception(f"Download error in {name}")
def list(self, project, package, srcmd5, linkrev):
params = {"rev": srcmd5, "expand": "1"}
@ -179,3 +193,11 @@ class OBS:
raise e
return root
def _path_from_md5(self, name, cachedir, md5):
    """Return the cache location for a file identified by its MD5 digest.

    The cache is sharded: the first three characters of ``md5`` name a
    sub-directory of ``cachedir`` and the remaining characters name the
    file inside it.

    Args:
        name: Original file name. Not used to build the path; kept so the
            signature stays compatible with existing callers.
        cachedir: Root of the cache, as a ``str`` or ``pathlib.Path``.
        md5: Hex digest string of the file contents.

    Returns:
        ``pathlib.Path`` of the cache entry (which may not exist yet).

    Side effects:
        Creates the shard directory (and any missing parents) so callers
        can copy a file to the returned path right away.
    """
    # Coerce to Path so a plain string cache directory works with `/`.
    shard_dir = Path(cachedir) / md5[:3]
    shard_dir.mkdir(parents=True, exist_ok=True)
    return shard_dir / md5[3:]
def in_cache(self, name, cachedir, md5):
    """Tell whether a file with the given MD5 digest is already cached.

    Resolves the cache entry path through ``self._path_from_md5`` and
    returns ``True`` when something exists at that path, ``False``
    otherwise.
    """
    cache_entry = self._path_from_md5(name, cachedir, md5)
    return cache_entry.exists()