forked from importers/git-importer
Optimize cached file locations and add an option for the cache directory
This commit is contained in:
parent
7678967ae0
commit
639096b548
@ -52,6 +52,13 @@ def main():
|
||||
type=pathlib.Path,
|
||||
help="Local git repository directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--cachedir",
|
||||
required=False,
|
||||
type=pathlib.Path,
|
||||
help="Local cache directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-g",
|
||||
"--gc",
|
||||
@ -93,9 +100,12 @@ def main():
|
||||
if not args.repodir:
|
||||
args.repodir = pathlib.Path("repos") / args.package
|
||||
|
||||
if not args.cachedir:
|
||||
args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
|
||||
|
||||
importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
|
||||
importer.import_into_db()
|
||||
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
|
||||
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
|
||||
exporter.set_gc_interval(args.gc)
|
||||
exporter.export_as_git()
|
||||
|
||||
|
@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
|
||||
|
||||
|
||||
class GitExporter:
|
||||
def __init__(self, api_url, project, package, repodir):
|
||||
def __init__(self, api_url, project, package, repodir, cachedir):
|
||||
self.obs = OBS()
|
||||
self.project = project
|
||||
self.package = package
|
||||
@ -26,6 +26,7 @@ class GitExporter:
|
||||
).create()
|
||||
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
|
||||
self.gc_interval = 200
|
||||
self.cachedir = cachedir
|
||||
|
||||
def download(self, revision):
|
||||
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
|
||||
@ -40,10 +41,7 @@ class GitExporter:
|
||||
|
||||
# Download each file in OBS if it is not a binary (or large)
|
||||
# file
|
||||
for (name, size, file_md5) in obs_files:
|
||||
# Validate the MD5 of the downloaded file
|
||||
if md5(self.git.path / name) != file_md5:
|
||||
raise Exception(f"Download error in {name}")
|
||||
for name in obs_files:
|
||||
self.git.add(name)
|
||||
|
||||
def set_gc_interval(self, gc):
|
||||
@ -121,6 +119,7 @@ class GitExporter:
|
||||
file.name,
|
||||
flat.commit.expanded_srcmd5,
|
||||
self.git.path,
|
||||
self.cachedir,
|
||||
file_md5=md5,
|
||||
)
|
||||
self.git.add(file)
|
||||
|
27
lib/obs.py
27
lib/obs.py
@ -6,6 +6,7 @@ import urllib.parse
|
||||
import xml.etree.ElementTree as ET
|
||||
from urllib.error import HTTPError
|
||||
from pathlib import Path
|
||||
from lib.proxy_sha256 import md5
|
||||
|
||||
import osc.core
|
||||
|
||||
@ -160,16 +161,22 @@ class OBS:
|
||||
name: str,
|
||||
revision: str,
|
||||
dirpath: str,
|
||||
cachedir: str,
|
||||
file_md5: str,
|
||||
) -> None:
|
||||
cached_file = self._path_from_md5(name, dirpath, file_md5)
|
||||
if not self.in_cache(name, dirpath, file_md5):
|
||||
|
||||
cached_file = self._path_from_md5(name, cachedir, file_md5)
|
||||
if not self.in_cache(name, cachedir, file_md5):
|
||||
with (dirpath / name).open("wb") as f:
|
||||
f.write(self._download(project, package, name, revision).read())
|
||||
shutil.copy(dirpath / name, cached_file)
|
||||
else:
|
||||
shutil.copy(cached_file, dirpath / name)
|
||||
|
||||
# Validate the MD5 of the downloaded file
|
||||
if md5(dirpath / name) != file_md5:
|
||||
raise Exception(f"Download error in {name}")
|
||||
|
||||
def list(self, project, package, srcmd5, linkrev):
|
||||
params = {"rev": srcmd5, "expand": "1"}
|
||||
if linkrev:
|
||||
@ -187,15 +194,11 @@ class OBS:
|
||||
|
||||
return root
|
||||
|
||||
def _path_from_md5(self, name, dirpath, md5):
|
||||
cache = dirpath.joinpath(".cache/")
|
||||
if not Path(cache).exists():
|
||||
cache.mkdir()
|
||||
filepath = cache.joinpath(f"{md5[0:3]}/{md5[3:6]}/{md5[6:9]}/")
|
||||
def _path_from_md5(self, name, cachedir, md5):
|
||||
filepath = cachedir / md5[:3]
|
||||
cached_file = f"{md5[3:]}-{name}"
|
||||
filepath.mkdir(parents=True, exist_ok=True)
|
||||
return filepath.joinpath(f"{md5[9:]}-{name}")
|
||||
return filepath / cached_file
|
||||
|
||||
def in_cache(self, name, dirpath, md5):
|
||||
if self._path_from_md5(name, dirpath, md5).is_file():
|
||||
return True
|
||||
return False
|
||||
def in_cache(self, name, cachedir, md5):
|
||||
return self._path_from_md5(name, cachedir, md5).exists()
|
||||
|
Loading…
Reference in New Issue
Block a user