forked from importers/git-importer
Merge pull request 'implement file caching' (#11) from file-cache into main
Reviewed-on: https://gitea.opensuse.org/importers/git-importer/pulls/11
This commit is contained in:
commit
6dd3cf3eba
@@ -52,6 +52,13 @@ def main():
|
|||||||
type=pathlib.Path,
|
type=pathlib.Path,
|
||||||
help="Local git repository directory",
|
help="Local git repository directory",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-c",
|
||||||
|
"--cachedir",
|
||||||
|
required=False,
|
||||||
|
type=pathlib.Path,
|
||||||
|
help="Local cache directory",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-g",
|
"-g",
|
||||||
"--gc",
|
"--gc",
|
||||||
@@ -93,9 +100,12 @@ def main():
|
|||||||
if not args.repodir:
|
if not args.repodir:
|
||||||
args.repodir = pathlib.Path("repos") / args.package
|
args.repodir = pathlib.Path("repos") / args.package
|
||||||
|
|
||||||
|
if not args.cachedir:
|
||||||
|
args.cachedir = pathlib.Path("~/.cache/git-import/").expanduser()
|
||||||
|
|
||||||
importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
|
importer = Importer(URL_OBS, "openSUSE:Factory", args.package)
|
||||||
importer.import_into_db()
|
importer.import_into_db()
|
||||||
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir)
|
exporter = GitExporter(URL_OBS, "openSUSE:Factory", args.package, args.repodir, args.cachedir)
|
||||||
exporter.set_gc_interval(args.gc)
|
exporter.set_gc_interval(args.gc)
|
||||||
exporter.export_as_git()
|
exporter.export_as_git()
|
||||||
|
|
||||||
|
@@ -12,7 +12,7 @@ from lib.tree_builder import TreeBuilder
|
|||||||
|
|
||||||
|
|
||||||
class GitExporter:
|
class GitExporter:
|
||||||
def __init__(self, api_url, project, package, repodir):
|
def __init__(self, api_url, project, package, repodir, cachedir):
|
||||||
self.obs = OBS()
|
self.obs = OBS()
|
||||||
self.project = project
|
self.project = project
|
||||||
self.package = package
|
self.package = package
|
||||||
@@ -26,6 +26,7 @@ class GitExporter:
|
|||||||
).create()
|
).create()
|
||||||
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
|
self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
|
||||||
self.gc_interval = 200
|
self.gc_interval = 200
|
||||||
|
self.cachedir = cachedir
|
||||||
|
|
||||||
def download(self, revision):
|
def download(self, revision):
|
||||||
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
|
obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
|
||||||
@@ -40,10 +41,7 @@ class GitExporter:
|
|||||||
|
|
||||||
# Download each file in OBS if it is not a binary (or large)
|
# Download each file in OBS if it is not a binary (or large)
|
||||||
# file
|
# file
|
||||||
for (name, size, file_md5) in obs_files:
|
for name in obs_files:
|
||||||
# Validate the MD5 of the downloaded file
|
|
||||||
if md5(self.git.path / name) != file_md5:
|
|
||||||
raise Exception(f"Download error in {name}")
|
|
||||||
self.git.add(name)
|
self.git.add(name)
|
||||||
|
|
||||||
def set_gc_interval(self, gc):
|
def set_gc_interval(self, gc):
|
||||||
@@ -121,6 +119,7 @@ class GitExporter:
|
|||||||
file.name,
|
file.name,
|
||||||
flat.commit.expanded_srcmd5,
|
flat.commit.expanded_srcmd5,
|
||||||
self.git.path,
|
self.git.path,
|
||||||
|
self.cachedir,
|
||||||
file_md5=md5,
|
file_md5=md5,
|
||||||
)
|
)
|
||||||
self.git.add(file)
|
self.git.add(file)
|
||||||
|
26
lib/obs.py
26
lib/obs.py
@@ -1,9 +1,12 @@
|
|||||||
import errno
|
import errno
|
||||||
import logging
|
import logging
|
||||||
|
import shutil
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
|
from pathlib import Path
|
||||||
|
from lib.proxy_sha256 import md5
|
||||||
|
|
||||||
import osc.core
|
import osc.core
|
||||||
|
|
||||||
@@ -158,10 +161,21 @@ class OBS:
|
|||||||
name: str,
|
name: str,
|
||||||
revision: str,
|
revision: str,
|
||||||
dirpath: str,
|
dirpath: str,
|
||||||
|
cachedir: str,
|
||||||
file_md5: str,
|
file_md5: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
with (dirpath / name).open("wb") as f:
|
|
||||||
f.write(self._download(project, package, name, revision).read())
|
cached_file = self._path_from_md5(name, cachedir, file_md5)
|
||||||
|
if not self.in_cache(name, cachedir, file_md5):
|
||||||
|
with (dirpath / name).open("wb") as f:
|
||||||
|
f.write(self._download(project, package, name, revision).read())
|
||||||
|
shutil.copy(dirpath / name, cached_file)
|
||||||
|
else:
|
||||||
|
shutil.copy(cached_file, dirpath / name)
|
||||||
|
|
||||||
|
# Validate the MD5 of the downloaded file
|
||||||
|
if md5(dirpath / name) != file_md5:
|
||||||
|
raise Exception(f"Download error in {name}")
|
||||||
|
|
||||||
def list(self, project, package, srcmd5, linkrev):
|
def list(self, project, package, srcmd5, linkrev):
|
||||||
params = {"rev": srcmd5, "expand": "1"}
|
params = {"rev": srcmd5, "expand": "1"}
|
||||||
@@ -179,3 +193,11 @@ class OBS:
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
return root
|
return root
|
||||||
|
|
||||||
|
def _path_from_md5(self, name, cachedir, md5):
|
||||||
|
filepath = cachedir / md5[:3]
|
||||||
|
filepath.mkdir(parents=True, exist_ok=True)
|
||||||
|
return filepath / md5[3:]
|
||||||
|
|
||||||
|
def in_cache(self, name, cachedir, md5):
|
||||||
|
return self._path_from_md5(name, cachedir, md5).exists()
|
||||||
|
Loading…
Reference in New Issue
Block a user