Download the full revision

This commit is contained in:
Stephan Kulow 2022-11-02 20:55:09 +01:00
parent c2294d6200
commit 1c54a74ecd
2 changed files with 34 additions and 62 deletions

View File

@ -2,7 +2,7 @@ from __future__ import annotations
import logging
from hashlib import md5
from pathlib import PurePath
from pathlib import Path
from typing import Optional
from lib.db import DB
@ -232,11 +232,11 @@ class DBRevision:
for entry in self.files_list():
if old_files.get(entry["name"]) != f"{entry['md5']}-{entry['size']}":
logging.debug(f"Download {entry['name']}")
to_download.append((PurePath(entry["name"]), entry["md5"]))
to_download.append((Path(entry["name"]), entry["size"], entry["md5"]))
old_files.pop(entry["name"], None)
for entry in old_files.keys():
logging.debug(f"Delete {entry}")
to_delete.append(PurePath(entry))
to_delete.append(Path(entry))
return to_download, to_delete
@staticmethod

View File

@ -1,6 +1,5 @@
import logging
import os
from pathlib import Path
import yaml
@ -42,45 +41,11 @@ class GitExporter:
# Download each file in OBS if it is not a binary (or large)
# file
for (name, size, file_md5) in obs_files:
# this file creates easily 100k commits and is just useless data :(
# unfortunately it's stored in the same meta package as the project config
if revision.package == "_project" and name == "_staging_workflow":
continue
# have such files been detected as text mimetype before?
is_text = self.proxy_sha256.is_text(name)
if not is_text and is_binary_or_large(name, size):
file_sha256 = self.proxy_sha256.get_or_put(
revision.project,
revision.package,
name,
revision.srcmd5,
file_md5,
size,
)
self.git.add_lfs(name, file_sha256["sha256"], size)
else:
if (name, size, file_md5) not in git_files:
logging.debug(f"Download {name}")
self.obs.download(
revision.project,
revision.package,
name,
revision.srcmd5,
self.git.path,
file_md5=file_md5,
)
# Validate the MD5 of the downloaded file
if md5(self.git.path / name) != file_md5:
raise Exception(f"Download error in {name}")
self.git.add(name)
# Remove extra files
obs_names = {n for (n, _, _) in obs_files}
git_names = {n for (n, _, _) in git_files}
for name in git_names - obs_names:
logging.debug(f"Remove {name}")
self.git.remove(name)
def set_gc_interval(self, gc):
    """Record *gc* on this instance as its ``gc_interval`` attribute."""
    interval = gc
    self.gc_interval = interval
@ -122,24 +87,34 @@ class GitExporter:
logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state)
def limit_download(self, file: Path):
    """Tell whether *file* carries a ``.spec`` or ``.changes`` suffix.

    Only the final suffix of the path is examined, and the comparison is
    case-sensitive.
    """
    extension = file.suffix
    return extension == ".spec" or extension == ".changes"
def commit_flat(self, flat, branch_state):
parents = []
self.git.checkout(flat.branch)
# Overwrite ".gitattributes" with the default LFS attributes
self.git.add_default_lfs_gitattributes(force=True)
if flat.parent1:
parents.append(flat.parent1.git_commit)
if flat.parent2:
parents.append(flat.parent2.git_commit)
to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
for file in to_delete:
if not self.limit_download(file):
continue
self.git.remove(file)
for file, md5 in to_download:
if not self.limit_download(file):
continue
for file, size, md5 in to_download:
# have such files been detected as text mimetype before?
is_text = self.proxy_sha256.is_text(file.name)
if not is_text and is_binary_or_large(file.name, size):
file_sha256 = self.proxy_sha256.get_or_put(
flat.commit.project,
flat.commit.package,
file.name,
flat.commit.expanded_srcmd5,
md5,
size,
)
self.git.add_lfs(file.name, file_sha256["sha256"], size)
else:
self.obs.download(
flat.commit.project,
flat.commit.package,
@ -150,9 +125,6 @@ class GitExporter:
)
self.git.add(file)
# Overwrite ".gitattributes" with the default LFS attributes
self.git.add_default_lfs_gitattributes(force=True)
commit = self.git.commit(
f"OBS User {flat.commit.userid}",
"null@suse.de",