Download the full revision

This commit is contained in:
Stephan Kulow 2022-11-02 20:55:09 +01:00
parent c2294d6200
commit 1c54a74ecd
2 changed files with 34 additions and 62 deletions

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import logging import logging
from hashlib import md5 from hashlib import md5
from pathlib import PurePath from pathlib import Path
from typing import Optional from typing import Optional
from lib.db import DB from lib.db import DB
@@ -232,11 +232,11 @@ class DBRevision:
for entry in self.files_list(): for entry in self.files_list():
if old_files.get(entry["name"]) != f"{entry['md5']}-{entry['size']}": if old_files.get(entry["name"]) != f"{entry['md5']}-{entry['size']}":
logging.debug(f"Download {entry['name']}") logging.debug(f"Download {entry['name']}")
to_download.append((PurePath(entry["name"]), entry["md5"])) to_download.append((Path(entry["name"]), entry["size"], entry["md5"]))
old_files.pop(entry["name"], None) old_files.pop(entry["name"], None)
for entry in old_files.keys(): for entry in old_files.keys():
logging.debug(f"Delete {entry}") logging.debug(f"Delete {entry}")
to_delete.append(PurePath(entry)) to_delete.append(Path(entry))
return to_download, to_delete return to_download, to_delete
@staticmethod @staticmethod

View File

@@ -1,6 +1,5 @@
import logging import logging
import os import os
from pathlib import Path
import yaml import yaml
@@ -42,45 +41,11 @@ class GitExporter:
# Download each file in OBS if it is not a binary (or large) # Download each file in OBS if it is not a binary (or large)
# file # file
for (name, size, file_md5) in obs_files: for (name, size, file_md5) in obs_files:
# this file creates easily 100k commits and is just useless data :(
# unfortunately it's stored in the same meta package as the project config
if revision.package == "_project" and name == "_staging_workflow":
continue
# have such files been detected as text mimetype before?
is_text = self.proxy_sha256.is_text(name)
if not is_text and is_binary_or_large(name, size):
file_sha256 = self.proxy_sha256.get_or_put(
revision.project,
revision.package,
name,
revision.srcmd5,
file_md5,
size,
)
self.git.add_lfs(name, file_sha256["sha256"], size)
else:
if (name, size, file_md5) not in git_files:
logging.debug(f"Download {name}")
self.obs.download(
revision.project,
revision.package,
name,
revision.srcmd5,
self.git.path,
file_md5=file_md5,
)
# Validate the MD5 of the downloaded file # Validate the MD5 of the downloaded file
if md5(self.git.path / name) != file_md5: if md5(self.git.path / name) != file_md5:
raise Exception(f"Download error in {name}") raise Exception(f"Download error in {name}")
self.git.add(name) self.git.add(name)
# Remove extra files
obs_names = {n for (n, _, _) in obs_files}
git_names = {n for (n, _, _) in git_files}
for name in git_names - obs_names:
logging.debug(f"Remove {name}")
self.git.remove(name)
def set_gc_interval(self, gc): def set_gc_interval(self, gc):
self.gc_interval = gc self.gc_interval = gc
@@ -122,24 +87,34 @@ class GitExporter:
logging.debug(f"Committing {flat}") logging.debug(f"Committing {flat}")
self.commit_flat(flat, branch_state) self.commit_flat(flat, branch_state)
def limit_download(self, file: Path):
return file.suffix in (".spec", ".changes")
def commit_flat(self, flat, branch_state): def commit_flat(self, flat, branch_state):
parents = [] parents = []
self.git.checkout(flat.branch) self.git.checkout(flat.branch)
# Overwrite ".gitattributes" with the
self.git.add_default_lfs_gitattributes(force=True)
if flat.parent1: if flat.parent1:
parents.append(flat.parent1.git_commit) parents.append(flat.parent1.git_commit)
if flat.parent2: if flat.parent2:
parents.append(flat.parent2.git_commit) parents.append(flat.parent2.git_commit)
to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch]) to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
for file in to_delete: for file in to_delete:
if not self.limit_download(file):
continue
self.git.remove(file) self.git.remove(file)
for file, md5 in to_download: for file, size, md5 in to_download:
if not self.limit_download(file): # have such files been detected as text mimetype before?
continue is_text = self.proxy_sha256.is_text(file.name)
if not is_text and is_binary_or_large(file.name, size):
file_sha256 = self.proxy_sha256.get_or_put(
flat.commit.project,
flat.commit.package,
file.name,
flat.commit.expanded_srcmd5,
md5,
size,
)
self.git.add_lfs(file.name, file_sha256["sha256"], size)
else:
self.obs.download( self.obs.download(
flat.commit.project, flat.commit.project,
flat.commit.package, flat.commit.package,
@@ -150,9 +125,6 @@ class GitExporter:
) )
self.git.add(file) self.git.add(file)
# Overwrite ".gitattributes" with the
self.git.add_default_lfs_gitattributes(force=True)
commit = self.git.commit( commit = self.git.commit(
f"OBS User {flat.commit.userid}", f"OBS User {flat.commit.userid}",
"null@suse.de", "null@suse.de",