forked from importers/git-importer
		
	
		
			
				
	
	
		
			188 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			188 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| import os
 | |
| 
 | |
| import yaml
 | |
| 
 | |
| from lib.binary import is_binary_or_large
 | |
| from lib.db import DB
 | |
| from lib.git import Git
 | |
| from lib.lfs_oid import LFSOid
 | |
| from lib.obs import OBS
 | |
| from lib.proxy_sha256 import ProxySHA256
 | |
| from lib.tree_builder import TreeBuilder
 | |
| from lib.user import User
 | |
| 
 | |
| 
 | |
class GitExporter:
    """Export the history of one package into a git repository.

    The history is obtained from ``TreeBuilder`` as a flat list of entries.
    Each entry is turned into a git commit on the branch it names, and the
    database id of the last commit written per branch is persisted in a small
    YAML state file inside the repository's ``.git`` directory, so that a
    later run only commits what is still missing.
    """

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Set up the OBS/DB helpers and open (or create) the git repository.

        Args:
            api_url: URL handed to ``OBS``.
            project: Project name, passed to ``TreeBuilder.build``.
            package: Package name; also the repository directory name below
                ``repodir`` and the name given to ``add_gitea_remote``.
            repodir: Base directory of the repositories. It is combined with
                ``package`` via the ``/`` operator (presumably a
                ``pathlib.Path`` - confirm against the caller).
            cachedir: Cache location forwarded to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            # A freshly created repository also gets its gitea remote set up.
            self.git.create()
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Initialised here so the counter exists even before the first run_gc().
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set the number of commits between two ``git gc`` runs.

        A falsy value disables the periodic gc inside the commit loop of
        ``export_as_git`` (the gc at the start of an export still runs).
        """
        self.gc_interval = gc

    def _load_state(self):
        """Return the persisted branch state as a dict (empty if unusable).

        A missing state file, or one whose YAML content is not a mapping,
        yields an empty dict.
        """
        if not os.path.exists(self.state_file):
            return {}
        with open(self.state_file, encoding="utf-8") as f:
            state_data = yaml.safe_load(f)
        return state_data if isinstance(state_data, dict) else {}

    def _save_state(self, branch_state):
        """Persist the dbid of each branch's current commit to the state file.

        Branches without a commit are omitted. The data is written to a
        temporary sibling file and then moved into place, so an interrupted
        run cannot leave a truncated state file behind.
        """
        data = {}
        for branch in ("factory", "devel"):
            branch_commit = branch_state[branch]
            if branch_commit:
                data[branch] = branch_commit.dbid
        tmp_path = f"{self.state_file}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f)
        os.replace(tmp_path, self.state_file)

    def check_repo_state(self, flats, branch_state):
        """Work out which entries of ``flats`` still have to be committed.

        ``flats`` is walked in reverse order. When an entry's commit dbid
        equals the dbid stored for the ``factory`` branch in the state file,
        that entry is recorded in ``branch_state``, its ``git_commit`` is set
        to the current head of the branch, and everything collected before it
        is dropped as already exported.

        Args:
            flats: Flat list of history entries, each with a ``commit``.
            branch_state: Dict mapping branch name to the commit currently
                at its head; updated in place for matched branches.

        Returns:
            The entries that are not yet in the repository, in reversed
            ``flats`` order.
        """
        state_data = self._load_state()
        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in ("factory",):
                saved_dbid = state_data.get(branch)
                # A missing state entry must never match, not even a commit
                # whose dbid happens to be None.
                if saved_dbid is not None and flat.commit.dbid == saved_dbid:
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything seen so far precedes the saved state.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def export_as_git(self):
        """Commit all not-yet-exported history entries and force-push.

        Returns early, without pushing, when no tree could be built or when
        nothing is left to commit. If the ``CHECK_ALL_LFS`` environment
        variable is set, ``LFSOid.check_all`` is run first.
        """
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        if tree is None:  # eg. python-M2Crypto errors
            return
        flats = tree.as_flat_list()

        branch_state = {"factory": None, "devel": None}
        left_to_commit = self.check_repo_state(flats, branch_state)

        if not left_to_commit:
            return

        logging.info(f"Commiting into {self.git.path}")
        self.run_gc()
        # Cache of user lookups, keyed by userid, for this export run.
        users = {}

        for flat in left_to_commit:
            if flat.commit.userid not in users:
                users[flat.commit.userid] = User.find(self.db, flat.commit.userid)
            flat.user = users[flat.commit.userid]
            self.gc_cnt -= 1
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

        # make sure that we create devel branch
        if not branch_state["devel"]:
            logging.debug("force creating devel")
            self.git.set_branch_head("devel", self.git.branch_head("factory"))

        self.git.push(force=True)

    def run_gc(self):
        """Reset the gc countdown to ``gc_interval`` and run ``git gc``."""
        self.gc_cnt = self.gc_interval
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return whether ``filename`` should be stored as an LFS object.

        True only when ``is_binary_or_large`` accepts the file and
        ``proxy_sha256`` does not report it as text.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Add one file of ``flat`` to the index, via LFS or as a plain file.

        Files whose name starts with ``_service:`` are skipped entirely.
        """
        # don't export imported _service: files, if any
        if file.name.startswith("_service:"):
            return

        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download ``file`` into the working tree and add it to the index.

        ``self.obs`` is first pointed at the API URL recorded on the commit.
        """
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return whether ``flat`` can be committed on top of its branch as is.

        That is the case when the branch has no recorded commit yet, or when
        the recorded commit equals ``flat.parent1``.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and persist the new state.

        The branch is reset onto ``flat.parent1`` first when its recorded
        head does not match. The file delta against the branch's recorded
        commit is then applied, the commit is created with the entry's user,
        time and message, and ``branch_state`` plus the state file are
        updated.

        Args:
            flat: History entry to commit; ``flat.user`` must be set.
            branch_state: Dict mapping branch name to the commit at its
                head; updated in place.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            if flat.parent2:
                assert flat.parent2.git_commit
                parents.append(flat.parent2.git_commit)

        # create the files if they do not exist yet
        self.git.add_default_lfs_gitattributes(force=False)
        self.git.add_default_gitignore()

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        new_git_commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        flat.commit.git_commit = new_git_commit
        branch_state[flat.branch] = flat.commit
        self._save_state(branch_state)
 |