git-importer/lib/tree_builder.py
Stephan Kulow bfdade8ecf Create a flat list of commits to do from the tree
Walk the node tree and record the parents, then reverse the tree so we
can have the exact order in which to create git commits
2022-10-31 12:28:12 +01:00

188 lines
6.4 KiB
Python

from typing import Dict
from xmlrpc.client import Boolean
from lib.db_revision import DBRevision
from lib.request import Request
class AbstractWalker:
    """Base class for the visitor objects handed to ``TreeNode.walk``.

    Subclasses override ``call``; the default implementation ignores
    every node it is given.
    """

    def call(self, node, is_source):
        """Visit ``node``.

        ``TreeNode.walk`` passes ``is_source=False`` for nodes of the chain
        being walked and ``is_source=True`` for nodes reached through a
        ``merged`` link.  The base implementation does nothing.
        """
        return None
class PrintWalker(AbstractWalker):
    """Walker that prints every visited node's revision and files hash."""

    def call(self, node, is_source):
        """Print one line for ``node``.

        Nodes flagged with ``is_source`` get an extra leading ``" "`` field so
        that they appear indented below the node they were merged into.
        """
        fields = (node.revision, node.revision.files_hash)
        if is_source:
            fields = (" ",) + fields
        print(*fields)
class TreeNode:
    """
    Nodes in this "tree" have either no parent (root), one parent (in a chain)
    or two parents (in this case the merged revision wins in conflicts).
    """

    def __init__(self, rev):
        """Create an unlinked node wrapping the revision ``rev``."""
        # next node of the same chain (None for the root)
        self.parent = None
        # node of another chain that was merged into this node, if any
        self.merged = None
        self.revision = rev
        # reverse lookup of ``merged``: the node this node was merged into
        self.merged_into = None
        # NOTE(review): not used inside this class; presumably filled in by
        # the code that creates the git commits - confirm against callers
        self.git_commit = None

    def _merged_chain(self):
        """Yield the source nodes hanging off this node's ``merged`` link.

        Starts at ``self.merged`` and follows ``parent`` links.  The walk
        stops at the root of the source chain, or right before a source node
        that is itself ``merged_into`` another node.
        """
        source_node = self.merged
        while source_node:
            yield source_node
            source_node = source_node.parent
            if source_node and source_node.merged_into:
                break

    def walk(self, walker: "AbstractWalker"):
        """Visit this node and all nodes reachable through ``parent``.

        For every node of the chain ``walker.call(node, False)`` is invoked
        first; afterwards each node of its merged source chain (if any) is
        passed as ``walker.call(source_node, True)``.
        """
        node = self
        while node:
            walker.call(node, False)
            # ``merged`` is read only after the call above, so a walker may
            # attach the merge while the chain is being walked
            for source_node in node._merged_chain():
                walker.call(source_node, True)
            node = node.parent

    def print(self):
        """Print the tree to stdout using a ``PrintWalker``."""
        self.walk(PrintWalker())

    def as_list(self):
        """Return a list for test cases

        Each node of the chain becomes a dict with the key ``"commit"`` and,
        if the node has a merge, the key ``"merged"`` listing the merged
        source revisions.  All revisions are given as ``short_string()``.
        """
        ret = []
        node = self
        while node:
            entry = {"commit": node.revision.short_string()}
            if node.merged:
                entry["merged"] = [
                    source_node.revision.short_string()
                    for source_node in node._merged_chain()
                ]
            ret.append(entry)
            node = node.parent
        return ret
class TreeBuilder:
    """Build ``TreeNode`` trees from the revisions stored in the database."""

    def __init__(self, db):
        """Keep ``db``; it is passed to ``DBRevision.all_revisions`` and used
        to open cursors for looking up requests."""
        self.db = db

    def revisions_chain(self, project, package):
        """Build a tree without branches (chain) from a project's
        history ignoring empty and broken revisions

        Revisions are processed in their sort order.  Broken revisions are
        skipped, as are revisions whose ``files_hash`` equals the one of the
        previously kept revision.  Returns the node of the last kept
        revision (its ``parent`` links lead to the earlier ones), or ``None``
        if no revision was kept.
        """
        revisions = DBRevision.all_revisions(self.db, project, package)
        revisions.sort()
        kept = None
        tree = None
        for rev in revisions:
            if rev.broken:
                continue
            if kept and kept.files_hash == rev.files_hash:
                continue
            kept = rev
            node = TreeNode(rev)
            if tree is not None:
                node.parent = tree
            tree = node
        return tree

    def find_merge(self, revision, source_chain):
        """For a given revision in the target, find the node in the source chain
        that matches the files

        The chain is searched from ``source_chain`` along the ``parent``
        links.  Returns the first node whose revision has the same
        ``files_hash`` and a ``commit_time`` not after ``revision``'s, or
        ``None`` if there is no such node.
        """
        node = source_chain
        while node:
            candidate = node.revision
            # exclude reverts happening after the merge
            if (
                candidate.commit_time <= revision.commit_time
                and candidate.files_hash == revision.files_hash
            ):
                return node
            node = node.parent
        return None

    def add_merge_points(self, factory_revisions):
        """For all target revisions that accepted a request, look up the merge
        points in the source chains (ignoring the actual revision submitted for now)

        Sets ``merged`` on the target nodes and ``merged_into`` on the source
        nodes found.  An empty tree (``None``) is accepted and left alone.
        """
        if factory_revisions is None:
            # nothing to walk, e.g. revisions_chain() found no usable revision
            return

        class FindRequestsWalker(AbstractWalker):
            """Collect the request ids of all visited nodes."""

            def __init__(self) -> None:
                super().__init__()
                self.requests = set()

            def call(self, node: "TreeNode", _: bool) -> None:
                if node.revision.request_id:
                    self.requests.add(node.revision.request_id)

        class FindMergeWalker(AbstractWalker):
            """Attach the matching source node to every node with a request."""

            def __init__(self, builder: "TreeBuilder", requests: Dict) -> None:
                super().__init__()
                # source chains already built, keyed by "project/package"
                self.source_revisions = dict()
                self.builder = builder
                self.requests = requests

            def call(self, node, is_source) -> None:
                # not going to happen, but better safe
                if is_source:
                    return
                if not node.revision.request_id:
                    return
                req = self.requests.get(node.revision.request_id)
                if req is None:
                    # the request was not among the rows returned by the DB,
                    # so there is no source to look at
                    return
                key = f"{req.source_project}/{req.source_package}"
                if key not in self.source_revisions:
                    self.source_revisions[key] = self.builder.revisions_chain(
                        req.source_project, req.source_package
                    )
                node.merged = self.builder.find_merge(
                    node.revision, self.source_revisions[key]
                )
                # add a reverse lookup
                if node.merged:
                    node.merged.merged_into = node

        # walk the tree twice. First we collect all requests to be looked up
        # to avoid going into the DB a thousand times
        frqs = FindRequestsWalker()
        factory_revisions.walk(frqs)
        if not frqs.requests:
            # no node refers to a request, the second walk would do nothing
            return

        requests = dict()
        with self.db.cursor() as cur:
            cur.execute(
                "SELECT * from requests WHERE id = ANY(%s)", (list(frqs.requests),)
            )
            for row in cur.fetchall():
                req = Request.from_db(row)
                requests[req.dbid] = req

        factory_revisions.walk(FindMergeWalker(self, requests))

    def prune_loose_end(self, factory_node):
        """Look for source revisions that end in a new root and prune them

        Walks the target chain from ``factory_node`` along ``parent``.  A
        merge whose source chain reaches its root without passing another
        merged source node is removed, and the source node of the most
        recently seen merge that was kept gets its ``parent`` cut.
        """
        last_merge = None
        while factory_node:
            if factory_node.merged:
                # does the source chain below this merge hit a node that was
                # merged into the target as well?
                reaches_earlier_merge = False
                source_node = factory_node.merged.parent
                while source_node:
                    if source_node.merged_into:
                        reaches_earlier_merge = True
                        break
                    source_node = source_node.parent

                if reaches_earlier_merge:
                    last_merge = factory_node.merged
                else:
                    factory_node.merged = None
                    if last_merge:
                        last_merge.parent = None
            factory_node = factory_node.parent

    def build(self, package):
        """Create a Factory tree (returning the top)

        Returns ``None`` if "openSUSE:Factory" has no usable revision of
        ``package``.
        """
        factory_revisions = self.revisions_chain("openSUSE:Factory", package)
        self.add_merge_points(factory_revisions)
        self.prune_loose_end(factory_revisions)
        return factory_revisions