Files
kernel-source-component/kernel-source-component.py

620 lines
17 KiB
Python

#!/usr/bin/python3
#
# Extract a tarball and changelog corresponding to an independently buildable
# userspace component from kernel source git
#
# (C) 2023 by Tony Jones <tonyj@suse.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# See http://www.gnu.org/licenses/gpl-2.0.html for full license text.
import argparse
import giturlparse
import glob
import os
import pygit2
import re
import sys
import subprocess
import tempfile
import whatthepatch
import time
from datetime import datetime, timezone
from wcmatch import glob
# Name of the environment variable git_checkout() consults for a preferred
# scratch directory.
scratch_env = "SCRATCH_AREA"
# Minimum free space git_checkout() requires in the scratch area, expressed
# in GiB and in bytes.
scratch_size_gb = 5
scratch_size_bytes = scratch_size_gb * (2**30)
# Space-separated path list appended to the tar command line by
# generate_tarfile(); populated by read_config().
component_files = ""
# Glob patterns matched against paths touched by patches; populated by
# read_config().
patterns = []
def vprint(*pargs, **kwargs):
    """Print only when verbose output was requested.

    All positional and keyword arguments are forwarded unchanged to print().
    Reads the module-level ``args`` namespace produced by argparse.
    """
    if not args.verbose:
        return
    print(*pargs, **kwargs)
def warning(*pargs, **kwargs):
    """Write a "Warning: "-prefixed message to stderr unless quiet mode is on.

    The remaining arguments are forwarded to print(). Reads the module-level
    ``args`` namespace produced by argparse.
    """
    if args.quiet:
        return
    stream = sys.stderr
    print("Warning: ", file=stream, end="")
    print(*pargs, file=stream, **kwargs)
def error(*pargs, **kwargs):
    """Write an "Error: "-prefixed message to stderr.

    The prefix is emitted without a trailing newline, then the arguments are
    forwarded to print() so the message follows on the same line.
    """
    stream = sys.stderr
    print("Error: ", file=stream, end="")
    print(*pargs, file=stream, **kwargs)
def cl_write(*pargs, **kwargs):
    """Append print()-formatted output to the pending changelog buffer.

    The destination is the module-level ``clfile`` object; every other
    argument is handed to print() untouched.
    """
    target = clfile
    print(*pargs, file=target, **kwargs)
def shortsha(commit):
    """Return the first ten characters of the string form of *commit*."""
    text = str(commit)
    return text[:10]
def read_obsinfo():
    """Locate and parse the single ``*.obsinfo`` file in the current directory.

    Each line is split at the first ":" into a field name and a value, both
    stripped of surrounding whitespace. The result is accepted only when:

    * exactly one ``*.obsinfo`` file exists,
    * it contains the ``name`` and ``commit`` fields,
    * the file's base name equals the ``name`` field, and
    * ``<name>.spec`` and ``<name>.changes`` exist alongside it.

    Returns:
        dict mapping field names to values, or None after reporting the
        problem through error().
    """
    names = glob.glob("*.obsinfo")
    if not names:
        error("Unable to determine component name")
        return None
    if len(names) > 1:
        error("Too many obsinfo files")
        return None
    obsinfof = names[0]

    obsinfo = {}
    try:
        with open(obsinfof) as file:
            for line in file:
                key, _, value = line.partition(":")
                key = key.strip()
                if not key:
                    # Blank lines carry no field.
                    continue
                obsinfo[key] = value.strip()
    except (OSError, UnicodeError):
        error(f"Error parsing obsinfo file '{obsinfof}'")
        return None

    # Validate required fields before any of them is dereferenced, so a
    # missing "name" is reported instead of raising KeyError.
    for required in ["name", "commit"]:
        if required not in obsinfo:
            error(f"Error name '{required}' missing from obsinfo '{obsinfof}'")
            return None

    name = obsinfo["name"]
    pathbase = obsinfof.removesuffix(".obsinfo")
    if pathbase != name:
        error(f"Filename/name: mismatch for obsinfo '{obsinfof}' 'name:{name}'")
        return None

    for suffix in ["spec", "changes"]:
        if not os.path.isfile(name + "." + suffix):
            error(f"Unable to verify {suffix} file for component {name}")
            return None
    return obsinfo
def write_obsinfo(outdir, obsinfo):
    """Write ``<outdir>/<name>.obsinfo`` from the given field dictionary.

    The fields ``name``, ``version``, ``mtime`` and ``commit`` are required
    and are written in that order, one ``key: value`` pair per line.

    Args:
        outdir: directory receiving the generated file.
        obsinfo: dict holding at least the four required fields.

    Returns:
        True on success; False (after reporting through error()) when a
        field is missing or the file cannot be written.
    """
    keys = ["name", "version", "mtime", "commit"]
    for key in keys:
        if key not in obsinfo:
            error(f"Error name '{key}' missing from obsinfo")
            return False
    name = obsinfo["name"]
    file = f"{outdir}/{name}.obsinfo"
    try:
        # The context manager guarantees the data is flushed and the handle
        # closed before success is reported.
        with open(file, "w") as newp:
            for key in keys:
                print(f"{key}: {obsinfo[key]}", file=newp)
    except OSError:
        error(f"Unable to open new obsinfo file '{file}' for writing")
        return False
    return True
def update_changelog(outdir, obsinfo, newclp):
    """Create ``<outdir>/<name>.changes`` with the new entries prepended.

    The output consists of the full contents of *newclp* (read from its
    start) followed by the contents of the existing ``<name>.changes`` file
    in the current directory.

    Args:
        outdir: directory receiving the new changes file.
        obsinfo: dict providing the component ``name``.
        newclp: seekable text file object holding the new changelog entries.

    Returns:
        True on success; False (after reporting through error()) when either
        file cannot be opened.
    """
    name = obsinfo["name"]
    file = f"{name}.changes"
    newfile = f"{outdir}/{file}"

    # open() signals failure by raising, never by returning a false value.
    try:
        oldp = open(file, "r")
    except OSError:
        error(f"Unable to open old changes file '{file}' for reading")
        return False

    with oldp:
        try:
            newp = open(newfile, "w")
        except OSError:
            error(f"Unable to open new changes file '{newfile}' for writing")
            return False
        with newp:
            newclp.seek(0)
            # New entries first, then the pre-existing history.
            for fp in (newclp, oldp):
                newp.writelines(fp)
    return True
def generate_interdiff(path, filea, fileb):
    """Return the paths touched by the interdiff of two patch blobs.

    Runs ``interdiff`` on the output of ``git cat-file -p`` for both blob ids
    inside the repository at *path* and collects the target path of every
    diff in the result, with a leading ``a/`` or ``b/`` removed.

    Args:
        path: directory of the git repository holding both blobs.
        filea, fileb: objects whose ``id`` attribute names a git blob.

    Returns:
        list of path strings (possibly empty), or None when the command
        could not be run or exited non-zero; in that case the module-level
        ``interdiff_fail`` counter is incremented.
    """
    global interdiff_fail
    # <(...) is process substitution, so the command is handed to bash
    # explicitly rather than to whatever /bin/sh happens to be.
    cmd = (
        f"interdiff <(git cat-file -p {filea.id}) "
        f"<(git cat-file -p {fileb.id}) 2> /dev/null"
    )
    try:
        # cwd= replaces an unquoted "cd <path> &&" prefix; run() owns the
        # pipe, so its descriptor is closed exactly once.
        proc = subprocess.run(
            ["/bin/bash", "-c", cmd],
            cwd=path,
            stdout=subprocess.PIPE,
            encoding="utf8",
            # Only the paths are needed; undecodable bytes in patch content
            # must not abort the run.
            errors="replace",
        )
    except OSError:
        interdiff_fail += 1
        return None
    if proc.returncode != 0:
        interdiff_fail += 1
        return None

    paths = []
    for diff in whatthepatch.parse_patch(proc.stdout):
        # Skip entries without a usable header instead of raising.
        if not diff or not diff.header or not diff.header.new_path:
            continue
        paths.append(re.sub("^[ab]/", "", diff.header.new_path))
    return paths
def git_checkout(gitrepo, gitbranch):
    """Clone *gitrepo* into a temporary directory inside a scratch area.

    The scratch area is the first existing, writable directory among the
    value of the environment variable named by ``scratch_env``, ``/abuild``
    and ``/tmp``. It must offer at least ``scratch_size_bytes`` of free
    space.

    Args:
        gitrepo: repository location handed to ``git clone``.
        gitbranch: branch to clone, or a false value to use the default.

    Returns:
        the tempfile.TemporaryDirectory holding the clone, or None after
        reporting the failure through error().
    """
    candidates = [os.environ.get(scratch_env), "/abuild", "/tmp"]
    scratch_area = next(
        (
            candidate
            for candidate in candidates
            if candidate
            and os.path.isdir(candidate)
            and os.access(candidate, os.W_OK)
        ),
        None,
    )
    if not scratch_area:
        error(
            f"Unable to locate suitable scratch area. Set environment variable {scratch_env} to a directory with at least {scratch_size_gb}GB available"
        )
        return None

    statvfs = os.statvfs(scratch_area)
    if not statvfs or statvfs.f_frsize * statvfs.f_bavail < scratch_size_bytes:
        error(
            f"Scratch area '{scratch_area}' requires >= {scratch_size_gb}GB available space"
        )
        return None

    tmpdirctx = tempfile.TemporaryDirectory(dir=scratch_area)
    target = tmpdirctx.name

    if gitbranch:
        branch_args = [f"-b{gitbranch}"]
        msg = f"{gitrepo}:{gitbranch}"
    else:
        branch_args = []
        msg = f"{gitrepo}"
    cmd = ["git", "clone", *branch_args, gitrepo, target]

    vprint(f"Cloning git tree from {msg} to {tmpdirctx.name}")
    if subprocess.run(cmd).returncode != 0:
        error("git clone failed")
        return None
    vprint("Git clone completed")
    return tmpdirctx
def get_kernel_version(path):
    """Derive the kernel version from ``<path>/Makefile``.

    Lines are split at the first "=" and the values assigned to VERSION,
    PATCHLEVEL and SUBLEVEL are collected. Scanning stops at the
    EXTRAVERSION line (even when its value is empty); nothing after it is
    consulted. EXTRAVERSION itself is not part of the returned string.

    Returns:
        "VERSION.PATCHLEVEL.SUBLEVEL", or "VERSION.PATCHLEVEL" when no
        SUBLEVEL was found, or None when VERSION or PATCHLEVEL is missing or
        the Makefile cannot be read (reported through error()).
    """
    makefile = path + "/Makefile"
    fields = {"VERSION": None, "PATCHLEVEL": None, "SUBLEVEL": None}
    try:
        with open(makefile) as file:
            for line in file:
                name, _, value = line.partition("=")
                name = name.strip()
                value = value.strip()
                if value and name in fields:
                    fields[name] = value
                # The EXTRAVERSION line ends the version block whether or
                # not it carries a value.
                if name == "EXTRAVERSION":
                    break
    except (OSError, UnicodeError):
        error(f"Unable to parse Makefile {makefile}")
        return None

    version = fields["VERSION"]
    patchlevel = fields["PATCHLEVEL"]
    sublevel = fields["SUBLEVEL"]
    if not (version and patchlevel):
        return None
    parts = [version, patchlevel]
    # Never emit a literal "None" component when SUBLEVEL is absent.
    if sublevel:
        parts.append(sublevel)
    return ".".join(parts)
def get_patch_changelog(data, max_lines=None):
    """Extract the ``Subject`` and ``References`` tags from patch text.

    Every line of the form ``Tag: value`` (capitalised alphabetic tag) is
    inspected. A "[PATCH] " marker is removed from the subject. The search
    ends as soon as both tags have been seen with non-empty values.

    Args:
        data: full text of the patch.
        max_lines: optional cap on the number of leading lines examined.
            The default (None) scans the whole text.

    Returns:
        (subject, references) tuple, or None when either tag is missing.
    """
    subject = None
    references = None
    for lineno, line in enumerate(data.splitlines()):
        if max_lines is not None and lineno >= max_lines:
            break
        res = re.search(r"^([A-Z][a-z]+):(.*)$", line)
        if not res:
            continue
        tag, _value = res.groups()
        value = _value.lstrip()
        if tag == "Subject":
            subject = value.replace("[PATCH] ", "")
        elif tag == "References":
            references = value
        if subject and references:
            return subject, references
    return None
def read_config():
    """Load ``_patterns`` and ``_component_files`` from the current directory.

    ``_patterns`` supplies one glob pattern per line (trailing whitespace
    removed) for the module-level ``patterns`` list. ``_component_files``
    supplies whitespace-separated paths which are stored in the module-level
    ``component_files`` string, each preceded by a single space.

    Returns:
        True on success; False (after reporting through error()) when
        either file cannot be read.
    """
    componentfilesf = "_component_files"
    patternsf = "_patterns"
    global patterns, component_files

    try:
        with open(patternsf, "r") as file:
            patterns = [line.rstrip() for line in file]
    except (OSError, UnicodeError):
        error(f"Unable to open pattern file {patternsf}")
        return False

    try:
        with open(componentfilesf) as file:
            # split() without an argument drops the empty items produced by
            # repeated blanks and by blank lines.
            items = [item for line in file for item in line.split()]
    except (OSError, UnicodeError):
        error(f"Unable to open component file {componentfilesf}")
        return False

    component_files = "".join(f" {item}" for item in items)
    return True
def sequence_patches(path):
    """Run ``scripts/sequence-patch.sh --rapid`` inside *path*.

    The script's standard output is scanned for lines consisting of the
    words "Creating tree in <dir>"; the directory from the last such line is
    taken as the location of the expanded tree.

    Args:
        path: directory containing ``scripts/sequence-patch.sh``.

    Returns:
        the expanded tree directory as a string, or None (after reporting
        through error()) when the script cannot be started, exits non-zero
        or never announces a directory.
    """
    # An argument list plus cwd= avoids interpolating the directory into a
    # shell command line, so paths containing blanks or shell
    # metacharacters work.
    cmd = ["scripts/sequence-patch.sh", "--rapid"]
    expanded = None
    try:
        with subprocess.Popen(cmd, cwd=path, stdout=subprocess.PIPE) as proc:
            for line in proc.stdout:
                words = line.split()
                if len(words) == 4 and words[:3] == [b"Creating", b"tree", b"in"]:
                    expanded = words[3].decode()
            returncode = proc.wait()
    except OSError:
        error("sequence_patch failed")
        return None

    if not expanded or returncode != 0:
        error("sequence_patch failed")
        return None
    print(f"Expanded kernel tree to: {expanded}")
    return expanded
def generate_tarfile(expanded_tree, outdir, obsinfo):
    """Create ``<outdir>/<name>-<version>.tar.gz`` from the expanded tree.

    tar runs inside *expanded_tree* over the paths held in the module-level
    ``component_files`` string; every member name is prefixed with
    ``./<name>-<version>/`` through ``--transform``.

    Args:
        expanded_tree: directory of the expanded kernel tree.
        outdir: directory receiving the tarball.
        obsinfo: dict providing ``name`` and ``version``.

    Returns:
        True when tar exits with status 0, otherwise False.
    """
    import shlex

    tarversion = obsinfo["version"]
    name = obsinfo["name"]
    tarname = f"{name}-{tarversion}"
    tarfile = f"{outdir}/{tarname}.tar.gz"
    # Raw string: the backslashes belong to the sed-style expression and
    # must not be read as Python escape sequences.
    transform = rf"s/^/.\/{tarname}\//"
    # component_files is deliberately left unquoted so the shell keeps
    # splitting (and expanding) it into separate arguments.
    cmd = (
        f"tar --transform {shlex.quote(transform)} "
        f"-czf {shlex.quote(tarfile)} {component_files}"
    )
    try:
        proc = subprocess.run(["/bin/bash", "-c", cmd], cwd=expanded_tree)
    except OSError:
        return False
    return proc.returncode == 0
def format_commit_message(message, lines=None):
    """Write *message* to the changelog buffer as a changelog item.

    When the first line already starts with "-", every line is written
    verbatim. Otherwise the first line is prefixed with "- " and each
    following line with a single space.

    Args:
        message: text to emit; may span several lines.
        lines: optional maximum number of lines to emit. A false value
            (None or 0) means no limit.
    """
    existing_fmt = False
    for lineno, line in enumerate(message.splitlines()):
        if lines and lineno == lines:
            break
        # startswith() tolerates an empty first line, unlike line[0].
        if lineno == 0 and line.startswith("-"):
            existing_fmt = True
        if existing_fmt:
            cl_write(line)
        else:
            cl_write(f"- {line}" if lineno == 0 else f" {line}")
#
#
# PROGRAM START
# Command-line interface. --outdir is mandatory; the remaining options tune
# verbosity, select the kernel-source repository/branch, and allow forcing
# regeneration when no relevant change is detected.
parser = argparse.ArgumentParser(
    description="Extract tarball and changelog from kernel source git"
)
parser.add_argument(
    "--outdir",
    help="osc service parameter for internal use only "
    "(determines where generated files go before "
    "collection)",
    dest="outdir",
    required=True,
)
parser.add_argument(
    "--verbose", "-v", help="Enable verbose output", dest="verbose", action="store_true"
)
parser.add_argument(
    "--quiet", "-q", help="Enable quiet mode", dest="quiet", action="store_true"
)
parser.add_argument(
    "--repo",
    "-r",
    help="kernel source repository",
    dest="gitrepo",
    default="kerncvs.suse.de:/home/git/kernel-source.git",
)
parser.add_argument(
    "--branch", "-b", dest="gitbranch", help="kernel source branch name"
)
parser.add_argument(
    "--force",
    "-f",
    action="store_true",
    dest="force",
    # A real boolean: the string "False" would be truthy.
    default=False,
    help="Force generation of changelog/tarball even when no changes are detected",
)
args = parser.parse_args()
# The output directory must exist, be writable, and differ from the working
# directory (which holds the current obsinfo/changes files).
if not os.path.isdir(args.outdir) or not os.access(args.outdir, os.W_OK):
    error(f"Specified directory '{args.outdir}' does not exist or is not writable")
    sys.exit(1)
outdir = os.path.abspath(args.outdir)
if outdir == os.getcwd():
    error(f"Specified directory '{args.outdir}' is pwd")
    sys.exit(1)

obsinfo = read_obsinfo()
if not obsinfo:
    error("Unable to read obsinfo file")
    sys.exit(1)

# Commit offset carried over from the previous version string, which has the
# form "<kernel version>.git<offset>.<sha>". Start from 0 so the counter is
# always bound, even when the recorded version does not follow that form.
offset = 0
if "version" in obsinfo:
    version_match = re.match(r"^.*\.git([0-9]+)\.[0-9a-z]*$", obsinfo["version"])
    if version_match:
        offset = int(version_match.group(1))
    else:
        warning(
            f"Unable to parse git offset from version '{obsinfo['version']}' [setting gitoffset = 0]"
        )
else:
    warning("commit: but no prior version: found in obsinfo [setting gitoffset = 0]")

if not read_config():
    error("Unable to read config")
    sys.exit(1)
# Decide where the unexpanded kernel-source tree comes from: an existing
# local directory is used in place, anything else is cloned into scratch
# space.
p = giturlparse.parse(args.gitrepo)
if not p:
    error(f"Unable to parse repository {args.gitrepo}")
    sys.exit(1)
if os.path.isdir(p.href):
    if args.gitbranch:
        error(
            "Branch option invalid if specified repository is directory. Please manually checkout required branch and rerun"
        )
        sys.exit(1)
    unexpanded_git_tree = args.gitrepo
else:
    # gitdir is the TemporaryDirectory object returned by git_checkout();
    # keeping the name bound keeps the clone on disk while it is used.
    gitdir = git_checkout(args.gitrepo, args.gitbranch)
    if not gitdir:
        error("Unable to checkout kernel source tree from git")
        sys.exit(1)
    unexpanded_git_tree = gitdir.name
# Scratch buffer that cl_write() appends the new changelog entries to.
clfile = tempfile.TemporaryFile(mode="w+")
repo = pygit2.Repository(unexpanded_git_tree)
# Number of changelog-relevant patch changes found across all walked commits.
total_changes = 0
head = repo.head.target
# Fields of the obsinfo file that will be written for the new revision.
newobsinfo = {}
newobsinfo["name"] = obsinfo["name"]
newobsinfo["commit"] = head
# Failure counters, reported as warnings once the history walk is finished.
interdiff_fail = 0
whatthepatch_fail = 0
# Walk history from HEAD back to the commit recorded in the obsinfo file.
# Every commit passed bumps the offset; single-parent commits are inspected
# for added, deleted or modified files under patches.*/ whose patch content
# touches a path matching the configured patterns, and a changelog entry is
# written for each commit with at least one such change.
for commit in repo.walk(head, pygit2.GIT_SORT_NONE):
    if str(commit.id) == obsinfo["commit"]:
        break
    offset += 1
    # Commits with zero or several parents only count towards the offset.
    if len(commit.parents) != 1:
        continue

    changelog = []
    tree_diff = commit.parents[0].tree.diff_to_tree(commit.tree)
    for delta in tree_diff.deltas:
        status = delta.status
        if status == pygit2.GIT_DELTA_ADDED:
            file1 = delta.new_file
        elif status == pygit2.GIT_DELTA_DELETED:
            file1 = delta.old_file
        elif status == pygit2.GIT_DELTA_MODIFIED:
            file1 = delta.new_file
            file2 = delta.old_file
        else:
            continue

        if status == pygit2.GIT_DELTA_ADDED or status == pygit2.GIT_DELTA_DELETED:
            if glob.globmatch(file1.path, "patches.*/**", flags=glob.GLOBSTAR):
                filematched = False
                # Undecodable bytes must not abort the walk; only headers
                # and paths are extracted from the text.
                blob = repo.revparse_single(str(file1.id)).data.decode(
                    "utf-8", errors="replace"
                )
                for patch_diff in whatthepatch.parse_patch(blob):
                    if not patch_diff or not patch_diff.header:
                        whatthepatch_fail += 1
                        continue
                    if glob.globmatch(
                        patch_diff.header.new_path, patterns, flags=glob.GLOBSTAR
                    ):
                        filematched = True
                        break
                if filematched:
                    res = get_patch_changelog(blob)
                    if res:
                        subject, references = res
                        changelog.append((status, subject, references, file1.path))
        elif status == pygit2.GIT_DELTA_MODIFIED:
            new = glob.globmatch(file1.path, "patches.*/**", flags=glob.GLOBSTAR)
            old = glob.globmatch(file2.path, "patches.*/**", flags=glob.GLOBSTAR)
            filematched = False
            if new or old:
                # A refreshed patch is relevant only when the difference
                # between its two versions touches a component path.
                paths = generate_interdiff(unexpanded_git_tree, file1, file2)
                if paths:
                    for path in paths:
                        if glob.globmatch(path, patterns, flags=glob.GLOBSTAR):
                            filematched = True
                            break
            if filematched:
                blob = repo.revparse_single(str(file1.id)).data.decode(
                    "utf-8", errors="replace"
                )
                res = get_patch_changelog(blob)
                if res:
                    subject, references = res
                    changelog.append((status, subject, references, file1.path))

    num_changes = len(changelog)
    total_changes += num_changes
    if num_changes == 0:
        continue

    cl_write(
        "-------------------------------------------------------------------"
    )
    stime = datetime.fromtimestamp(commit.commit_time, timezone.utc).strftime(
        "%a %b %d %H:%M:%S %Z %Y"
    )
    cl_write(f"{stime} - {commit.author}\n")
    if num_changes == 1:
        # Most typical case: one patch added, deleted or refreshed. The
        # original git commit message is used as the entry.
        format_commit_message(commit.message)
    else:
        format_commit_message(commit.message, 6)
        cl_write("")
        cl_write("Relevant changes:")
        modes = {
            pygit2.GIT_DELTA_ADDED: "Add",
            pygit2.GIT_DELTA_DELETED: "Delete",
            pygit2.GIT_DELTA_MODIFIED: "Refresh",
        }
        for status, subject, references, patch_path in changelog:
            if subject:
                mode = modes.get(status, "")
                format_commit_message(f"{mode} patch: {patch_path}\n{subject}")
            if references:
                cl_write(f" ({references})")
    cl_write(f"\n [kernel-source commit {shortsha(commit.id)}]\n")
# Report patches that could not be analysed, then regenerate the tarball,
# changelog and obsinfo when there is something new (or when forced).
if interdiff_fail > 0:
    warning(f"{interdiff_fail} patches could not be processed by interdiff")
if whatthepatch_fail > 0:
    warning(f"{whatthepatch_fail} patches could not be processed by whatthepatch")

# Identity check: only a genuine boolean True set by --force enables forcing;
# any other truthy value stored in args.force is ignored.
if total_changes > 0 or args.force is True:
    print(f"New changes: {total_changes}")
    expanded_git_tree = sequence_patches(unexpanded_git_tree)
    if not expanded_git_tree:
        error("Unable to expand git tree")
        sys.exit(1)
    kernversion = get_kernel_version(expanded_git_tree)
    if not kernversion:
        error("Unable to determine kernel version")
        sys.exit(1)
    newobsinfo["version"] = f"{kernversion}.git{offset}.{shortsha(head)}"
    newobsinfo["mtime"] = int(time.time())
    if not generate_tarfile(expanded_git_tree, outdir, newobsinfo):
        error("Unable to generate tarfile")
        sys.exit(1)
    # A failed write must not be reported as a successful update.
    if not update_changelog(outdir, obsinfo, clfile):
        error("Unable to update changelog")
        sys.exit(1)
    if not write_obsinfo(outdir, newobsinfo):
        error("Unable to write obsinfo")
        sys.exit(1)
    print(f"Updated revision to {str(head)}")
else:
    print("No new changes")