From 4d1ca8d882ed2231cf42327775fd41a292819760 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Fri, 11 Nov 2022 16:22:18 +0100 Subject: [PATCH 1/2] Also treat some more mimetypes as text --- lib/binary.py | 16 +++++++++++++--- lib/lfs_oid.py | 5 +++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/binary.py b/lib/binary.py index 9794d43..eaeebdc 100644 --- a/lib/binary.py +++ b/lib/binary.py @@ -25,18 +25,28 @@ BINARY = { ".zst", } +TEXT_MIMETYPES = { + "message/rfc822", + "application/pgp-keys", + "application/x-gnupg-keyring", +} + + +def is_text_mimetype(mimetype): + if mimetype.startswith("text/"): + return True + + return mimetype.split(";")[0] in TEXT_MIMETYPES + def is_binary_or_large(filename, size): """Decide if is a binary file based on the extension or size""" binary_suffix = BINARY non_binary_suffix = { - ".1", - ".8", ".SUSE", ".asc", ".c", ".cabal", - ".cfg", ".changes", ".conf", ".desktop", diff --git a/lib/lfs_oid.py b/lib/lfs_oid.py index 37a82ae..c350558 100644 --- a/lib/lfs_oid.py +++ b/lib/lfs_oid.py @@ -6,6 +6,7 @@ import sys import requests +from lib.binary import is_text_mimetype from lib.db import DB @@ -71,13 +72,13 @@ class LFSOid: VALUES (%s,%s,%s)""", (package, filename, lfs_oid_id), ) - if mimetype.startswith("text/"): + if is_text_mimetype(mimetype): cur.execute( "INSERT INTO text_files (package,filename) VALUES (%s,%s)", (package, filename), ) self.db.conn.commit() - self.from_db(lfs_oid_id) + self.set_from_dbid(lfs_oid_id) if not self.check(): self.register() From dddc54ab1ccde5663fa80d50cfe1bd8d217a25e0 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Fri, 11 Nov 2022 16:33:44 +0100 Subject: [PATCH 2/2] Remove ProcessPool from exporting It's ignoring exceptions and makes debugging way too hard to justify what's happening --- git-importer.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/git-importer.py b/git-importer.py index 6394d4f..a97d2ab 100755 --- a/git-importer.py +++ b/git-importer.py @@ -1,7 +1,6 @@ #!/usr/bin/python3 import argparse -import concurrent.futures import logging import pathlib import sys @@ -113,17 +112,8 @@ def main(): importer = Importer(URL_OBS, "openSUSE:Factory", args.packages) importer.import_into_db() - if len(args.packages) != 1: - with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor: - fs = [ - executor.submit( - export_package, package, args.repodir, args.cachedir, args.gc - ) - for package in args.packages - ] - concurrent.futures.wait(fs) - else: - export_package(args.packages[0], args.repodir, args.cachedir, args.gc) + for package in args.packages: + export_package(package, args.repodir, args.cachedir, args.gc) if __name__ == "__main__":