Merge pull request 'Also treat some more mimetypes as text' (#20) from add_further_mimetypes into main

Reviewed-on: https://gitea.opensuse.org/importers/git-importer/pulls/20
This commit is contained in:
coolo 2022-11-15 07:28:05 +01:00
commit d311d54f26
3 changed files with 18 additions and 17 deletions

View File

@ -1,7 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
import argparse import argparse
import concurrent.futures
import logging import logging
import pathlib import pathlib
import sys import sys
@ -113,17 +112,8 @@ def main():
importer = Importer(URL_OBS, "openSUSE:Factory", args.packages) importer = Importer(URL_OBS, "openSUSE:Factory", args.packages)
importer.import_into_db() importer.import_into_db()
if len(args.packages) != 1: for package in args.packages:
with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor: export_package(package, args.repodir, args.cachedir, args.gc)
fs = [
executor.submit(
export_package, package, args.repodir, args.cachedir, args.gc
)
for package in args.packages
]
concurrent.futures.wait(fs)
else:
export_package(args.packages[0], args.repodir, args.cachedir, args.gc)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -25,18 +25,28 @@ BINARY = {
".zst", ".zst",
} }
# Media types that are not under "text/" but should still be treated as text.
TEXT_MIMETYPES = {
    "message/rfc822",
    "application/pgp-keys",
    "application/x-gnupg-keyring",
}


def is_text_mimetype(mimetype):
    """Return True if *mimetype* should be treated as text.

    A mimetype counts as text when its media type starts with ``text/`` or
    is listed in ``TEXT_MIMETYPES``. Anything after the first ``;`` (for
    example ``; charset=us-ascii``) is ignored, and the comparison is
    case-insensitive and tolerant of surrounding whitespace.

    Args:
        mimetype: The mimetype string, optionally with parameters. ``None``
            or an empty string is treated as "not text".

    Returns:
        bool: True for text content, False otherwise.
    """
    if not mimetype:
        return False
    # Drop parameters and normalise: media types are case-insensitive.
    media_type = mimetype.partition(";")[0].strip().lower()
    return media_type.startswith("text/") or media_type in TEXT_MIMETYPES
def is_binary_or_large(filename, size): def is_binary_or_large(filename, size):
"""Decide if is a binary file based on the extension or size""" """Decide if is a binary file based on the extension or size"""
binary_suffix = BINARY binary_suffix = BINARY
non_binary_suffix = { non_binary_suffix = {
".1",
".8",
".SUSE", ".SUSE",
".asc", ".asc",
".c", ".c",
".cabal", ".cabal",
".cfg",
".changes", ".changes",
".conf", ".conf",
".desktop", ".desktop",

View File

@ -6,6 +6,7 @@ import sys
import requests import requests
from lib.binary import is_text_mimetype
from lib.db import DB from lib.db import DB
@ -71,13 +72,13 @@ class LFSOid:
VALUES (%s,%s,%s)""", VALUES (%s,%s,%s)""",
(package, filename, lfs_oid_id), (package, filename, lfs_oid_id),
) )
if mimetype.startswith("text/"): if is_text_mimetype(mimetype):
cur.execute( cur.execute(
"INSERT INTO text_files (package,filename) VALUES (%s,%s)", "INSERT INTO text_files (package,filename) VALUES (%s,%s)",
(package, filename), (package, filename),
) )
self.db.conn.commit() self.db.conn.commit()
self.from_db(lfs_oid_id) self.set_from_dbid(lfs_oid_id)
if not self.check(): if not self.check():
self.register() self.register()