git-importer/lib/binary.py

97 lines
1.6 KiB
Python
Raw Permalink Normal View History

2022-10-17 14:58:18 +02:00
import pathlib
# File-name suffixes that is_binary_or_large() always classifies as binary.
# Lookup is an exact, case-sensitive match on the final suffix only.
BINARY = {
    # Compressed streams
    ".bz2",
    ".gz",
    ".lz",
    ".lzma",
    ".xz",
    ".zst",
    # Compressed tarball shorthands
    ".tbz",
    ".tbz2",
    ".tgz",
    ".txz",
    # General-purpose archives
    ".7z",
    ".zip",
    # Packages and bundles
    ".gem",
    ".jar",
    ".obscpio",
    ".oxt",
    ".rpm",
    ".whl",
    # Documents, images and fonts
    ".pdf",
    ".png",
    ".ttf",
    # Other binary payloads
    ".bsp",
}
2022-11-11 16:22:18 +01:00
# MIME types outside the "text/" family that are nevertheless treated as text.
# Entries must be lowercase and parameter-free: is_text_mimetype() normalises
# its argument to that form before the lookup.
TEXT_MIMETYPES = {
    "message/rfc822",
    "application/pgp-keys",
    "application/x-gnupg-keyring",
}


def is_text_mimetype(mimetype):
    """Return True if *mimetype* denotes textual content.

    A type counts as text when it belongs to the ``text/`` family or is one
    of the explicitly allowed entries in ``TEXT_MIMETYPES``.

    Any parameters after the first ``;`` (for example ``; charset=utf-8``)
    are ignored.  Surrounding whitespace is stripped and the comparison is
    case-insensitive, because media type and subtype names are not
    case-sensitive.

    Args:
        mimetype: MIME type string, optionally followed by parameters.

    Returns:
        bool: True for textual types, False otherwise.
    """
    # Reduce "Type/Subtype ; param=value" to a bare, lowercase "type/subtype"
    # so both checks below see the same canonical form.
    essence = mimetype.partition(";")[0].strip().lower()
    return essence.startswith("text/") or essence in TEXT_MIMETYPES
2022-10-17 14:58:18 +02:00
# Suffixes that is_binary_or_large() never reports as binary/large, whatever
# the file size.  Built once at import time instead of on every call.
_NON_BINARY_SUFFIXES = frozenset(
    {
        ".SUSE",
        ".asc",
        ".c",
        ".cabal",
        ".changes",
        ".conf",
        ".desktop",
        ".dif",
        ".diff",
        ".dsc",
        ".el",
        ".html",
        ".in",
        ".init",
        ".install",
        ".keyring",
        ".kiwi",
        ".logrotate",
        ".macros",
        ".md",
        ".obsinfo",
        ".pamd",
        ".patch",
        ".pl",
        ".pom",
        ".py",
        ".rpmlintrc",
        ".rules",
        ".script",
        ".service",
        ".sh",
        ".sig",
        ".sign",
        ".spec",
        ".sysconfig",
        ".test",
        ".txt",
        ".xml",
        ".yml",
    }
)


def is_binary_or_large(filename, size):
    """Decide whether a file is binary or large, from its suffix and size.

    Only the final suffix of the last path component is examined, so
    ``foo.tar.gz`` is judged by ``.gz``.  Matching is exact and
    case-sensitive.  The rules are applied in this order:

    1. a suffix listed in ``BINARY`` -> True;
    2. a suffix listed in the known non-binary set -> False, even when the
       file is big;
    3. anything else (including names without a suffix) -> True only if
       ``size`` is at least ``6 * 1024``.

    Args:
        filename: File name or path; only its suffix is used.
        size: File size, compared against ``6 * 1024`` (presumably a byte
            count, i.e. a 6 KiB threshold -- confirm against callers).

    Returns:
        bool: True if the file is considered binary or large.
    """
    suffix = pathlib.Path(filename).suffix
    if suffix in BINARY:
        return True
    if suffix in _NON_BINARY_SUFFIXES:
        return False
    # Unknown suffix: fall back to the size threshold.
    return size >= 6 * 1024