import pathlib

# File suffixes that are always treated as binary content.
BINARY = {
    ".7z",
    ".bsp",
    ".bz2",
    ".gem",
    ".gz",
    ".jar",
    ".lz",
    ".lzma",
    ".obscpio",
    ".oxt",
    ".pdf",
    ".png",
    ".rpm",
    ".tbz",
    ".tbz2",
    ".tgz",
    ".ttf",
    ".txz",
    ".whl",
    ".xz",
    ".zip",
    ".zst",
}

# File suffixes that are always treated as non-binary, regardless of size.
# Matching is case-sensitive (note the upper-case ".SUSE").
NON_BINARY = {
    ".SUSE",
    ".asc",
    ".c",
    ".cabal",
    ".changes",
    ".conf",
    ".desktop",
    ".dif",
    ".diff",
    ".dsc",
    ".el",
    ".html",
    ".in",
    ".init",
    ".install",
    ".keyring",
    ".kiwi",
    ".logrotate",
    ".macros",
    ".md",
    ".obsinfo",
    ".pamd",
    ".patch",
    ".pl",
    ".pom",
    ".py",
    ".rpmlintrc",
    ".rules",
    ".script",
    ".service",
    ".sh",
    ".sig",
    ".sign",
    ".spec",
    ".sysconfig",
    ".test",
    ".txt",
    ".xml",
    ".yml",
}

# Files with an unrecognised suffix are reported as "large" from this size on.
# NOTE(review): the unit is presumably bytes (i.e. 6 KiB) -- confirm with callers.
LARGE_SIZE_THRESHOLD = 6 * 1024

# Non-"text/*" media types that are nevertheless considered text.
TEXT_MIMETYPES = {
    "message/rfc822",
    "application/pgp-keys",
    "application/x-gnupg-keyring",
}


def is_text_mimetype(mimetype: str) -> bool:
    """Return True if *mimetype* denotes textual content.

    Any parameters after the first ";" (e.g. "; charset=utf-8") are ignored.
    The remaining type is stripped of surrounding whitespace and compared
    case-insensitively. It is textual when it starts with "text/" or is
    listed in TEXT_MIMETYPES.
    """
    # Drop parameters, then normalise: media types are case-insensitive and
    # may be separated from their parameters by optional whitespace.
    essence = mimetype.split(";")[0].strip().lower()
    return essence.startswith("text/") or essence in TEXT_MIMETYPES


def is_binary_or_large(filename, size) -> bool:
    """Decide whether a file is binary or large, based on suffix and size.

    The checks are applied in this order:

    1. A suffix listed in BINARY -> True.
    2. A suffix listed in NON_BINARY -> False, whatever the size.
    3. Otherwise -> True if *size* is at least LARGE_SIZE_THRESHOLD.

    Only the last suffix of *filename* is considered, and the comparison is
    case-sensitive.
    """
    suffix = pathlib.Path(filename).suffix
    if suffix in BINARY:
        return True
    if suffix in NON_BINARY:
        return False
    # Unknown suffix: fall back to the size heuristic.
    return size >= LARGE_SIZE_THRESHOLD