Also treat some more mimetypes as text #20

Merged
Ghost merged 2 commits from add_further_mimetypes into main 2022-11-15 07:28:05 +01:00
2 changed files with 16 additions and 5 deletions
Showing only changes of commit 4d1ca8d882 - Show all commits

View File

@ -25,18 +25,28 @@ BINARY = {
".zst",
}
# MIME types that do not start with "text/" but are still treated as text.
TEXT_MIMETYPES = {
    "message/rfc822",
    "application/pgp-keys",
    "application/x-gnupg-keyring",
}


def is_text_mimetype(mimetype):
    """Return True if *mimetype* should be treated as text.

    A MIME type counts as text when its media type starts with ``text/`` or
    is listed in ``TEXT_MIMETYPES``.  Parameters after the first ``;``
    (for example ``; charset=us-ascii``) are ignored, and the comparison
    tolerates surrounding whitespace and differences in letter case,
    because media type names are case-insensitive.

    An empty or ``None`` value is treated as "not text".
    """
    if not mimetype:
        return False
    # Keep only the media type itself; drop parameters such as the charset.
    media_type = mimetype.partition(";")[0].strip().lower()
    return media_type.startswith("text/") or media_type in TEXT_MIMETYPES
def is_binary_or_large(filename, size):
"""Decide if is a binary file based on the extension or size"""
binary_suffix = BINARY
non_binary_suffix = {
".1",
".8",
".SUSE",
".asc",
".c",
".cabal",
".cfg",
Review

OK for me, but this seems like a non-binary suffix

OK for me, but this seems like a non-binary suffix
Review

You're most certainly right. But I was thinking that if a .cfg is large enough to run into this function, it could also be a binary format, and we had better rely on mimetype detection.

In the database it's only https://gitea.opensuse.org/rpm/fluid-soundfont/src/branch/factory/fluidr3_gm.cfg and it's detected as text/plain - so we're good either way.

You're most certainly right. But I was thinking that if a .cfg is large enough to run into this function, it could also be a binary format, and we had better rely on mimetype detection. In the database it's only https://gitea.opensuse.org/rpm/fluid-soundfont/src/branch/factory/fluidr3_gm.cfg and it's detected as text/plain - so we're good either way.
".changes",
".conf",
".desktop",

View File

@ -6,6 +6,7 @@ import sys
import requests
from lib.binary import is_text_mimetype
from lib.db import DB
@ -71,13 +72,13 @@ class LFSOid:
VALUES (%s,%s,%s)""",
(package, filename, lfs_oid_id),
)
if mimetype.startswith("text/"):
if is_text_mimetype(mimetype):
cur.execute(
"INSERT INTO text_files (package,filename) VALUES (%s,%s)",
(package, filename),
)
self.db.conn.commit()
self.from_db(lfs_oid_id)
self.set_from_dbid(lfs_oid_id)
if not self.check():
self.register()