mirror of
https://github.com/openSUSE/osc.git
synced 2025-02-22 18:22:12 +01:00
Merge pull request #1697 from dmach/fix-detecting-binaries
Fix detecting binary files
This commit is contained in:
commit
b469f31d6c
13
osc/core.py
13
osc/core.py
@ -2790,11 +2790,14 @@ def sha256_dgst(file):
|
||||
return s.hexdigest()
|
||||
|
||||
|
||||
def binary(s):
    """Report whether ``s`` looks like binary data, following diff's heuristic.

    The data is considered binary when a NUL byte occurs within its first
    4096 bytes. Empty or ``None`` input is never considered binary.
    """
    # Nothing to inspect: falsy input is treated as text.
    if not s:
        return False

    nul_byte = bytes('\0', "utf-8")
    # Only the leading 4 KiB are examined.
    return nul_byte in s[:4096]
|
||||
def binary(data: bytes):
    """
    Return ``True`` if ``data`` is binary data.

    We're using heuristics according to OBS: src/backend/BSSrcServer/filediff - look for "diff binary detection"

    The data is reported as binary when more than one byte in 40 (2.5 %) is a
    control character in the ranges 0x00-0x07 or 0x0e-0x1f. Bytes 0x08-0x0d
    (backspace, tab, newline, vertical tab, form feed, carriage return) are
    not counted. Empty data is never binary.

    :param data: the bytes to inspect
    :return: ``True`` if ``data`` looks binary, ``False`` otherwise
    """
    # Each bound must be a complete ``\xNN`` escape. The previous ``\0x07``
    # was parsed as NUL followed by the literal characters "x", "0" and "7",
    # so those printable characters were counted as binary while the real
    # control bytes 0x01-0x07 were ignored.
    binary_chars = re.findall(rb"[\x00-\x07\x0e-\x1f]", data)
    return len(binary_chars) * 40 > len(data)
|
||||
|
||||
|
||||
def binary_file(fn):
|
||||
|
@ -1,5 +1,9 @@
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from osc.core import binary_file
|
||||
from osc.core import makeurl
|
||||
from osc.core import UrlQueryArray
|
||||
from osc.core import parseRevisionOption
|
||||
@ -138,5 +142,52 @@ class TestMakeurl(unittest.TestCase):
|
||||
self.assertEqual(url, f"https://example.com/api/v1?{encoded_char}={encoded_char}")
|
||||
|
||||
|
||||
class TestBinaryFile(unittest.TestCase):
    """Check ``binary_file`` against small files written to a scratch directory."""

    def setUp(self):
        # Every test works inside its own throwaway directory.
        self.tmpdir = tempfile.mkdtemp(prefix="osc_test_")

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def _write_bytes(self, name, *chunks):
        """Create (or overwrite) ``name`` in the scratch directory from ``chunks``; return its path."""
        target = os.path.join(self.tmpdir, name)
        with open(target, "wb") as stream:
            for chunk in chunks:
                stream.write(chunk)
        return target

    def test_text(self):
        target = os.path.join(self.tmpdir, "text")
        with open(target, "w") as stream:
            stream.write(1000 * "a")
        self.assertFalse(binary_file(target))

    def test_text_with_binary_chars(self):
        # a single NUL byte after 1000 printable bytes is still text
        target = self._write_bytes("binary", 1000 * b"a", b"\0")
        self.assertFalse(binary_file(target))

        # a single NUL byte located after the first 4k is still text
        target = self._write_bytes("binary", 4096 * b"a", b"\0")
        self.assertFalse(binary_file(target))

    def test_binary(self):
        # sufficient control chars in a file shorter than 4k
        target = self._write_bytes("binary", 1000 * b"a", 26 * b"\0")
        self.assertTrue(binary_file(target))

        # sufficient control chars in a file of exactly 4k
        target = self._write_bytes("binary", 3993 * b"a", 103 * b"\0")
        self.assertTrue(binary_file(target))

        # detected as text because we're reading only first 4k characters
        target = self._write_bytes("binary", 4096 * b"a", 1000 * b"\0")
        self.assertFalse(binary_file(target))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user