mirror of
https://github.com/openSUSE/osc.git
synced 2025-02-22 10:12:12 +01:00
Merge pull request #1697 from dmach/fix-detecting-binaries
Fix detecting binary files
This commit is contained in:
commit
b469f31d6c
13
osc/core.py
13
osc/core.py
@ -2790,11 +2790,14 @@ def sha256_dgst(file):
|
|||||||
return s.hexdigest()
|
return s.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def binary(data: bytes) -> bool:
    """
    Return ``True`` if ``data`` is binary data.

    We're using heuristics according to OBS: src/backend/BSSrcServer/filediff - look for "diff binary detection"

    The data is considered binary when more than one byte in forty is a
    control character from the ranges 0x00-0x07 or 0x0e-0x1f.  Common text
    whitespace (tab, newline, carriage return, ...) lies outside those ranges
    and is never counted.

    :param data: Bytes to inspect; callers typically pass the leading part of a file.
    :return: ``True`` if ``data`` looks binary, ``False`` otherwise (including for empty input).
    """
    # Empty (or otherwise falsy) input is text, matching the behaviour of the
    # previous implementation which guarded with ``if s and ...``.
    if not data:
        return False

    # Raw bytes literal: the escapes are interpreted by the regex engine, and
    # both ranges are spelled with ``\xNN``.  Writing ``\0x07`` here would be
    # read as NUL followed by the literal characters "x", "0" and "7".
    binary_chars = re.findall(rb"[\x00-\x07\x0e-\x1f]", data)

    # Equivalent to "control characters make up more than 2.5 % of the data",
    # written with integer arithmetic only.
    return len(binary_chars) * 40 > len(data)
|
||||||
|
|
||||||
|
|
||||||
def binary_file(fn):
|
def binary_file(fn):
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
from osc.core import binary_file
|
||||||
from osc.core import makeurl
|
from osc.core import makeurl
|
||||||
from osc.core import UrlQueryArray
|
from osc.core import UrlQueryArray
|
||||||
from osc.core import parseRevisionOption
|
from osc.core import parseRevisionOption
|
||||||
@ -138,5 +142,52 @@ class TestMakeurl(unittest.TestCase):
|
|||||||
self.assertEqual(url, f"https://example.com/api/v1?{encoded_char}={encoded_char}")
|
self.assertEqual(url, f"https://example.com/api/v1?{encoded_char}={encoded_char}")
|
||||||
|
|
||||||
|
|
||||||
|
class TestBinaryFile(unittest.TestCase):
    """Check ``binary_file`` against small files created in a scratch directory."""

    def setUp(self):
        # Every test works in its own throw-away directory.
        self.tmpdir = tempfile.mkdtemp(prefix="osc_test_")

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def _write_bytes(self, name, payload):
        """Create (or overwrite) ``name`` in the scratch directory and return its path."""
        target = os.path.join(self.tmpdir, name)
        with open(target, "wb") as stream:
            stream.write(payload)
        return target

    def test_text(self):
        target = os.path.join(self.tmpdir, "text")
        with open(target, "w") as stream:
            stream.write("a" * 1000)
        self.assertFalse(binary_file(target))

    def test_text_with_binary_chars(self):
        # a single NUL byte after 1000 text characters is still text
        target = self._write_bytes("binary", b"a" * 1000 + b"\0")
        self.assertFalse(binary_file(target))

        # same with the NUL byte placed right after the first 4096 characters
        target = self._write_bytes("binary", b"a" * 4096 + b"\0")
        self.assertFalse(binary_file(target))

    def test_binary(self):
        # sufficient control chars in first 4k
        target = self._write_bytes("binary", b"a" * 1000 + b"\0" * 26)
        self.assertTrue(binary_file(target))

        # sufficient control chars in first 4k (file is exactly 4096 bytes long)
        target = self._write_bytes("binary", b"a" * 3993 + b"\0" * 103)
        self.assertTrue(binary_file(target))

        # detected as text because we're reading only first 4k characters
        target = self._write_bytes("binary", b"a" * 4096 + b"\0" * 1000)
        self.assertFalse(binary_file(target))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user