diff --git a/osc/core.py b/osc/core.py index 00ed806c..5ac4a6af 100644 --- a/osc/core.py +++ b/osc/core.py @@ -2796,7 +2796,9 @@ def binary(data: bytes): We're using heuristics according to OBS: src/backend/BSSrcServer/filediff - look for "diff binary detection" """ - binary_chars = re.findall(b"[\x00-\0x07\x0e-\x1f]", data) + if b"\0" in data: + return True + binary_chars = re.findall(b"[\x00-\x07\x0e-\x1f]", data) return len(binary_chars) * 40 > len(data) diff --git a/tests/test_core.py b/tests/test_core.py index 1303645c..9009d967 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -159,12 +159,12 @@ class TestBinaryFile(unittest.TestCase): path = os.path.join(self.tmpdir, "binary") with open(path, "wb") as f: f.write(1000 * b"a") - f.write(b"\0") + f.write(b"\1") self.assertFalse(binary_file(path)) with open(path, "wb") as f: f.write(4096 * b"a") - f.write(b"\0") + f.write(b"\1") self.assertFalse(binary_file(path)) def test_binary(self): @@ -173,19 +173,26 @@ class TestBinaryFile(unittest.TestCase): # sufficient control chars in first 4k with open(path, "wb") as f: f.write(1000 * b"a") - f.write(26 * b"\0") + f.write(26 * b"\1") self.assertTrue(binary_file(path)) # sufficient control chars in first 4k with open(path, "wb") as f: f.write(3993 * b"a") - f.write(103 * b"\0") + f.write(103 * b"\1") + self.assertTrue(binary_file(path)) + + # a single \0 is good enough for us to say it's a binary file + with open(path, "wb") as f: + f.write(3993 * b"a") + f.write(b"\0") + f.write(999 * b"\1") self.assertTrue(binary_file(path)) # detected as text because we're reading only first 4k characters with open(path, "wb") as f: f.write(4096 * b"a") - f.write(1000 * b"\0") + f.write(1000 * b"\1") self.assertFalse(binary_file(path))