mirror of
https://github.com/openSUSE/osc.git
synced 2025-02-21 17:52:14 +01:00
Merge pull request #1701 from dmach/fix-detecting-binaries-v2
Fix typo in core.binary() that caused text files to be detected as binary
This commit is contained in:
commit
47cd6b37c8
@ -2796,7 +2796,9 @@ def binary(data: bytes):
|
|||||||
|
|
||||||
We're using heuristics according to OBS: src/backend/BSSrcServer/filediff - look for "diff binary detection"
|
We're using heuristics according to OBS: src/backend/BSSrcServer/filediff - look for "diff binary detection"
|
||||||
"""
|
"""
|
||||||
binary_chars = re.findall(b"[\x00-\0x07\x0e-\x1f]", data)
|
if b"\0" in data:
|
||||||
|
return True
|
||||||
|
binary_chars = re.findall(b"[\x00-\x07\x0e-\x1f]", data)
|
||||||
return len(binary_chars) * 40 > len(data)
|
return len(binary_chars) * 40 > len(data)
|
||||||
|
|
||||||
|
|
||||||
|
@ -159,12 +159,12 @@ class TestBinaryFile(unittest.TestCase):
|
|||||||
path = os.path.join(self.tmpdir, "binary")
|
path = os.path.join(self.tmpdir, "binary")
|
||||||
with open(path, "wb") as f:
|
with open(path, "wb") as f:
|
||||||
f.write(1000 * b"a")
|
f.write(1000 * b"a")
|
||||||
f.write(b"\0")
|
f.write(b"\1")
|
||||||
self.assertFalse(binary_file(path))
|
self.assertFalse(binary_file(path))
|
||||||
|
|
||||||
with open(path, "wb") as f:
|
with open(path, "wb") as f:
|
||||||
f.write(4096 * b"a")
|
f.write(4096 * b"a")
|
||||||
f.write(b"\0")
|
f.write(b"\1")
|
||||||
self.assertFalse(binary_file(path))
|
self.assertFalse(binary_file(path))
|
||||||
|
|
||||||
def test_binary(self):
|
def test_binary(self):
|
||||||
@ -173,19 +173,26 @@ class TestBinaryFile(unittest.TestCase):
|
|||||||
# sufficient control chars in first 4k
|
# sufficient control chars in first 4k
|
||||||
with open(path, "wb") as f:
|
with open(path, "wb") as f:
|
||||||
f.write(1000 * b"a")
|
f.write(1000 * b"a")
|
||||||
f.write(26 * b"\0")
|
f.write(26 * b"\1")
|
||||||
self.assertTrue(binary_file(path))
|
self.assertTrue(binary_file(path))
|
||||||
|
|
||||||
# sufficient control chars in first 4k
|
# sufficient control chars in first 4k
|
||||||
with open(path, "wb") as f:
|
with open(path, "wb") as f:
|
||||||
f.write(3993 * b"a")
|
f.write(3993 * b"a")
|
||||||
f.write(103 * b"\0")
|
f.write(103 * b"\1")
|
||||||
|
self.assertTrue(binary_file(path))
|
||||||
|
|
||||||
|
# a single \0 is good enough for us to say it's a binary file
|
||||||
|
with open(path, "wb") as f:
|
||||||
|
f.write(3993 * b"a")
|
||||||
|
f.write(b"\0")
|
||||||
|
f.write(999 * b"\1")
|
||||||
self.assertTrue(binary_file(path))
|
self.assertTrue(binary_file(path))
|
||||||
|
|
||||||
# detected as text because we're reading only first 4k characters
|
# detected as text because we're reading only first 4k characters
|
||||||
with open(path, "wb") as f:
|
with open(path, "wb") as f:
|
||||||
f.write(4096 * b"a")
|
f.write(4096 * b"a")
|
||||||
f.write(1000 * b"\0")
|
f.write(1000 * b"\1")
|
||||||
self.assertFalse(binary_file(path))
|
self.assertFalse(binary_file(path))
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user