From 851473490281f82d82560b2368284ef33cf6e8f9 Mon Sep 17 00:00:00 2001
From: lizhenghao
Date: Wed, 22 Oct 2025 10:26:34 +0800
Subject: [PATCH 1/3] Fix: Fixed a read(-1) vulnerability caused by boundary handling error in #264

---
Reviewer notes (placed after the separator so they stay out of the commit
message; patch tools ignore text outside the hunks):

* source/decoder.c, first hunk: the per-iteration read size becomes
  65536 - buffer_length instead of 65536 - buffer_size.
  NOTE(review): presumably buffer_size could grow past 65536 and turn the
  request negative (the read(-1) named in the subject); the full function
  is not visible in this patch, so confirm against the source.
* source/decoder.c, second hunk: a new else branch sets buffer_length to 0
  (per its own comment, when all bytes were consumed), and the reference to
  chunk is released and cleared at the end of every loop iteration.
  NOTE(review): check that no remaining path after the loop body still
  releases chunk a second time.
* tests/test_decoder.py: adds a regression test whose text string places a
  3-byte "€" at byte offsets 65535 and 131071 of the string data, so each
  one straddles a multiple of 65536.

 source/decoder.c      |  8 +++++++-
 tests/test_decoder.py | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

Index: cbor2-5.6.5/source/decoder.c
===================================================================
--- cbor2-5.6.5.orig/source/decoder.c
+++ cbor2-5.6.5/source/decoder.c
@@ -758,7 +758,7 @@ decode_definite_long_string(CBORDecoderO
     char *buffer = NULL;
     while (left) {
         // Read up to 65536 bytes of data from the stream
-        Py_ssize_t chunk_length = 65536 - buffer_size;
+        Py_ssize_t chunk_length = 65536 - buffer_length;
         if (left < chunk_length)
             chunk_length = left;
 
@@ -828,7 +828,13 @@ decode_definite_long_string(CBORDecoderO
                 memcpy(buffer, bytes_buffer + consumed, unconsumed);
             }
             buffer_length = unconsumed;
+        } else {
+            // All bytes consumed, reset buffer_length
+            buffer_length = 0;
         }
+
+        Py_DECREF(chunk);
+        chunk = NULL;
     }
 
     if (ret && string_namespace_add(self, ret, length) == -1)
Index: cbor2-5.6.5/tests/test_decoder.py
===================================================================
--- cbor2-5.6.5.orig/tests/test_decoder.py
+++ cbor2-5.6.5/tests/test_decoder.py
@@ -260,6 +260,28 @@ def test_string_oversized(impl) -> None:
         (impl.loads(unhexlify("aeaeaeaeaeaeaeaeae0108c29843d90100d8249f0000aeaeffc26ca799")),)
 
 
+def test_string_issue_264_multiple_chunks_utf8_boundary(impl) -> None:
+    """Test for Issue #264: UTF-8 characters split across multiple 65536-byte chunk boundaries."""
+    import struct
+
+    # Construct: 65535 'a' + '€' (3 bytes) + 65533 'b' + '€' (3 bytes) + 100 'd'
+    # Total: 131174 bytes, which spans 3 chunks (65536 + 65536 + 102)
+    total_bytes = 65535 + 3 + 65533 + 3 + 100
+
+    payload = b"\x7a" + struct.pack(">I", total_bytes)  # major type 3, 4-byte length
+    payload += b"a" * 65535
+    payload += "€".encode()  # U+20AC: E2 82 AC
+    payload += b"b" * 65533
+    payload += "€".encode()
+    payload += b"d" * 100
+
+    expected = "a" * 65535 + "€" + "b" * 65533 + "€" + "d" * 100
+
+    result = impl.loads(payload)
+    assert result == expected
+    assert len(result) == 131170  # 65535 + 1 + 65533 + 1 + 100 characters
+
+
 @pytest.mark.parametrize(
     "payload, expected",
     [