From e3e936f7ba244c77fe2f66c69b6fd35e9a1cfcf6 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 15 Oct 2024 11:45:59 +0100 Subject: [PATCH 1/2] gdatainputstream: Use memchr() for the multi-stop-char case too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a follow up to commit e7e5ddd2a. oss-fuzz found a case where performance was pathologically bad with a long `stop_chars` string. Since our inner loop in that case was iterating over `stop_chars` and comparing each of them to `buffer[i]`, we can use `memchr()` the opposite way round to in commit e7e5ddd2a to speed that up, using `buffer[i]` as the needle in a `stop_chars` haystack. From some brief testing, this doesn’t impact on the performance of a more normal use case of having a short (<10 bytes long) `stop_chars`. I was slightly concerned that the function call overhead of calling out to `memchr()` would have an impact there, but apparently not. Signed-off-by: Philip Withnall oss-fuzz#372994443 --- gio/gdatainputstream.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/gio/gdatainputstream.c b/gio/gdatainputstream.c index ef728f006..bf2e71cd4 100644 --- a/gio/gdatainputstream.c +++ b/gio/gdatainputstream.c @@ -861,11 +861,8 @@ scan_for_chars (GDataInputStream *stream, gsize start, end, peeked; gsize i; gsize available, checked; - const char *stop_char; - const char *stop_end; bstream = G_BUFFERED_INPUT_STREAM (stream); - stop_end = stop_chars + stop_chars_len; checked = *checked_out; @@ -874,8 +871,8 @@ scan_for_chars (GDataInputStream *stream, end = available; peeked = end - start; - /* For single-char case such as \0, defer to memchr which can - * take advantage of simd/etc. + /* For single-char case such as \0, defer the entire operation to memchr which + * can take advantage of simd/etc. */ if (stop_chars_len == 1) { @@ -888,11 +885,13 @@ scan_for_chars (GDataInputStream *stream, { for (i = 0; checked < available && i < peeked; i++) { - for (stop_char = stop_chars; stop_char != stop_end; stop_char++) - { - if (buffer[i] == *stop_char) - return (start + i); - } + /* We can use memchr() the other way round. Less fast than the + * single-char case above, but still faster than doing our own inner + * loop. */ + const char *p = memchr (stop_chars, buffer[i], stop_chars_len); + + if (p != NULL) + return (start + i); } } From 50ccb04c71c95966a40849b21be98b587ef60248 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 15 Oct 2024 11:49:33 +0100 Subject: [PATCH 2/2] tests: Fix 1-byte overread in data-input-stream tests Commit 760a6f647 rearranged how the lengths are calculated for the test data and added `escape_data_string()` so they could be printed safely. Unfortunately there was a miscount in the length of the first test vector in `test_read_upto()`: there are 31 bytes in the string literal, plus one nul terminator which is added by the compiler. The quoted string length was 32 bytes. This should be fine (explicitly including the nul delimiter), but then `escape_data_string()` adds another byte to the length because it assumes the nul delimiter has *not* been included in the count. Changing the string length from 32 to 31 breaks the tests, as the final component of the data is then the wrong length, so add an additional explicit nul byte to the string literal so that it matches the length. Signed-off-by: Philip Withnall --- gio/tests/data-input-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gio/tests/data-input-stream.c b/gio/tests/data-input-stream.c index 11c997bce..163231e1c 100644 --- a/gio/tests/data-input-stream.c +++ b/gio/tests/data-input-stream.c @@ -314,7 +314,7 @@ test_read_upto (void) const char *data_sep; size_t data_sep_len; } vectors[] = { - { 10, " part1 # part2 $ part3 \0 part4 ", 32, 7, "#$\0^", 4 }, + { 10, " part1 # part2 $ part3 \0 part4 \0", 32, 7, "#$\0^", 4 }, { 20, "{\"key\": \"value\"}\0", 17, 16, "\0", 1 }, };