From e3e936f7ba244c77fe2f66c69b6fd35e9a1cfcf6 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Tue, 15 Oct 2024 11:45:59 +0100 Subject: [PATCH] gdatainputstream: Use memchr() for the multi-stop-char case too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a follow up to commit e7e5ddd2a. oss-fuzz found a case where performance was pathologically bad with a long `stop_chars` string. Since our inner loop in that case was iterating over `stop_chars` and comparing each of them to `buffer[i]`, we can use `memchr()` the opposite way round to in commit e7e5ddd2a to speed that up, using `buffer[i]` as the needle in a `stop_chars` haystack. From some brief testing, this doesn’t impact on the performance of a more normal use case of having a short (<10 bytes long) `stop_chars`. I was slightly concerned that the function call overhead of calling out to `memchr()` would have an impact there, but apparently not. Signed-off-by: Philip Withnall oss-fuzz#372994443 --- gio/gdatainputstream.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/gio/gdatainputstream.c b/gio/gdatainputstream.c index ef728f006..bf2e71cd4 100644 --- a/gio/gdatainputstream.c +++ b/gio/gdatainputstream.c @@ -861,11 +861,8 @@ scan_for_chars (GDataInputStream *stream, gsize start, end, peeked; gsize i; gsize available, checked; - const char *stop_char; - const char *stop_end; bstream = G_BUFFERED_INPUT_STREAM (stream); - stop_end = stop_chars + stop_chars_len; checked = *checked_out; @@ -874,8 +871,8 @@ scan_for_chars (GDataInputStream *stream, end = available; peeked = end - start; - /* For single-char case such as \0, defer to memchr which can - * take advantage of simd/etc. + /* For single-char case such as \0, defer the entire operation to memchr which + * can take advantage of simd/etc. */ if (stop_chars_len == 1) { @@ -888,11 +885,13 @@ scan_for_chars (GDataInputStream *stream, { for (i = 0; checked < available && i < peeked; i++) { - for (stop_char = stop_chars; stop_char != stop_end; stop_char++) - { - if (buffer[i] == *stop_char) - return (start + i); - } + /* We can use memchr() the other way round. Less fast than the + * single-char case above, but still faster than doing our own inner + * loop. */ + const char *p = memchr (stop_chars, buffer[i], stop_chars_len); + + if (p != NULL) + return (start + i); } }