Merge branch 'wip/chergert/gdatainputstream-memchr' into 'main'

gio/gdatainputstream: use memchr() when possible

See merge request GNOME/glib!4327
This commit is contained in:
Philip Withnall 2024-10-05 22:09:34 +00:00
commit c94cbf2368
2 changed files with 96 additions and 45 deletions

View File

@ -870,6 +870,18 @@ scan_for_chars (GDataInputStream *stream,
end = available;
peeked = end - start;
/* For the single-character case (such as \0), defer to memchr(), which can
* take advantage of SIMD, etc.
*/
if (stop_chars_len == 1)
{
const char *p = memchr (buffer, stop_chars[0], peeked);
if (p != NULL)
return start + (p - buffer);
}
else
{
for (i = 0; checked < available && i < peeked; i++)
{
for (stop_char = stop_chars; stop_char != stop_end; stop_char++)
@ -878,6 +890,7 @@ scan_for_chars (GDataInputStream *stream,
return (start + i);
}
}
}
checked = end;

View File

@ -274,9 +274,51 @@ test_read_until (void)
G_GNUC_END_IGNORE_DEPRECATIONS
/*
 * escape_data_string:
 * @str: buffer to copy; may contain embedded NUL bytes
 * @len: number of bytes of @str to escape
 *
 * Makes a printable copy of a buffer that may contain embedded NUL bytes:
 * every NUL among the first @len bytes is replaced by '?', and the copy is
 * always NUL-terminated at index @len.
 *
 * The copy is made with g_memdup2() using a size of @len + 1, so @str must
 * have at least @len + 1 readable bytes.
 *
 * Returns: a newly allocated string (the callers in this file release it
 *   with g_free()), or NULL if g_memdup2() returned NULL
 */
static char *
escape_data_string (const char *str,
                    size_t      len)
{
  char *escaped = g_memdup2 (str, len + 1);

  /* g_memdup2() hands back NULL instead of a buffer (e.g. for a NULL
   * source); do not write through it. */
  if (escaped == NULL)
    return NULL;

  for (size_t i = 0; i < len; i++)
    {
      if (escaped[i] == '\0')
        escaped[i] = '?';
    }

  /* Do not rely on str[len] happening to be NUL: when @len already counts
   * the source's own terminator, that terminator has just been turned into
   * '?' and the copied byte after it is arbitrary. */
  escaped[len] = '\0';

  return escaped;
}
static void
test_read_upto (void)
{
const struct {
int n_repeats;
const char *data_string;
size_t data_string_len;
size_t data_part_len;
const char *data_sep;
size_t data_sep_len;
} vectors[] = {
{ 10, " part1 # part2 $ part3 \0 part4 ", 32, 7, "#$\0^", 4 },
{ 20, "{\"key\": \"value\"}\0", 17, 16, "\0", 1 },
};
#undef REPEATS
#undef DATA_STRING
#undef DATA_PART_LEN
#undef DATA_SEP
#undef DATA_SEP_LEN
#define REPEATS vectors[n].n_repeats
#define DATA_STRING vectors[n].data_string
#define DATA_STRING_LEN vectors[n].data_string_len
#define DATA_PART_LEN vectors[n].data_part_len
#define DATA_SEP vectors[n].data_sep
#define DATA_SEP_LEN vectors[n].data_sep_len
for (guint n = 0; n < G_N_ELEMENTS (vectors); n++)
{
const int DATA_PARTS_NUM = DATA_SEP_LEN * REPEATS;
GInputStream *stream;
GInputStream *base_stream;
GError *error = NULL;
@ -285,23 +327,17 @@ test_read_upto (void)
int i;
guchar stop_char;
#undef REPEATS
#undef DATA_STRING
#undef DATA_PART_LEN
#undef DATA_SEP
#undef DATA_SEP_LEN
#define REPEATS 10 /* number of rounds */
#define DATA_STRING " part1 # part2 $ part3 \0 part4 ^"
#define DATA_PART_LEN 7 /* number of characters between separators */
#define DATA_SEP "#$\0^"
#define DATA_SEP_LEN 4
const int DATA_PARTS_NUM = DATA_SEP_LEN * REPEATS;
char *escaped_data = escape_data_string (DATA_STRING, DATA_STRING_LEN);
char *escaped_sep = escape_data_string (DATA_SEP, DATA_SEP_LEN);
g_test_message ("Test vector %u: %s and %s", n, escaped_data, escaped_sep);
g_free (escaped_data);
g_free (escaped_sep);
base_stream = g_memory_input_stream_new ();
stream = G_INPUT_STREAM (g_data_input_stream_new (base_stream));
for (i = 0; i < REPEATS; i++)
g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream), DATA_STRING, 32, NULL);
g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream), DATA_STRING, DATA_STRING_LEN, NULL);
/* Test stop characters */
error = NULL;
@ -329,6 +365,8 @@ test_read_upto (void)
g_object_unref (base_stream);
g_object_unref (stream);
}
}
enum TestDataType {
TEST_DATA_BYTE = 0,
TEST_DATA_INT16,