Make g_utf8_make_valid optionally take a length

g_utf8_make_valid was turned into a public API this cycle. However,
now that it is public, we should make the API more generic, allowing
the caller to specify the length. This is especially useful if
the function is called with a string that has \0 in the middle,
or with a chunk of a string that is not nul-terminated.
This is also consistent with most of the other utf8 utils.

Callers inside glib are updated to the new signature.

https://bugzilla.gnome.org/show_bug.cgi?id=779456
This commit is contained in:
Paolo Borelli 2017-03-02 09:10:35 +01:00 committed by Ignacio Casal Quinteiro
parent 9aaf7588fc
commit f559bc01dc
5 changed files with 24 additions and 16 deletions

View File

@ -1938,7 +1938,7 @@ g_filename_display_name (const gchar *filename)
* by a question mark * by a question mark
*/ */
if (!display_name) if (!display_name)
display_name = g_utf8_make_valid (filename); display_name = g_utf8_make_valid (filename, -1);
return display_name; return display_name;
} }

View File

@ -1206,7 +1206,7 @@ g_key_file_parse_line (GKeyFile *key_file,
&parse_error); &parse_error);
else else
{ {
gchar *line_utf8 = g_utf8_make_valid (line); gchar *line_utf8 = g_utf8_make_valid (line, length);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_PARSE, G_KEY_FILE_ERROR_PARSE,
_("Key file contains line “%s” which is not " _("Key file contains line “%s” which is not "
@ -1338,7 +1338,7 @@ g_key_file_parse_key_value_pair (GKeyFile *key_file,
{ {
if (g_ascii_strcasecmp (value, "UTF-8") != 0) if (g_ascii_strcasecmp (value, "UTF-8") != 0)
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, value_len);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains unsupported " _("Key file contains unsupported "
@ -1871,7 +1871,7 @@ g_key_file_get_string (GKeyFile *key_file,
if (!g_utf8_validate (value, -1, NULL)) if (!g_utf8_validate (value, -1, NULL))
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains key “%s” with value “%s” " _("Key file contains key “%s” with value “%s” "
@ -1987,7 +1987,7 @@ g_key_file_get_string_list (GKeyFile *key_file,
if (!g_utf8_validate (value, -1, NULL)) if (!g_utf8_validate (value, -1, NULL))
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains key “%s” with value “%s” " _("Key file contains key “%s” with value “%s” "
@ -4301,7 +4301,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file,
if (*value == '\0' || (*eof_int != '\0' && !g_ascii_isspace(*eof_int))) if (*value == '\0' || (*eof_int != '\0' && !g_ascii_isspace(*eof_int)))
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "
@ -4314,7 +4314,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file,
int_value = long_value; int_value = long_value;
if (int_value != long_value || errno == ERANGE) if (int_value != long_value || errno == ERANGE)
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, g_set_error (error,
G_KEY_FILE_ERROR, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
@ -4348,7 +4348,7 @@ g_key_file_parse_value_as_double (GKeyFile *key_file,
if (*end_of_valid_d != '\0' || end_of_valid_d == value) if (*end_of_valid_d != '\0' || end_of_valid_d == value)
{ {
gchar *value_utf8 = g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "
@ -4387,7 +4387,7 @@ g_key_file_parse_value_as_boolean (GKeyFile *key_file,
else if (strcmp_sized (value, length, "false") == 0 || strcmp_sized (value, length, "0") == 0) else if (strcmp_sized (value, length, "false") == 0 || strcmp_sized (value, length, "0") == 0)
return FALSE; return FALSE;
value_utf8 = g_utf8_make_valid (value); value_utf8 = g_utf8_make_valid (value, -1);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "

View File

@ -422,7 +422,7 @@ set_error (GMarkupParseContext *context,
/* Make sure that the GError message is valid UTF-8 /* Make sure that the GError message is valid UTF-8
* even if it is complaining about invalid UTF-8 in the markup * even if it is complaining about invalid UTF-8 in the markup
*/ */
s_valid = g_utf8_make_valid (s); s_valid = g_utf8_make_valid (s, -1);
set_error_literal (context, error, code, s); set_error_literal (context, error, code, s);
g_free (s); g_free (s);

View File

@ -885,7 +885,8 @@ gchar *g_utf8_collate_key_for_filename (const gchar *str,
gssize len) G_GNUC_MALLOC; gssize len) G_GNUC_MALLOC;
GLIB_AVAILABLE_IN_2_52 GLIB_AVAILABLE_IN_2_52
gchar *g_utf8_make_valid (const gchar *str); gchar *g_utf8_make_valid (const gchar *str,
gssize len) G_GNUC_MALLOC;
G_END_DECLS G_END_DECLS

View File

@ -1738,6 +1738,8 @@ g_utf8_strreverse (const gchar *str,
/** /**
* g_utf8_make_valid: * g_utf8_make_valid:
* @str: string to coerce into UTF-8 * @str: string to coerce into UTF-8
* @len: the maximum length of @str to use, in bytes. If @len < 0,
* then the string is nul-terminated.
* *
* If the provided string is valid UTF-8, return a copy of it. If not, * If the provided string is valid UTF-8, return a copy of it. If not,
* return a copy in which bytes that could not be interpreted as valid Unicode * return a copy in which bytes that could not be interpreted as valid Unicode
@ -1754,17 +1756,21 @@ g_utf8_strreverse (const gchar *str,
* Since: 2.52 * Since: 2.52
*/ */
gchar * gchar *
g_utf8_make_valid (const gchar *str) g_utf8_make_valid (const gchar *str,
gssize len)
{ {
GString *string; GString *string;
const gchar *remainder, *invalid; const gchar *remainder, *invalid;
gint remaining_bytes, valid_bytes; gsize remaining_bytes, valid_bytes;
g_return_val_if_fail (str != NULL, NULL); g_return_val_if_fail (str != NULL, NULL);
if (len < 0)
len = strlen (str);
string = NULL; string = NULL;
remainder = str; remainder = str;
remaining_bytes = strlen (str); remaining_bytes = len;
while (remaining_bytes != 0) while (remaining_bytes != 0)
{ {
@ -1784,11 +1790,12 @@ g_utf8_make_valid (const gchar *str)
} }
if (string == NULL) if (string == NULL)
return g_strdup (str); return g_strndup (str, len);
g_string_append (string, remainder); g_string_append (string, remainder);
g_string_append_c (string, '\0');
g_assert (g_utf8_validate (string->str, -1, NULL)); g_assert (g_utf8_validate (string->str, -1, NULL));
return g_string_free (string, FALSE); return g_string_free (string, FALSE);
} }