Make g_utf8_make_valid public

Based on a patch by Simon van der Linden and rebased onto current GLib,
with improved documentation loosely based on Telepathy's
tp_utf8_make_valid().

Signed-off-by: Simon McVittie <simon.mcvittie@collabora.co.uk>
Bug: https://bugzilla.gnome.org/show_bug.cgi?id=591603
Bug: https://bugzilla.gnome.org/show_bug.cgi?id=610969
Reviewed-by: Colin Walters <walters@verbum.org>
This commit is contained in:
Simon McVittie 2016-10-13 12:42:09 +01:00 committed by Simon McVittie
parent 01bfa16986
commit c46dbd4752
6 changed files with 36 additions and 19 deletions

View File

@ -2923,6 +2923,7 @@ g_utf8_strrchr
g_utf8_strreverse g_utf8_strreverse
g_utf8_substring g_utf8_substring
g_utf8_validate g_utf8_validate
g_utf8_make_valid
<SUBSECTION> <SUBSECTION>
g_utf8_strup g_utf8_strup

View File

@ -1939,7 +1939,7 @@ g_filename_display_name (const gchar *filename)
* by a question mark * by a question mark
*/ */
if (!display_name) if (!display_name)
display_name = _g_utf8_make_valid (filename); display_name = g_utf8_make_valid (filename);
return display_name; return display_name;
} }

View File

@ -1198,7 +1198,7 @@ g_key_file_parse_line (GKeyFile *key_file,
&parse_error); &parse_error);
else else
{ {
gchar *line_utf8 = _g_utf8_make_valid (line); gchar *line_utf8 = g_utf8_make_valid (line);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_PARSE, G_KEY_FILE_ERROR_PARSE,
_("Key file contains line “%s” which is not " _("Key file contains line “%s” which is not "
@ -1330,7 +1330,7 @@ g_key_file_parse_key_value_pair (GKeyFile *key_file,
{ {
if (g_ascii_strcasecmp (value, "UTF-8") != 0) if (g_ascii_strcasecmp (value, "UTF-8") != 0)
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains unsupported " _("Key file contains unsupported "
@ -1863,7 +1863,7 @@ g_key_file_get_string (GKeyFile *key_file,
if (!g_utf8_validate (value, -1, NULL)) if (!g_utf8_validate (value, -1, NULL))
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains key “%s” with value “%s” " _("Key file contains key “%s” with value “%s” "
@ -1979,7 +1979,7 @@ g_key_file_get_string_list (GKeyFile *key_file,
if (!g_utf8_validate (value, -1, NULL)) if (!g_utf8_validate (value, -1, NULL))
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_UNKNOWN_ENCODING, G_KEY_FILE_ERROR_UNKNOWN_ENCODING,
_("Key file contains key “%s” with value “%s” " _("Key file contains key “%s” with value “%s” "
@ -4291,7 +4291,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file,
if (*value == '\0' || (*eof_int != '\0' && !g_ascii_isspace(*eof_int))) if (*value == '\0' || (*eof_int != '\0' && !g_ascii_isspace(*eof_int)))
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "
@ -4304,7 +4304,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file,
int_value = long_value; int_value = long_value;
if (int_value != long_value || errno == ERANGE) if (int_value != long_value || errno == ERANGE)
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, g_set_error (error,
G_KEY_FILE_ERROR, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
@ -4338,7 +4338,7 @@ g_key_file_parse_value_as_double (GKeyFile *key_file,
if (*end_of_valid_d != '\0' || end_of_valid_d == value) if (*end_of_valid_d != '\0' || end_of_valid_d == value)
{ {
gchar *value_utf8 = _g_utf8_make_valid (value); gchar *value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "
@ -4377,7 +4377,7 @@ g_key_file_parse_value_as_boolean (GKeyFile *key_file,
else if (strcmp_sized (value, length, "false") == 0 || strcmp_sized (value, length, "0") == 0) else if (strcmp_sized (value, length, "false") == 0 || strcmp_sized (value, length, "0") == 0)
return FALSE; return FALSE;
value_utf8 = _g_utf8_make_valid (value); value_utf8 = g_utf8_make_valid (value);
g_set_error (error, G_KEY_FILE_ERROR, g_set_error (error, G_KEY_FILE_ERROR,
G_KEY_FILE_ERROR_INVALID_VALUE, G_KEY_FILE_ERROR_INVALID_VALUE,
_("Value “%s” cannot be interpreted " _("Value “%s” cannot be interpreted "

View File

@ -423,7 +423,7 @@ set_error (GMarkupParseContext *context,
/* Make sure that the GError message is valid UTF-8 /* Make sure that the GError message is valid UTF-8
* even if it is complaining about invalid UTF-8 in the markup * even if it is complaining about invalid UTF-8 in the markup
*/ */
s_valid = _g_utf8_make_valid (s); s_valid = g_utf8_make_valid (s);
set_error_literal (context, error, code, s); set_error_literal (context, error, code, s);
g_free (s); g_free (s);

View File

@ -885,9 +885,8 @@ GLIB_AVAILABLE_IN_ALL
gchar *g_utf8_collate_key_for_filename (const gchar *str, gchar *g_utf8_collate_key_for_filename (const gchar *str,
gssize len) G_GNUC_MALLOC; gssize len) G_GNUC_MALLOC;
GLIB_AVAILABLE_IN_2_52
/* private */ gchar *g_utf8_make_valid (const gchar *str);
gchar *_g_utf8_make_valid (const gchar *name);
G_END_DECLS G_END_DECLS

View File

@ -1735,19 +1735,36 @@ g_utf8_strreverse (const gchar *str,
return result; return result;
} }
/**
* g_utf8_make_valid:
* @str: nul-terminated string to coerce into UTF-8; must not be %NULL
*
* If the provided string is valid UTF-8, return a copy of it. If not,
* return a copy in which bytes that could not be interpreted as valid Unicode
* are replaced with the Unicode replacement character (U+FFFD).
*
* For example, this is an appropriate function to use if you have received
* a string that was incorrectly declared to be UTF-8, and you need a valid
* UTF-8 version of it that can be logged or displayed to the user, with the
* assumption that it is close enough to ASCII or UTF-8 to be mostly
* readable as-is.
*
* Returns: (transfer full): a newly allocated, valid UTF-8 string whose
* content resembles @str; free it with g_free() when no longer needed
*
* Since: 2.52
*/
gchar * gchar *
_g_utf8_make_valid (const gchar *name) g_utf8_make_valid (const gchar *str)
{ {
GString *string; GString *string;
const gchar *remainder, *invalid; const gchar *remainder, *invalid;
gint remaining_bytes, valid_bytes; gint remaining_bytes, valid_bytes;
g_return_val_if_fail (name != NULL, NULL); g_return_val_if_fail (str != NULL, NULL);
string = NULL; string = NULL;
remainder = name; remainder = str;
remaining_bytes = strlen (name); remaining_bytes = strlen (str);
while (remaining_bytes != 0) while (remaining_bytes != 0)
{ {
@ -1767,7 +1784,7 @@ _g_utf8_make_valid (const gchar *name)
} }
if (string == NULL) if (string == NULL)
return g_strdup (name); return g_strdup (str);
g_string_append (string, remainder); g_string_append (string, remainder);