From c46dbd47522df130c17efeb7959d4be99e3e4bb8 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Thu, 13 Oct 2016 12:42:09 +0100 Subject: [PATCH] Make g_utf8_make_valid public Based on a patch by Simon van der Linden and rebased onto current GLib, with improved documentation loosely based on Telepathy's tp_utf8_make_valid(). Signed-off-by: Simon McVittie Bug: https://bugzilla.gnome.org/show_bug.cgi?id=591603 Bug: https://bugzilla.gnome.org/show_bug.cgi?id=610969 Reviewed-by: Colin Walters --- docs/reference/glib/glib-sections.txt | 1 + glib/gconvert.c | 2 +- glib/gkeyfile.c | 16 +++++++-------- glib/gmarkup.c | 2 +- glib/gunicode.h | 5 ++--- glib/gutf8.c | 29 +++++++++++++++++++++------ 6 files changed, 36 insertions(+), 19 deletions(-) diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index d8f1643bd..4745446d0 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -2923,6 +2923,7 @@ g_utf8_strrchr g_utf8_strreverse g_utf8_substring g_utf8_validate +g_utf8_make_valid g_utf8_strup diff --git a/glib/gconvert.c b/glib/gconvert.c index 892337120..1de941edc 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -1939,7 +1939,7 @@ g_filename_display_name (const gchar *filename) * by a question mark */ if (!display_name) - display_name = _g_utf8_make_valid (filename); + display_name = g_utf8_make_valid (filename); return display_name; } diff --git a/glib/gkeyfile.c b/glib/gkeyfile.c index 4c444ab65..70d8fda3e 100644 --- a/glib/gkeyfile.c +++ b/glib/gkeyfile.c @@ -1198,7 +1198,7 @@ g_key_file_parse_line (GKeyFile *key_file, &parse_error); else { - gchar *line_utf8 = _g_utf8_make_valid (line); + gchar *line_utf8 = g_utf8_make_valid (line); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_PARSE, _("Key file contains line “%s” which is not " @@ -1330,7 +1330,7 @@ g_key_file_parse_key_value_pair (GKeyFile *key_file, { if (g_ascii_strcasecmp (value, "UTF-8") != 0) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_UNKNOWN_ENCODING, _("Key file contains unsupported " @@ -1863,7 +1863,7 @@ g_key_file_get_string (GKeyFile *key_file, if (!g_utf8_validate (value, -1, NULL)) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_UNKNOWN_ENCODING, _("Key file contains key “%s” with value “%s” " @@ -1979,7 +1979,7 @@ g_key_file_get_string_list (GKeyFile *key_file, if (!g_utf8_validate (value, -1, NULL)) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_UNKNOWN_ENCODING, _("Key file contains key “%s” with value “%s” " @@ -4291,7 +4291,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file, if (*value == '\0' || (*eof_int != '\0' && !g_ascii_isspace(*eof_int))) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_INVALID_VALUE, _("Value “%s” cannot be interpreted " @@ -4304,7 +4304,7 @@ g_key_file_parse_value_as_integer (GKeyFile *key_file, int_value = long_value; if (int_value != long_value || errno == ERANGE) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_INVALID_VALUE, @@ -4338,7 +4338,7 @@ g_key_file_parse_value_as_double (GKeyFile *key_file, if (*end_of_valid_d != '\0' || end_of_valid_d == value) { - gchar *value_utf8 = _g_utf8_make_valid (value); + gchar *value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_INVALID_VALUE, _("Value “%s” cannot be interpreted " @@ -4377,7 +4377,7 @@ g_key_file_parse_value_as_boolean (GKeyFile *key_file, else if (strcmp_sized (value, length, "false") == 0 || strcmp_sized (value, length, "0") == 0) return FALSE; - value_utf8 = _g_utf8_make_valid (value); + value_utf8 = g_utf8_make_valid (value); g_set_error (error, G_KEY_FILE_ERROR, G_KEY_FILE_ERROR_INVALID_VALUE, _("Value “%s” cannot be interpreted " diff --git a/glib/gmarkup.c b/glib/gmarkup.c index 834eb9e2c..dc828d531 100644 --- a/glib/gmarkup.c +++ b/glib/gmarkup.c @@ -423,7 +423,7 @@ set_error (GMarkupParseContext *context, /* Make sure that the GError message is valid UTF-8 * even if it is complaining about invalid UTF-8 in the markup */ - s_valid = _g_utf8_make_valid (s); + s_valid = g_utf8_make_valid (s); set_error_literal (context, error, code, s); g_free (s); diff --git a/glib/gunicode.h b/glib/gunicode.h index ce35f6b71..558b37254 100644 --- a/glib/gunicode.h +++ b/glib/gunicode.h @@ -885,9 +885,8 @@ GLIB_AVAILABLE_IN_ALL gchar *g_utf8_collate_key_for_filename (const gchar *str, gssize len) G_GNUC_MALLOC; - -/* private */ -gchar *_g_utf8_make_valid (const gchar *name); +GLIB_AVAILABLE_IN_2_52 +gchar *g_utf8_make_valid (const gchar *str); G_END_DECLS diff --git a/glib/gutf8.c b/glib/gutf8.c index eae10daa3..4c5caccfd 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -1735,19 +1735,36 @@ g_utf8_strreverse (const gchar *str, return result; } - +/** + * g_utf8_make_valid: + * @str: string to coerce into UTF-8 + * + * If the provided string is valid UTF-8, return a copy of it. If not, + * return a copy in which bytes that could not be interpreted as valid Unicode + * are replaced with the Unicode replacement character (U+FFFD). + * + * For example, this is an appropriate function to use if you have received + * a string that was incorrectly declared to be UTF-8, and you need a valid + * UTF-8 version of it that can be logged or displayed to the user, with the + * assumption that it is close enough to ASCII or UTF-8 to be mostly + * readable as-is. + * + * Returns: (transfer full): a valid UTF-8 string whose content resembles @str + * + * Since: 2.52 + */ gchar * -_g_utf8_make_valid (const gchar *name) +g_utf8_make_valid (const gchar *str) { GString *string; const gchar *remainder, *invalid; gint remaining_bytes, valid_bytes; - g_return_val_if_fail (name != NULL, NULL); + g_return_val_if_fail (str != NULL, NULL); string = NULL; - remainder = name; - remaining_bytes = strlen (name); + remainder = str; + remaining_bytes = strlen (str); while (remaining_bytes != 0) { @@ -1767,7 +1784,7 @@ _g_utf8_make_valid (const gchar *name) } if (string == NULL) - return g_strdup (name); + return g_strdup (str); g_string_append (string, remainder);