gconvert: Optimize UTF-8 conversions, fix output on error

In the strdup_len() path, there is no need to repeat what g_utf8_validate() already does: locate the string-terminating nul byte. Also in strdup_len(), make the out parameter bytes_read receive the length of the valid (meaning also nul-free) part of the input string, as the documentation on g_{locale,filename}_{from,to}_utf8() says it does. https://bugzilla.gnome.org/show_bug.cgi?id=792516
2025-09-27 17:52:58 +02:00 · 2018-01-13 12:40:22 +02:00
parent 1e6803be3b
commit 413605a6f3
1 changed files with 19 additions and 16 deletions
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -823,20 +823,31 @@ g_convert_with_fallback (const gchar *str,
 * 
 */

+/*
+ * Validate @string as UTF-8. @len is the length of @string in bytes, or
+ * negative if @string is nul-terminated. If @string ends in an
+ * incomplete sequence, or contains any illegal sequences or nul codepoints,
+ * %NULL will be returned and the error set to
+ * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
+ * On success, @bytes_read and @bytes_written, if provided, will be set to
+ * the number of bytes in @string up to @len or the terminating nul byte.
+ * On error, @bytes_read will be set to the byte offset after the last valid
+ * and non-nul UTF-8 sequence in @string, and @bytes_written will be set to 0.
+ */
 static gchar *
 strdup_len (const gchar *string,
 	    gssize       len,
-	    gsize       *bytes_written,
 	    gsize       *bytes_read,
-	    GError      **error)
-	 
+	    gsize       *bytes_written,
+	    GError     **error)
 {
  gsize real_len;
+  const gchar *end_valid;

-  if (!g_utf8_validate (string, len, NULL))
+  if (!g_utf8_validate (string, len, &end_valid))
    {
      if (bytes_read)
-	*bytes_read = 0;
+	*bytes_read = end_valid - string;
      if (bytes_written)
 	*bytes_written = 0;

@@ -844,17 +855,9 @@ strdup_len (const gchar *string,
                           _("Invalid byte sequence in conversion input"));
      return NULL;
    }
-  
-  if (len < 0)
-    real_len = strlen (string);
-  else
-    {
-      real_len = 0;
-      
-      while (real_len < len && string[real_len])
-	real_len++;
-    }
-  
+
+  real_len = end_valid - string;
+
  if (bytes_read)
    *bytes_read = real_len;
  if (bytes_written)