gconvert: Correctly annotate string types and output parameters

Note that the g_convert() API works with byte arrays. It's wrong to default to utf8 there, because iconv can read and produce strings with interior nul characters which are not allowed in (type utf8). The documentation was misleading about that in some places, so that got corrected as well. Strings in the locale encoding are annotated as dynamic-length byte arrays because they don't have any guaranteed format and can contain nul bytes. For UTF-8 strings in g_*_{from,to}_utf8(), GLib assumes no embedded nul bytes and the (type utf8) annotations on the UTF-8 parameters and return values remain as they were. Likewise for (type filename). https://bugzilla.gnome.org/show_bug.cgi?id=756128
2025-08-22 17:08:53 +02:00 · 2018-01-06 00:40:12 +02:00
parent 565d8fa1ee
commit 8a93e2d54e
1 changed files with 40 additions and 32 deletions
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -347,22 +347,23 @@ close_converter (GIConv cd)
 /**
 * g_convert_with_iconv: (skip)
- * @str:           the string to convert
+ * @str:           (array length=len) (element-type guint8):
 *                 the string to convert.
 * @len:           the length of the string in bytes, or -1 if the string is
 *                 nul-terminated (Note that some encodings may allow nul
 *                 bytes to occur inside strings. In that case, using -1
 *                 for the @len parameter is unsafe)
 * @converter:     conversion descriptor from g_iconv_open()
- * @bytes_read:    location to store the number of bytes in the
+ * @bytes_read:    (out) (optional): location to store the number of bytes in
- *                 input string that were successfully converted, or %NULL.
+ *                 the input string that were successfully converted, or %NULL.
 *                 Even if the conversion was successful, this may be 
 *                 less than @len if there were partial characters
 *                 at the end of the input. If the error
 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
 *                 input sequence.
- * @bytes_written: the number of bytes stored in the output buffer (not 
+ * @bytes_written: (out) (optional): the number of bytes stored in
- *                 including the terminating nul).
+ *                 the output buffer (not including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
 *                 errors. Any of the errors in #GConvertError may occur.
 *
@@ -386,8 +387,9 @@ close_converter (GIConv cd)
 * the input character set. To get defined behaviour for conversion of
 * unrepresentable characters, use g_convert_with_fallback().
 *
- * Returns: If the conversion was successful, a newly allocated
+ * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
- *               nul-terminated string, which must be freed with
+ *               If the conversion was successful, a newly allocated buffer
 *               containing the converted string, which must be freed with
 *               g_free(). Otherwise %NULL and @error will be set.
 **/
 gchar*
@@ -515,23 +517,24 @@ g_convert_with_iconv (const gchar *str,
 /**
 * g_convert:
- * @str:           the string to convert
+ * @str:           (array length=len) (element-type guint8):
 *                 the string to convert.
 * @len:           the length of the string in bytes, or -1 if the string is
 *                 nul-terminated (Note that some encodings may allow nul
 *                 bytes to occur inside strings. In that case, using -1
 *                 for the @len parameter is unsafe)
 * @to_codeset:    name of character set into which to convert @str
 * @from_codeset:  character set of @str.
- * @bytes_read: (out):   location to store the number of bytes in the
+ * @bytes_read:    (out) (optional): location to store the number of bytes in
- *                 input string that were successfully converted, or %NULL.
+ *                 the input string that were successfully converted, or %NULL.
 *                 Even if the conversion was successful, this may be 
 *                 less than @len if there were partial characters
 *                 at the end of the input. If the error
 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
 *                 input sequence.
- * @bytes_written: (out): the number of bytes stored in the output buffer (not 
+ * @bytes_written: (out) (optional): the number of bytes stored in
- *                 including the terminating nul).
+ *                 the output buffer (not including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
 *                 errors. Any of the errors in #GConvertError may occur.
 *
@@ -550,9 +553,10 @@ g_convert_with_iconv (const gchar *str,
 * Using extensions such as "//TRANSLIT" may not work (or may not work
 * well) on many platforms.  Consider using g_str_to_ascii() instead.
 *
- * Returns: If the conversion was successful, a newly allocated
+ * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
- *               nul-terminated string, which must be freed with
+ *          If the conversion was successful, a newly allocated buffer
- *               g_free(). Otherwise %NULL and @error will be set.
+ *          containing the converted string, which must be freed with g_free().
 *          Otherwise %NULL and @error will be set.
 **/
 gchar*
 g_convert (const gchar *str,
@@ -594,7 +598,8 @@ g_convert (const gchar *str,
 /**
 * g_convert_with_fallback:
- * @str:          the string to convert
+ * @str:          (array length=len) (element-type guint8):
 *                the string to convert.
 * @len:          the length of the string in bytes, or -1 if the string is
 *                 nul-terminated (Note that some encodings may allow nul
 *                 bytes to occur inside strings. In that case, using -1
@@ -606,13 +611,13 @@ g_convert (const gchar *str,
 *                representable in the target encoding). 
 *                If %NULL, characters not in the target encoding will 
 *                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
- * @bytes_read:   location to store the number of bytes in the
+ * @bytes_read:   (out) (optional): location to store the number of bytes in
- *                input string that were successfully converted, or %NULL.
+ *                the input string that were successfully converted, or %NULL.
 *                Even if the conversion was successful, this may be 
 *                less than @len if there were partial characters
 *                at the end of the input.
- * @bytes_written: the number of bytes stored in the output buffer (not 
+ * @bytes_written: (out) (optional): the number of bytes stored in
- *                including the terminating nul).
+ *                 the output buffer (not including the terminating nul).
 * @error:        location to store the error occurring, or %NULL to ignore
 *                errors. Any of the errors in #GConvertError may occur.
 *
@@ -634,9 +639,10 @@ g_convert (const gchar *str,
 * character until it knows that the next character is not a mark that
 * could combine with the base character.)
 *
- * Returns: If the conversion was successful, a newly allocated
+ * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
- *               nul-terminated string, which must be freed with
+ *          If the conversion was successful, a newly allocated buffer
- *               g_free(). Otherwise %NULL and @error will be set.
+ *          containing the converted string, which must be freed with g_free().
 *          Otherwise %NULL and @error will be set.
 **/
 gchar*
 g_convert_with_fallback (const gchar *str,
@@ -963,7 +969,8 @@ convert_checked (const gchar      *string,
 /**
 * g_locale_to_utf8:
- * @opsysstring:   a string in the encoding of the current locale. On Windows
+ * @opsysstring:   (array length=len) (element-type guint8): a string in the
 *                 encoding of the current locale. On Windows
 *                 this means the system codepage.
 * @len:           the length of the string, or -1 if the string is
 *                 nul-terminated (Note that some encodings may allow nul
@@ -994,8 +1001,7 @@ convert_checked (const gchar      *string,
 * earlier versions of this library. Use g_convert() to produce output that
 * may contain embedded nul characters.
 * 
- * Returns: A newly-allocated buffer containing the converted string,
+ * Returns: (type utf8): The converted string, or %NULL on an error.
 *               or %NULL on an error, and error will be set.
 **/
 gchar *
 g_locale_to_utf8 (const gchar  *opsysstring,
@@ -1042,8 +1048,9 @@ g_locale_to_utf8 (const gchar  *opsysstring,
 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
 * input that may contain embedded nul characters.
 *
- * Returns: A newly-allocated buffer containing the converted string,
+ * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
- *               or %NULL on an error, and error will be set.
+ *          A newly-allocated buffer containing the converted string,
 *          or %NULL on an error, and @error will be set.
 **/
 gchar *
 g_locale_from_utf8 (const gchar *utf8string,
@@ -1083,7 +1090,8 @@ filename_charset_cache_free (gpointer data)
 /**
 * g_get_filename_charsets:
- * @charsets: return location for the %NULL-terminated list of encoding names
+ * @filename_charsets: (out) (transfer none) (array zero-terminated=1):
 *    return location for the %NULL-terminated list of encoding names
 *
 * Determines the preferred character sets used for filenames.
 * The first character set from the @filename_charsets is the filename encoding, the
@@ -1255,7 +1263,7 @@ get_filename_charset (const gchar **filename_charset)
 * function returns %NULL. Use g_convert() to produce output that
 * may contain embedded nul characters.
 * 
- * Returns: The converted string, or %NULL on an error.
+ * Returns: (type utf8): The converted string, or %NULL on an error.
 **/
 gchar*
 g_filename_to_utf8 (const gchar *opsysstring, 
@@ -1290,8 +1298,8 @@ g_filename_to_utf8 (const gchar *opsysstring,
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
 *                 input sequence.
- * @bytes_written: (out): the number of bytes stored in the output buffer (not 
+ * @bytes_written: (out) (optional): the number of bytes stored in
- *                 including the terminating nul).
+ *                 the output buffer (not including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
 *                 errors. Any of the errors in #GConvertError may occur.
 *