Add length arguments to g_utf8_{strup,strdown,casefold,collate_key}.

Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>

* glib/gunicode.h, glib/gunidecomp.c, glib/guniprop.c, glib/gunicollate.c: Add length arguments to g_utf8_{strup,strdown,casefold,collate_key}; g_utf8_normalize also gains a length argument.

* glib/gdate.c: Fix for above.
2025-11-01 08:52:18 +01:00 · 2001-07-07 02:42:49 +00:00
parent 33e1075b22
commit f1f680b68c
16 changed files with 129 additions and 54 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-0
+++ b/ChangeLog.pre-2-0
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-10
+++ b/ChangeLog.pre-2-10
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-12
+++ b/ChangeLog.pre-2-12
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-2
+++ b/ChangeLog.pre-2-2
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-4
+++ b/ChangeLog.pre-2-4
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-6
+++ b/ChangeLog.pre-2-6
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/ChangeLog.pre-2-8
+++ b/ChangeLog.pre-2-8
@@ -1,8 +1,15 @@
+Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>
+
+	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c 
+	  glib/gunicollate.c: Add length arguments to
+	g_utf8_{strup,strdown,casefold,collate_key}.
+
+	* glib/gdate.c: Fix for above.
+
 2001-07-06  Pablo Saratxaga <pablo@mandrakesoft.com>

 	* configure.in: added Basque (eu) to ALL_LINGUAS

-
 Mon Jul  2 19:48:52  2001 Andrew Lanoix <alanoix@umich.edu>

 	*giowin32.c: g_source_remove()ing an socket iochannel closes
--- a/glib/gdate.c
+++ b/glib/gdate.c
@@ -515,8 +515,8 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
      gchar *casefold;
      gchar *normalized;
      
-      casefold = g_utf8_casefold (str);
-      normalized = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
+      casefold = g_utf8_casefold (str, -1);
+      normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
      g_free (casefold);

      i = 1;
@@ -586,15 +586,15 @@ g_date_prepare_to_parse (const gchar *str, GDateParseTokens *pt)
 	  
          g_date_strftime (buf, 127, "%b", &d);

-	  casefold = g_utf8_casefold (buf);
+	  casefold = g_utf8_casefold (buf, -1);
          g_free (short_month_names[i]);
-          short_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
+          short_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
 	  g_free (casefold);
 	  
          g_date_strftime (buf, 127, "%B", &d);
-	  casefold = g_utf8_casefold (buf);
+	  casefold = g_utf8_casefold (buf, -1);
          g_free (long_month_names[i]);
-          long_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
+          long_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
 	  g_free (casefold);
          
          ++i;
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -247,9 +247,12 @@ gboolean g_utf8_validate (const gchar  *str,
 /* Validate a Unicode character */
 gboolean g_unichar_validate (gunichar ch);

-gchar *g_utf8_strup   (const gchar *str);
-gchar *g_utf8_strdown (const gchar *str);
-gchar *g_utf8_casefold (const gchar *str);
+gchar *g_utf8_strup   (const gchar *str,
+		       gssize       len);
+gchar *g_utf8_strdown (const gchar *str,
+		       gssize       len);
+gchar *g_utf8_casefold (const gchar *str,
+			gssize       len);

 typedef enum {
  G_NORMALIZE_DEFAULT,
@@ -263,11 +266,13 @@ typedef enum {
 } GNormalizeMode;

 gchar *g_utf8_normalize (const gchar   *str,
+			 gssize         len,
 			 GNormalizeMode mode);

 gint   g_utf8_collate     (const gchar *str1,
 			   const gchar *str2);
-gchar *g_utf8_collate_key (const gchar *str);
+gchar *g_utf8_collate_key (const gchar *str,
+			   gssize       len);

 G_END_DECLS

--- a/glib/gunicollate.c
+++ b/glib/gunicollate.c
@@ -27,6 +27,7 @@
 #include "glib.h"

 extern gunichar *_g_utf8_normalize_wc (const gchar    *str,
+				       gssize          max_len,
 				       GNormalizeMode  mode);

 /**
@@ -52,8 +53,8 @@ g_utf8_collate (const gchar *str1,
  
 #ifdef __STDC_ISO_10646__

-  gunichar *str1_norm = _g_utf8_normalize_wc (str1, G_NORMALIZE_ALL_COMPOSE);
-  gunichar *str2_norm = _g_utf8_normalize_wc (str2, G_NORMALIZE_ALL_COMPOSE);
+  gunichar *str1_norm = _g_utf8_normalize_wc (str1, -1, G_NORMALIZE_ALL_COMPOSE);
+  gunichar *str2_norm = _g_utf8_normalize_wc (str2, -1, G_NORMALIZE_ALL_COMPOSE);

  result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm);

@@ -63,8 +64,8 @@ g_utf8_collate (const gchar *str1,
 #else /* !__STDC_ISO_10646__ */

  const gchar *charset;
-  gchar *str1_norm = g_utf8_normalize (str1, G_NORMALIZE_ALL_COMPOSE);
-  gchar *str2_norm = g_utf8_normalize (str2, G_NORMALIZE_ALL_COMPOSE);
+  gchar *str1_norm = g_utf8_normalize (str1, -1, G_NORMALIZE_ALL_COMPOSE);
+  gchar *str2_norm = g_utf8_normalize (str2, -1, G_NORMALIZE_ALL_COMPOSE);

  if (g_get_charset (&charset))
    {
@@ -148,7 +149,8 @@ utf8_encode (char *buf, wchar_t val)
 /**
 * g_utf8_collate_key:
 * @str: a UTF-8 encoded string.
- * 
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
+ *
 * Converts a string into a collation key that can be compared
 * with other collation keys using strcmp(). The results of
 * comparing the collation keys of two strings with strcmp()
@@ -159,14 +161,15 @@ utf8_encode (char *buf, wchar_t val)
 *   be freed with g_free when you are done with it.
 **/
 gchar *
-g_utf8_collate_key (const gchar *str)
+g_utf8_collate_key (const gchar *str,
+		    gssize       len)
 {
  gchar *result;
  size_t len;
  
 #ifdef __STDC_ISO_10646__

-  gunichar *str_norm = _g_utf8_normalize_wc (str, G_NORMALIZE_ALL_COMPOSE);
+  gunichar *str_norm = _g_utf8_normalize_wc (str, len, G_NORMALIZE_ALL_COMPOSE);
  wchar_t *result_wc;
  size_t i;
  size_t result_len = 0;
@@ -194,7 +197,7 @@ g_utf8_collate_key (const gchar *str)
 #else /* !__STDC_ISO_10646__ */

  const gchar *charset;
-  gchar *str_norm = g_utf8_normalize (str, G_NORMALIZE_ALL_COMPOSE);
+  gchar *str_norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL_COMPOSE);

  if (g_get_charset (&charset))
    {
--- a/glib/gunidecomp.c
+++ b/glib/gunidecomp.c
@@ -218,6 +218,7 @@ combine (gunichar  a,

 gunichar *
 _g_utf8_normalize_wc (const gchar    *str,
+		      gssize          max_len,
 		      GNormalizeMode  mode)
 {
  gsize n_wc;
@@ -231,7 +232,7 @@ _g_utf8_normalize_wc (const gchar    *str,

  n_wc = 0;
  p = str;
-  while (*p)
+  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar wc = g_utf8_get_char (p);

@@ -257,7 +258,7 @@ _g_utf8_normalize_wc (const gchar    *str,
  last_start = 0;
  n_wc = 0;
  p = str;
-  while (*p)
+  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar wc = g_utf8_get_char (p);
      guchar *decomp;
@@ -345,6 +346,7 @@ _g_utf8_normalize_wc (const gchar    *str,
 /**
 * g_utf8_normalize:
 * @str: a UTF-8 encoded string.
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
 * @mode: the type of normalization to perform.
 * 
 * Convert a string into canonical form, standardizing
@@ -378,9 +380,10 @@ _g_utf8_normalize_wc (const gchar    *str,
 **/
 gchar *
 g_utf8_normalize (const gchar    *str,
+		  gssize          len,
 		  GNormalizeMode  mode)
 {
-  gunichar *result_wc = _g_utf8_normalize_wc (str, mode);
+  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
  gchar *result;
  
  result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -588,6 +588,7 @@ output_special_case (gchar *out_buffer,

 static gsize
 real_toupper (const gchar *str,
+	      gssize       max_len,
 	      gchar       *out_buffer,
 	      LocaleType   locale_type)
 {
@@ -596,7 +597,7 @@ real_toupper (const gchar *str,
  gsize len = 0;
  gboolean last_was_i = FALSE;

-  while (*p)
+  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar c = g_utf8_get_char (p);
      int t = TYPE (c);
@@ -693,8 +694,9 @@ real_toupper (const gchar *str,
 }

 /**
- * g_ut8f_strup:
- * @string: a UTF-8 encoded string
+ * g_utf8_strup:
+ * @str: a UTF-8 encoded string
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
 * 
 * Converts all Unicode characters in the string that have a case
 * to uppercase. The exact manner that this is done depends
@@ -706,7 +708,8 @@ real_toupper (const gchar *str,
 *    converted to uppercase.  
 **/
 gchar *
-g_utf8_strup (const gchar *str)
+g_utf8_strup (const gchar *str,
+	      gssize       len)
 {
  gsize len;
  LocaleType locale_type;
@@ -719,9 +722,9 @@ g_utf8_strup (const gchar *str)
  /*
   * We use a two pass approach to keep memory management simple
   */
-  len = real_toupper (str, NULL, locale_type);
+  len = real_toupper (str, len, NULL, locale_type);
  result = g_malloc (len + 1);
-  real_toupper (str, result, locale_type);
+  real_toupper (str, len, result, locale_type);
  result[len] = '\0';

  return result;
@@ -729,6 +732,7 @@ g_utf8_strup (const gchar *str)

 static gsize
 real_tolower (const gchar *str,
+	      gssize       max_len,
 	      gchar       *out_buffer,
 	      LocaleType   locale_type)
 {
@@ -736,7 +740,7 @@ real_tolower (const gchar *str,
  const char *last = NULL;
  gsize len = 0;

-  while (*p)
+  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar c = g_utf8_get_char (p);
      int t = TYPE (c);
@@ -807,8 +811,9 @@ real_tolower (const gchar *str,
 }

 /**
- * g_ut8f_strdown:
- * @string: a UTF-8 encoded string
+ * g_utf8_strdown:
+ * @str: a UTF-8 encoded string
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
 * 
 * Converts all Unicode characters in the string that have a case
 * to lowercase. The exact manner that this is done depends
@@ -819,7 +824,8 @@ real_tolower (const gchar *str,
 *    converted to lowercase.  
 **/
 gchar *
-g_utf8_strdown (const gchar *str)
+g_utf8_strdown (const gchar *str,
+		gssize       len)
 {
  gsize len;
  LocaleType locale_type;
@@ -832,9 +838,9 @@ g_utf8_strdown (const gchar *str)
  /*
   * We use a two pass approach to keep memory management simple
   */
-  len = real_tolower (str, NULL, locale_type);
+  len = real_tolower (str, len, NULL, locale_type);
  result = g_malloc (len + 1);
-  real_tolower (str, result, locale_type);
+  real_tolower (str, len, result, locale_type);
  result[len] = '\0';

  return result;
@@ -843,6 +849,7 @@ g_utf8_strdown (const gchar *str)
 /**
 * g_utf8_casefold:
 * @str: a UTF-8 encoded string
+ * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
 * 
 * Converts a string into a form that is independent of case. The
 * result will not correspond to any particular case, but can be
@@ -860,15 +867,16 @@ g_utf8_strdown (const gchar *str)
 *   case independent form of @str.
 **/
 gchar *
-g_utf8_casefold (const gchar *str)
+g_utf8_casefold (const gchar *str,
+		 gssize       len)
 {
  GString *result = g_string_new (NULL);
  const char *p;
  gchar buf[6];
-  int len;
+  int charlen;

  p = str;
-  while (*p)
+  while ((len < 0 || p < str + len) && *p)
    {
      gunichar ch = g_utf8_get_char (p);

@@ -896,8 +904,8 @@ g_utf8_casefold (const gchar *str)
 	}

      ch = g_unichar_tolower (ch);
-      len = g_unichar_to_utf8 (ch, buf);
-      g_string_append_len (result, buf, len);
+      charlen = g_unichar_to_utf8 (ch, buf);
+      g_string_append_len (result, buf, charlen);
      
    next:
      p = g_utf8_next_char (p);
--- a/tests/unicode-caseconv.c
+++ b/tests/unicode-caseconv.c
@@ -54,7 +54,7 @@ int main (int argc, char **argv)
      
      test = strings[1];

-      convert = g_utf8_strup (test);
+      convert = g_utf8_strup (test, -1);
      if (strcmp (convert, strings[4]) != 0)
 	{
 	  fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n",
@@ -63,7 +63,7 @@ int main (int argc, char **argv)
 	}
      g_free (convert);

-      convert = g_utf8_strdown (test);
+      convert = g_utf8_strdown (test, -1);
      if (strcmp (convert, strings[2]) != 0)
 	{
 	  fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n",
@@ -98,7 +98,7 @@ int main (int argc, char **argv)

      test = strings[0];

-      convert = g_utf8_casefold (test);
+      convert = g_utf8_casefold (test, -1);
      if (strcmp (convert, strings[1]) != 0)
 	{
 	  fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n",
--- a/tests/unicode-collate.c
+++ b/tests/unicode-collate.c
@@ -41,7 +41,7 @@ int main (int argc, char **argv)

  if (argc == 2)
    {
-      in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
+      in = g_io_channel_new_file (argv[1], "r", &error);
      if (!in)
 	{
 	  fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
@@ -64,7 +64,7 @@ int main (int argc, char **argv)

      str[term_pos] = '\0';

-      line.key = g_utf8_collate_key (str);
+      line.key = g_utf8_collate_key (str, -1);
      line.str = str;

      g_array_append_val (line_array, line);
--- a/tests/unicode-normalize.c
+++ b/tests/unicode-normalize.c
@@ -67,7 +67,7 @@ test_form (int            line,
    {
      for (i = 0; i < 3; i++)
 	{
-	  char *result = g_utf8_normalize (c[i], mode);
+	  char *result = g_utf8_normalize (c[i], -1, mode);
 	  if (strcmp (result, c[expected]) != 0)
 	    {
 	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
@@ -83,7 +83,7 @@ test_form (int            line,
    {
      for (i = 3; i < 5; i++)
 	{
-	  char *result = g_utf8_normalize (c[i], mode);
+	  char *result = g_utf8_normalize (c[i], -1, mode);
 	  if (strcmp (result, c[expected]) != 0)
 	    {
 	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
@@ -144,7 +144,7 @@ int main (int argc, char **argv)
  if (argc == 3)
    line_to_do = atoi(argv[2]);

-  in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
+  in = g_io_channel_new_file (argv[1], "r", &error);
  if (!in)
    {
      fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);