Add length arguments to g_utf8_{strup,strdown,casefold,collate_key}.

Fri Jul  6 22:34:32 2001  Owen Taylor  <otaylor@redhat.com>

	* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
	  glib/gunicollate.c: Add length arguments to
	g_utf8_{strup,strdown,casefold,collate_key}.

	* glib/gdate.c: Fix for above.
This commit is contained in:
Owen Taylor 2001-07-07 02:42:49 +00:00 committed by Owen Taylor
parent 33e1075b22
commit f1f680b68c
16 changed files with 129 additions and 54 deletions

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -1,8 +1,15 @@
Fri Jul 6 22:34:32 2001 Owen Taylor <otaylor@redhat.com>
* glib/gunicode.h glib/gunidecomp.c glib/guniprop.c
glib/gunicollate.c: Add length arguments to
g_utf8_{strup,strdown,casefold,collate_key}.
* glib/gdate.c: Fix for above.
2001-07-06 Pablo Saratxaga <pablo@mandrakesoft.com>
* configure.in: added Basque (eu) to ALL_LINGUAS
Mon Jul 2 19:48:52 2001 Andrew Lanoix <alanoix@umich.edu>
* giowin32.c: g_source_remove()ing a socket iochannel closes

View File

@ -515,8 +515,8 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
gchar *casefold;
gchar *normalized;
casefold = g_utf8_casefold (str);
normalized = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
casefold = g_utf8_casefold (str, -1);
normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
g_free (casefold);
i = 1;
@ -586,15 +586,15 @@ g_date_prepare_to_parse (const gchar *str, GDateParseTokens *pt)
g_date_strftime (buf, 127, "%b", &d);
casefold = g_utf8_casefold (buf);
casefold = g_utf8_casefold (buf, -1);
g_free (short_month_names[i]);
short_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
short_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
g_free (casefold);
g_date_strftime (buf, 127, "%B", &d);
casefold = g_utf8_casefold (buf);
casefold = g_utf8_casefold (buf, -1);
g_free (long_month_names[i]);
long_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
long_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
g_free (casefold);
++i;

View File

@ -247,9 +247,12 @@ gboolean g_utf8_validate (const gchar *str,
/* Validate a Unicode character */
gboolean g_unichar_validate (gunichar ch);
gchar *g_utf8_strup (const gchar *str);
gchar *g_utf8_strdown (const gchar *str);
gchar *g_utf8_casefold (const gchar *str);
gchar *g_utf8_strup (const gchar *str,
gssize len);
gchar *g_utf8_strdown (const gchar *str,
gssize len);
gchar *g_utf8_casefold (const gchar *str,
gssize len);
typedef enum {
G_NORMALIZE_DEFAULT,
@ -263,11 +266,13 @@ typedef enum {
} GNormalizeMode;
gchar *g_utf8_normalize (const gchar *str,
gssize len,
GNormalizeMode mode);
gint g_utf8_collate (const gchar *str1,
const gchar *str2);
gchar *g_utf8_collate_key (const gchar *str);
gchar *g_utf8_collate_key (const gchar *str,
gssize len);
G_END_DECLS

View File

@ -27,6 +27,7 @@
#include "glib.h"
extern gunichar *_g_utf8_normalize_wc (const gchar *str,
gssize max_len,
GNormalizeMode mode);
/**
@ -52,8 +53,8 @@ g_utf8_collate (const gchar *str1,
#ifdef __STDC_ISO_10646__
gunichar *str1_norm = _g_utf8_normalize_wc (str1, G_NORMALIZE_ALL_COMPOSE);
gunichar *str2_norm = _g_utf8_normalize_wc (str2, G_NORMALIZE_ALL_COMPOSE);
gunichar *str1_norm = _g_utf8_normalize_wc (str1, -1, G_NORMALIZE_ALL_COMPOSE);
gunichar *str2_norm = _g_utf8_normalize_wc (str2, -1, G_NORMALIZE_ALL_COMPOSE);
result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm);
@ -63,8 +64,8 @@ g_utf8_collate (const gchar *str1,
#else /* !__STDC_ISO_10646__ */
const gchar *charset;
gchar *str1_norm = g_utf8_normalize (str1, G_NORMALIZE_ALL_COMPOSE);
gchar *str2_norm = g_utf8_normalize (str2, G_NORMALIZE_ALL_COMPOSE);
gchar *str1_norm = g_utf8_normalize (str1, -1, G_NORMALIZE_ALL_COMPOSE);
gchar *str2_norm = g_utf8_normalize (str2, -1, G_NORMALIZE_ALL_COMPOSE);
if (g_get_charset (&charset))
{
@ -148,7 +149,8 @@ utf8_encode (char *buf, wchar_t val)
/**
* g_utf8_collate_key:
* @str: a UTF-8 encoded string.
*
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
*
* Converts a string into a collation key that can be compared
* with other collation keys using strcmp(). The results of
* comparing the collation keys of two strings with strcmp()
@ -159,14 +161,15 @@ utf8_encode (char *buf, wchar_t val)
* be freed with g_free when you are done with it.
**/
gchar *
g_utf8_collate_key (const gchar *str)
g_utf8_collate_key (const gchar *str,
gssize len)
{
gchar *result;
size_t len;
#ifdef __STDC_ISO_10646__
gunichar *str_norm = _g_utf8_normalize_wc (str, G_NORMALIZE_ALL_COMPOSE);
gunichar *str_norm = _g_utf8_normalize_wc (str, len, G_NORMALIZE_ALL_COMPOSE);
wchar_t *result_wc;
size_t i;
size_t result_len = 0;
@ -194,7 +197,7 @@ g_utf8_collate_key (const gchar *str)
#else /* !__STDC_ISO_10646__ */
const gchar *charset;
gchar *str_norm = g_utf8_normalize (str, G_NORMALIZE_ALL_COMPOSE);
gchar *str_norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL_COMPOSE);
if (g_get_charset (&charset))
{

View File

@ -218,6 +218,7 @@ combine (gunichar a,
gunichar *
_g_utf8_normalize_wc (const gchar *str,
gssize max_len,
GNormalizeMode mode)
{
gsize n_wc;
@ -231,7 +232,7 @@ _g_utf8_normalize_wc (const gchar *str,
n_wc = 0;
p = str;
while (*p)
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar wc = g_utf8_get_char (p);
@ -257,7 +258,7 @@ _g_utf8_normalize_wc (const gchar *str,
last_start = 0;
n_wc = 0;
p = str;
while (*p)
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar wc = g_utf8_get_char (p);
guchar *decomp;
@ -345,6 +346,7 @@ _g_utf8_normalize_wc (const gchar *str,
/**
* g_utf8_normalize:
* @str: a UTF-8 encoded string.
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
* @mode: the type of normalization to perform.
*
* Convert a string into canonical form, standardizing
@ -378,9 +380,10 @@ _g_utf8_normalize_wc (const gchar *str,
**/
gchar *
g_utf8_normalize (const gchar *str,
gssize len,
GNormalizeMode mode)
{
gunichar *result_wc = _g_utf8_normalize_wc (str, mode);
gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
gchar *result;
result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);

View File

@ -588,6 +588,7 @@ output_special_case (gchar *out_buffer,
static gsize
real_toupper (const gchar *str,
gssize max_len,
gchar *out_buffer,
LocaleType locale_type)
{
@ -596,7 +597,7 @@ real_toupper (const gchar *str,
gsize len = 0;
gboolean last_was_i = FALSE;
while (*p)
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar c = g_utf8_get_char (p);
int t = TYPE (c);
@ -693,8 +694,9 @@ real_toupper (const gchar *str,
}
/**
* g_ut8f_strup:
* @string: a UTF-8 encoded string
* g_utf8_strup:
* @str: a UTF-8 encoded string
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
*
* Converts all Unicode characters in the string that have a case
* to uppercase. The exact manner that this is done depends
@ -706,7 +708,8 @@ real_toupper (const gchar *str,
* converted to uppercase.
**/
gchar *
g_utf8_strup (const gchar *str)
g_utf8_strup (const gchar *str,
gssize len)
{
gsize len;
LocaleType locale_type;
@ -719,9 +722,9 @@ g_utf8_strup (const gchar *str)
/*
* We use a two pass approach to keep memory management simple
*/
len = real_toupper (str, NULL, locale_type);
len = real_toupper (str, len, NULL, locale_type);
result = g_malloc (len + 1);
real_toupper (str, result, locale_type);
real_toupper (str, len, result, locale_type);
result[len] = '\0';
return result;
@ -729,6 +732,7 @@ g_utf8_strup (const gchar *str)
static gsize
real_tolower (const gchar *str,
gssize max_len,
gchar *out_buffer,
LocaleType locale_type)
{
@ -736,7 +740,7 @@ real_tolower (const gchar *str,
const char *last = NULL;
gsize len = 0;
while (*p)
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar c = g_utf8_get_char (p);
int t = TYPE (c);
@ -807,8 +811,9 @@ real_tolower (const gchar *str,
}
/**
* g_ut8f_strdown:
* @string: a UTF-8 encoded string
* g_utf8_strdown:
* @str: a UTF-8 encoded string
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
*
* Converts all Unicode characters in the string that have a case
* to lowercase. The exact manner that this is done depends
@ -819,7 +824,8 @@ real_tolower (const gchar *str,
* converted to lowercase.
**/
gchar *
g_utf8_strdown (const gchar *str)
g_utf8_strdown (const gchar *str,
gssize len)
{
gsize len;
LocaleType locale_type;
@ -832,9 +838,9 @@ g_utf8_strdown (const gchar *str)
/*
* We use a two pass approach to keep memory management simple
*/
len = real_tolower (str, NULL, locale_type);
len = real_tolower (str, len, NULL, locale_type);
result = g_malloc (len + 1);
real_tolower (str, result, locale_type);
real_tolower (str, len, result, locale_type);
result[len] = '\0';
return result;
@ -843,6 +849,7 @@ g_utf8_strdown (const gchar *str)
/**
* g_utf8_casefold:
* @str: a UTF-8 encoded string
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
*
* Converts a string into a form that is independent of case. The
* result will not correspond to any particular case, but can be
@ -860,15 +867,16 @@ g_utf8_strdown (const gchar *str)
* case independent form of @str.
**/
gchar *
g_utf8_casefold (const gchar *str)
g_utf8_casefold (const gchar *str,
gssize len)
{
GString *result = g_string_new (NULL);
const char *p;
gchar buf[6];
int len;
int charlen;
p = str;
while (*p)
while ((len < 0 || p < str + len) && *p)
{
gunichar ch = g_utf8_get_char (p);
@ -896,8 +904,8 @@ g_utf8_casefold (const gchar *str)
}
ch = g_unichar_tolower (ch);
len = g_unichar_to_utf8 (ch, buf);
g_string_append_len (result, buf, len);
charlen = g_unichar_to_utf8 (ch, buf);
g_string_append_len (result, buf, charlen);
next:
p = g_utf8_next_char (p);

View File

@ -54,7 +54,7 @@ int main (int argc, char **argv)
test = strings[1];
convert = g_utf8_strup (test);
convert = g_utf8_strup (test, -1);
if (strcmp (convert, strings[4]) != 0)
{
fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n",
@ -63,7 +63,7 @@ int main (int argc, char **argv)
}
g_free (convert);
convert = g_utf8_strdown (test);
convert = g_utf8_strdown (test, -1);
if (strcmp (convert, strings[2]) != 0)
{
fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n",
@ -98,7 +98,7 @@ int main (int argc, char **argv)
test = strings[0];
convert = g_utf8_casefold (test);
convert = g_utf8_casefold (test, -1);
if (strcmp (convert, strings[1]) != 0)
{
fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n",

View File

@ -41,7 +41,7 @@ int main (int argc, char **argv)
if (argc == 2)
{
in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
in = g_io_channel_new_file (argv[1], "r", &error);
if (!in)
{
fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
@ -64,7 +64,7 @@ int main (int argc, char **argv)
str[term_pos] = '\0';
line.key = g_utf8_collate_key (str);
line.key = g_utf8_collate_key (str, -1);
line.str = str;
g_array_append_val (line_array, line);

View File

@ -67,7 +67,7 @@ test_form (int line,
{
for (i = 0; i < 3; i++)
{
char *result = g_utf8_normalize (c[i], mode);
char *result = g_utf8_normalize (c[i], -1, mode);
if (strcmp (result, c[expected]) != 0)
{
fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
@ -83,7 +83,7 @@ test_form (int line,
{
for (i = 3; i < 5; i++)
{
char *result = g_utf8_normalize (c[i], mode);
char *result = g_utf8_normalize (c[i], -1, mode);
if (strcmp (result, c[expected]) != 0)
{
fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
@ -144,7 +144,7 @@ int main (int argc, char **argv)
if (argc == 3)
line_to_do = atoi(argv[2]);
in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
in = g_io_channel_new_file (argv[1], "r", &error);
if (!in)
{
fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);