mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-12-24 14:36:13 +01:00
Add functions to insert a unichar as UTF-8, since this is reasonably
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com> * glib/gstring.c (g_string_insert/append/prepend_unichar): Add functions to insert a unichar as UTF-8, since this is reasonably common. * glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated): New function exposing iterating through possibly invalid/incomplete UTF-8 to unicode to the outside world. * glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument to be gssize, not gsize.
This commit is contained in:
parent
926af68d34
commit
f37c13dbde
13
ChangeLog
13
ChangeLog
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -1,3 +1,16 @@
|
|||||||
|
Fri Jul 13 19:20:06 2001 Owen Taylor <otaylor@redhat.com>
|
||||||
|
|
||||||
|
* glib/gstring.c (g_string_insert/append/prepend_unichar):
|
||||||
|
Add functions to insert a unichar as UTF-8, since this
|
||||||
|
is reasonably common.
|
||||||
|
|
||||||
|
* glib/gutf8.c glib/gunicode.h (g_utf8_get_char_validated):
|
||||||
|
New function exposing iterating through possibly invalid/incomplete
|
||||||
|
UTF-8 to unicode to the outside world.
|
||||||
|
|
||||||
|
* glib/gutf8.c (g_utf8_get_char_extended): Fix max_len argument
|
||||||
|
to be gssize, not gsize.
|
||||||
|
|
||||||
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
2001-07-17 Kjartan Maraas <kmaraas@gnome.org>
|
||||||
|
|
||||||
* configure.in: Added "nn" to ALL_LINGUAS.
|
* configure.in: Added "nn" to ALL_LINGUAS.
|
||||||
|
@ -465,6 +465,25 @@ g_string_append_c (GString *fstring,
|
|||||||
return g_string_insert_c (fstring, -1, c);
|
return g_string_insert_c (fstring, -1, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_string_append_unichar:
|
||||||
|
* @string: a #GString
|
||||||
|
* @wc: a Unicode character
|
||||||
|
*
|
||||||
|
* Converts a Unicode character into UTF-8, and appends it
|
||||||
|
* to the string.
|
||||||
|
*
|
||||||
|
* Return value: @string
|
||||||
|
**/
|
||||||
|
GString*
|
||||||
|
g_string_append_unichar (GString *string,
|
||||||
|
gunichar wc)
|
||||||
|
{
|
||||||
|
g_return_val_if_fail (string != NULL, NULL);
|
||||||
|
|
||||||
|
return g_string_insert_unichar (string, -1, wc);
|
||||||
|
}
|
||||||
|
|
||||||
GString*
|
GString*
|
||||||
g_string_prepend (GString *fstring,
|
g_string_prepend (GString *fstring,
|
||||||
const gchar *val)
|
const gchar *val)
|
||||||
@ -495,6 +514,25 @@ g_string_prepend_c (GString *fstring,
|
|||||||
return g_string_insert_c (fstring, 0, c);
|
return g_string_insert_c (fstring, 0, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_string_prepend_unichar:
|
||||||
|
* @string: a #GString
|
||||||
|
* @wc: a Unicode character
|
||||||
|
*
|
||||||
|
* Converts a Unicode character into UTF-8, and prepends it
|
||||||
|
* to the string.
|
||||||
|
*
|
||||||
|
* Return value: @string
|
||||||
|
**/
|
||||||
|
GString*
|
||||||
|
g_string_prepend_unichar (GString *string,
|
||||||
|
gunichar wc)
|
||||||
|
{
|
||||||
|
g_return_val_if_fail (string != NULL, NULL);
|
||||||
|
|
||||||
|
return g_string_insert_unichar (string, 0, wc);
|
||||||
|
}
|
||||||
|
|
||||||
GString*
|
GString*
|
||||||
g_string_insert (GString *fstring,
|
g_string_insert (GString *fstring,
|
||||||
gssize pos,
|
gssize pos,
|
||||||
@ -537,6 +575,36 @@ g_string_insert_c (GString *fstring,
|
|||||||
return fstring;
|
return fstring;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_string_insert_unichar:
|
||||||
|
* @string: a #GString
|
||||||
|
* @pos: the position at which to insert character, or -1 to
|
||||||
|
* append at the end of the string.
|
||||||
|
* @wc: a Unicode character
|
||||||
|
*
|
||||||
|
* Converts a Unicode character into UTF-8, and inserts it
|
||||||
|
* into the string at the given position.
|
||||||
|
*
|
||||||
|
* Return value: @string
|
||||||
|
**/
|
||||||
|
GString*
|
||||||
|
g_string_insert_unichar (GString *string,
|
||||||
|
gssize pos,
|
||||||
|
gunichar wc)
|
||||||
|
{
|
||||||
|
gchar buf[6];
|
||||||
|
gint charlen;
|
||||||
|
|
||||||
|
/* We could be somewhat more efficient here by computing
|
||||||
|
* the length, adding the space, then converting into that
|
||||||
|
* space, by cut-and-pasting the internals of g_unichar_to_utf8.
|
||||||
|
*/
|
||||||
|
g_return_val_if_fail (string != NULL, NULL);
|
||||||
|
|
||||||
|
charlen = g_unichar_to_utf8 (wc, buf);
|
||||||
|
return g_string_insert_len (string, pos, buf, charlen);
|
||||||
|
}
|
||||||
|
|
||||||
GString*
|
GString*
|
||||||
g_string_erase (GString *fstring,
|
g_string_erase (GString *fstring,
|
||||||
gsize pos,
|
gsize pos,
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#define __G_STRING_H__
|
#define __G_STRING_H__
|
||||||
|
|
||||||
#include <glib/gtypes.h>
|
#include <glib/gtypes.h>
|
||||||
|
#include <glib/gunicode.h>
|
||||||
|
|
||||||
G_BEGIN_DECLS
|
G_BEGIN_DECLS
|
||||||
|
|
||||||
@ -79,10 +80,14 @@ GString* g_string_append_len (GString *string,
|
|||||||
gssize len);
|
gssize len);
|
||||||
GString* g_string_append_c (GString *string,
|
GString* g_string_append_c (GString *string,
|
||||||
gchar c);
|
gchar c);
|
||||||
|
GString* g_string_append_unichar (GString *string,
|
||||||
|
gunichar wc);
|
||||||
GString* g_string_prepend (GString *string,
|
GString* g_string_prepend (GString *string,
|
||||||
const gchar *val);
|
const gchar *val);
|
||||||
GString* g_string_prepend_c (GString *string,
|
GString* g_string_prepend_c (GString *string,
|
||||||
gchar c);
|
gchar c);
|
||||||
|
GString* g_string_prepend_unichar (GString *string,
|
||||||
|
gunichar wc);
|
||||||
GString* g_string_prepend_len (GString *string,
|
GString* g_string_prepend_len (GString *string,
|
||||||
const gchar *val,
|
const gchar *val,
|
||||||
gssize len);
|
gssize len);
|
||||||
@ -92,6 +97,9 @@ GString* g_string_insert (GString *string,
|
|||||||
GString* g_string_insert_c (GString *string,
|
GString* g_string_insert_c (GString *string,
|
||||||
gssize pos,
|
gssize pos,
|
||||||
gchar c);
|
gchar c);
|
||||||
|
GString* g_string_insert_unichar (GString *string,
|
||||||
|
gssize pos,
|
||||||
|
gunichar wc);
|
||||||
GString* g_string_erase (GString *string,
|
GString* g_string_erase (GString *string,
|
||||||
gsize pos,
|
gsize pos,
|
||||||
gsize len);
|
gsize len);
|
||||||
|
@ -167,7 +167,10 @@ GLIB_VAR char g_utf8_skip[256];
|
|||||||
|
|
||||||
#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])
|
#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])
|
||||||
|
|
||||||
gunichar g_utf8_get_char (const gchar *p);
|
gunichar g_utf8_get_char (const gchar *p);
|
||||||
|
gunichar g_utf8_get_char_validated (const gchar *p,
|
||||||
|
gssize max_len);
|
||||||
|
|
||||||
gchar* g_utf8_offset_to_pointer (const gchar *str,
|
gchar* g_utf8_offset_to_pointer (const gchar *str,
|
||||||
glong offset);
|
glong offset);
|
||||||
glong g_utf8_pointer_to_offset (const gchar *str,
|
glong g_utf8_pointer_to_offset (const gchar *str,
|
||||||
|
@ -872,8 +872,6 @@ g_utf8_casefold (const gchar *str,
|
|||||||
{
|
{
|
||||||
GString *result = g_string_new (NULL);
|
GString *result = g_string_new (NULL);
|
||||||
const char *p;
|
const char *p;
|
||||||
gchar buf[6];
|
|
||||||
int charlen;
|
|
||||||
|
|
||||||
p = str;
|
p = str;
|
||||||
while ((len < 0 || p < str + len) && *p)
|
while ((len < 0 || p < str + len) && *p)
|
||||||
@ -903,9 +901,7 @@ g_utf8_casefold (const gchar *str,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ch = g_unichar_tolower (ch);
|
g_string_append_unichar (result, g_unichar_tolower (ch));
|
||||||
charlen = g_unichar_to_utf8 (ch, buf);
|
|
||||||
g_string_append_len (result, buf, charlen);
|
|
||||||
|
|
||||||
next:
|
next:
|
||||||
p = g_utf8_next_char (p);
|
p = g_utf8_next_char (p);
|
||||||
|
36
glib/gutf8.c
36
glib/gutf8.c
@ -245,7 +245,9 @@ g_utf8_strlen (const gchar *p,
|
|||||||
*
|
*
|
||||||
* Convert a sequence of bytes encoded as UTF-8 to a unicode character.
|
* Convert a sequence of bytes encoded as UTF-8 to a unicode character.
|
||||||
* If @p does not point to a valid UTF-8 encoded character, results are
|
* If @p does not point to a valid UTF-8 encoded character, results are
|
||||||
* undefined.
|
* undefined. If you are not sure that the bytes are complete
|
||||||
|
* valid unicode characters, you should use g_utf8_get_char_validated()
|
||||||
|
* instead.
|
||||||
*
|
*
|
||||||
* Return value: the resulting character
|
* Return value: the resulting character
|
||||||
**/
|
**/
|
||||||
@ -550,7 +552,8 @@ g_utf8_strrchr (const char *p,
|
|||||||
* and return (gunichar)-2 on incomplete trailing character
|
* and return (gunichar)-2 on incomplete trailing character
|
||||||
*/
|
*/
|
||||||
static inline gunichar
|
static inline gunichar
|
||||||
g_utf8_get_char_extended (const gchar *p, gsize max_len)
|
g_utf8_get_char_extended (const gchar *p,
|
||||||
|
gssize max_len)
|
||||||
{
|
{
|
||||||
guint i, len;
|
guint i, len;
|
||||||
gunichar wc = (guchar) *p;
|
gunichar wc = (guchar) *p;
|
||||||
@ -625,6 +628,35 @@ g_utf8_get_char_extended (const gchar *p, gsize max_len)
|
|||||||
return wc;
|
return wc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_utf8_get_char_validated:
|
||||||
|
* @p: a pointer to a Unicode character encoded as UTF-8
|
||||||
|
* @max_len: the maximum number of bytes to read, or -1, for no maximum.
|
||||||
|
*
|
||||||
|
* Convert a sequence of bytes encoded as UTF-8 to a unicode character.
|
||||||
|
* This function checks for incomplete characters, for invalid characters
|
||||||
|
* such as characters that are out of the range of Unicode, and for
|
||||||
|
* overlong encodings of valid characters.
|
||||||
|
*
|
||||||
|
* Return value: the resulting character. If @p points to a partial
|
||||||
|
* sequence at the end of a string that could begin a valid character,
|
||||||
|
* returns (gunichar)-2; otherwise, if @p does not point to a valid
|
||||||
|
* UTF-8 encoded unicode character, returns (gunichar)-1.
|
||||||
|
**/
|
||||||
|
gunichar
|
||||||
|
g_utf8_get_char_validated (const gchar *p,
|
||||||
|
gssize max_len)
|
||||||
|
{
|
||||||
|
gunichar result = g_utf8_get_char_extended (p, max_len);
|
||||||
|
|
||||||
|
if (result & 0x80000000)
|
||||||
|
return result;
|
||||||
|
else if (!UNICODE_VALID (result))
|
||||||
|
return (gunichar)-1;
|
||||||
|
else
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* g_utf8_to_ucs4_fast:
|
* g_utf8_to_ucs4_fast:
|
||||||
* @str: a UTF-8 encoded string
|
* @str: a UTF-8 encoded string
|
||||||
|
@ -11,8 +11,6 @@ decode (const gchar *input)
|
|||||||
unsigned ch;
|
unsigned ch;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
GString *result = g_string_new (NULL);
|
GString *result = g_string_new (NULL);
|
||||||
int len;
|
|
||||||
char buf[6];
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@ -30,8 +28,7 @@ decode (const gchar *input)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
len = g_unichar_to_utf8 (ch, buf);
|
g_string_append_unichar (result, ch);
|
||||||
g_string_append_len (result, buf, len);
|
|
||||||
|
|
||||||
while (input[offset] && input[offset] != ' ')
|
while (input[offset] && input[offset] != ' ')
|
||||||
offset++;
|
offset++;
|
||||||
|
Loading…
Reference in New Issue
Block a user