glib/gunicode.h

/* gunicode.h - Unicode manipulation functions
 *
 *  Copyright (C) 1999, 2000 Tom Tromey
 *  Copyright 2000 Red Hat, Inc.
 *
 * The Gnome Library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The Gnome Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the Gnome Library; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 *   Boston, MA 02111-1307, USA.
 */

#ifndef __GUNICODE_H__
#define __GUNICODE_H__

#include <stddef.h>      /* For size_t */

#ifdef __cplusplus
extern "C"
{
#endif

/* gunichar holds a single character, stored in a guint32.  Arrays of
 * gunichar are what the *_ucs4 conversion functions declared in this
 * header take and return.
 */
typedef guint32 gunichar;
/* gunichar2 is a guint16-sized unit.  Arrays of gunichar2 are what the
 * *_utf16 conversion functions declared in this header take and return.
 */
typedef guint16 gunichar2;

/* Character classifications, as returned by g_unichar_type().
 *
 * The enumerators are numbered consecutively starting at 0.  The values
 * are spelled out so that the numbering stays visible if entries are
 * ever added or moved.
 *
 * NOTE(review): the names and their ordering look like the Unicode
 * "General Category" values (Cc, Cf, Cn, Co, Cs, Ll, ...) -- confirm
 * against the tables used by the implementation.
 */
typedef enum {
  /* Other */
  G_UNICODE_CONTROL             = 0,
  G_UNICODE_FORMAT              = 1,
  G_UNICODE_UNASSIGNED          = 2,
  G_UNICODE_PRIVATE_USE         = 3,
  G_UNICODE_SURROGATE           = 4,

  /* Letters */
  G_UNICODE_LOWERCASE_LETTER    = 5,
  G_UNICODE_MODIFIER_LETTER     = 6,
  G_UNICODE_OTHER_LETTER        = 7,
  G_UNICODE_TITLECASE_LETTER    = 8,
  G_UNICODE_UPPERCASE_LETTER    = 9,

  /* Marks */
  G_UNICODE_COMBINING_MARK      = 10,
  G_UNICODE_ENCLOSING_MARK      = 11,
  G_UNICODE_NON_SPACING_MARK    = 12,

  /* Numbers */
  G_UNICODE_DECIMAL_NUMBER      = 13,
  G_UNICODE_LETTER_NUMBER       = 14,
  G_UNICODE_OTHER_NUMBER        = 15,

  /* Punctuation */
  G_UNICODE_CONNECT_PUNCTUATION = 16,
  G_UNICODE_DASH_PUNCTUATION    = 17,
  G_UNICODE_CLOSE_PUNCTUATION   = 18,
  G_UNICODE_FINAL_PUNCTUATION   = 19,
  G_UNICODE_INITIAL_PUNCTUATION = 20,
  G_UNICODE_OTHER_PUNCTUATION   = 21,
  G_UNICODE_OPEN_PUNCTUATION    = 22,

  /* Symbols */
  G_UNICODE_CURRENCY_SYMBOL     = 23,
  G_UNICODE_MODIFIER_SYMBOL     = 24,
  G_UNICODE_MATH_SYMBOL         = 25,
  G_UNICODE_OTHER_SYMBOL        = 26,

  /* Separators */
  G_UNICODE_LINE_SEPARATOR      = 27,
  G_UNICODE_PARAGRAPH_SEPARATOR = 28,
  G_UNICODE_SPACE_SEPARATOR     = 29
} GUnicodeType;

/* Returns TRUE if the current locale uses the UTF-8 charset.  If CHARSET
 * is not null, *CHARSET is set to the name of the current locale's
 * charset.  That name is statically allocated, so the caller must not
 * free or modify it.
 */
gboolean g_get_charset (char **charset);

/* Character-class predicates taking a gunichar.  Those from isalnum
 * through isxdigit are named after the corresponding <ctype.h>
 * functions.
 *
 * g_unichar_istitle, g_unichar_isdefined and g_unichar_iswide have no
 * <ctype.h> counterpart.
 * NOTE(review): their exact semantics are not documented in this header;
 * confirm against the implementation.
 */
gboolean g_unichar_isalnum   (gunichar c);
gboolean g_unichar_isalpha   (gunichar c);
gboolean g_unichar_iscntrl   (gunichar c);
gboolean g_unichar_isdigit   (gunichar c);
gboolean g_unichar_isgraph   (gunichar c);
gboolean g_unichar_islower   (gunichar c);
gboolean g_unichar_isprint   (gunichar c);
gboolean g_unichar_ispunct   (gunichar c);
gboolean g_unichar_isspace   (gunichar c);
gboolean g_unichar_isupper   (gunichar c);
gboolean g_unichar_isxdigit  (gunichar c);
gboolean g_unichar_istitle   (gunichar c);
gboolean g_unichar_isdefined (gunichar c);
gboolean g_unichar_iswide    (gunichar c);

/* More <ctype.h>-style functions.  These convert a character between the
 * three cases: upper, lower and title.  See the Unicode book to
 * understand title case.
 * NOTE(review): the result for a character that has no mapping in the
 * requested case is not stated here -- confirm against the
 * implementation.  */
gunichar g_unichar_toupper (gunichar c);
gunichar g_unichar_tolower (gunichar c);
gunichar g_unichar_totitle (gunichar c);

/* If C is a digit (according to `g_unichar_isdigit'), then return its
   numeric value.  Otherwise return -1.  */
gint g_unichar_digit_value (gunichar c);

/* NOTE(review): undocumented in this header.  Presumably the hexadecimal
   counterpart of g_unichar_digit_value() (compare `g_unichar_isxdigit');
   the value returned for a character that is not a hex digit is not
   stated here -- confirm against the implementation.  */
gint g_unichar_xdigit_value (gunichar c);

/* Return the Unicode character type of a given character, as one of the
   GUnicodeType values.  */
GUnicodeType g_unichar_type (gunichar c);


/* Compute canonical ordering of a string in-place.  This rearranges
   decomposed characters in the string according to their combining
   classes.  See the Unicode manual for more information.
   STRING is modified directly; nothing is returned.
   NOTE(review): LEN is presumably the number of gunichar elements in
   STRING (not a byte count) -- confirm against the implementation.  */
void g_unicode_canonical_ordering (gunichar *string,
				   size_t   len);

/* Compute canonical decomposition of the character CH.  Returns a
   g_malloc()d string of Unicode characters, which the caller therefore
   owns.  *RESULT_LEN is set to the resulting length of the string.
   NOTE(review): whether RESULT_LEN may be null is not stated here.  */
gunichar *g_unicode_canonical_decomposition (gunichar  ch,
					     size_t   *result_len);

/* g_utf8_skip: array of skip-bytes-per-initial character.  It is indexed
 * by the first byte of a character; g_utf8_next_char() adds the value it
 * finds there to a pointer in order to step over that character.
 *
 * We prefix variable declarations with GLIB_VAR so they can properly get
 * exported in windows dlls:
 *
 *   - Windows, building GLib itself:  extern + dllexport
 *   - Windows, using GLib:            extern + dllimport
 *   - everywhere else:                plain extern
 *
 * Every variant carries `extern', so this header only ever *declares*
 * the array.  Without it, the dllexport variant would make the line
 * below a tentative definition in each translation unit that includes
 * this header, rather than a reference to the one real table.
 *
 * The #ifndef lets an earlier header supply its own GLIB_VAR.
 */
#ifndef GLIB_VAR
#  ifdef G_OS_WIN32
#    ifdef GLIB_COMPILATION
#      define GLIB_VAR extern __declspec(dllexport)
#    else /* !GLIB_COMPILATION */
#      define GLIB_VAR extern __declspec(dllimport)
#    endif /* !GLIB_COMPILATION */
#  else /* !G_OS_WIN32 */
#    define GLIB_VAR extern
#  endif /* !G_OS_WIN32 */
#endif /* !GLIB_VAR */

GLIB_VAR char g_utf8_skip[256];

/* Step over the character that starts at P: look up the first byte of P
 * (read as an unsigned value, so bytes >= 0x80 index correctly) in
 * g_utf8_skip and return P advanced by that many bytes, as a char *.
 *
 * The whole expansion is parenthesized so that the result can be used
 * directly with postfix operators, e.g. g_utf8_next_char (p)[0]; a bare
 * cast expression would bind the subscript before the cast.  The byte
 * is read through a const pointer so a const P is not cast away just to
 * look at it.
 *
 * P is evaluated twice; do not pass an expression with side effects.
 */
#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))

/* Functions for examining and moving around in UTF-8 strings.
 *
 * NOTE(review): none of these are documented in this header.  The
 * one-line summaries below are inferred from the names and signatures
 * only; confirm each against the implementation before relying on it
 * (in particular: handling of invalid input, whether offsets and MAX
 * count characters or bytes, and what is returned at string boundaries).
 */

/* Presumably: the character that starts at P, as a gunichar. */
gunichar g_utf8_get_char          (const gchar *p);
/* Presumably: pointer into STR at character position OFFSET. */
gchar *  g_utf8_offset_to_pointer  (const gchar *str,
				    gint         offset);
/* Presumably: the inverse of the above -- position of POS within STR. */
gint     g_utf8_pointer_to_offset (const gchar *str,
				   const gchar *pos);
/* Presumably: pointer to the character preceding P. */
gchar *  g_utf8_prev_char         (const gchar *p);
/* Presumably: next character start after P, searching no further than
 * BOUND. */
gchar *  g_utf8_find_next_char    (const gchar *p,
				   const gchar *bound);
/* Presumably: previous character start before P, searching no further
 * back than STR. */
gchar *  g_utf8_find_prev_char    (const gchar *str,
				   const gchar *p);

/* Presumably: length of the string at P, limited by MAX. */
gint g_utf8_strlen (const gchar *p,
		    gint         max);

/* Copies N characters from SRC to DEST.
 * NOTE(review): per this comment N counts characters, not bytes, so the
 * number of bytes written to DEST can exceed N; whether DEST is always
 * terminated and what is returned are not stated here -- confirm
 * against the implementation.  */
gchar *g_utf8_strncpy (gchar       *dest,
		       const gchar *src,
		       size_t       n);

/* Find the UTF-8 character corresponding to CH in the string P.  These
   functions are the equivalents of strchr and strrchr.  */

gchar *g_utf8_strchr  (const gchar *p,
		       gunichar     ch);
gchar *g_utf8_strrchr (const gchar *p,
		       gunichar     ch);

/* Conversions between strings in three representations, as named:
 * UTF-8 (gchar), UTF-16 (gunichar2) and UCS-4 (gunichar).  Each takes
 * the source string and a length and returns a string in the target
 * representation.
 *
 * NOTE(review): not documented in this header.  The unit of LEN (bytes,
 * code units or characters), whether a negative LEN is accepted, how
 * invalid input is reported, and who owns the returned buffer are not
 * stated here -- confirm against the implementation.
 */
gunichar2 *g_utf8_to_utf16 (const gchar     *str,
			    gint             len);
gunichar * g_utf8_to_ucs4  (const gchar     *str,
			    gint             len);
gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
			    gint             len);
gchar *    g_utf16_to_utf8 (const gunichar2 *str,
			    gint             len);
gunichar * g_ucs4_to_utf16 (const gunichar  *str,
			    gint             len);
gchar *    g_ucs4_to_utf8  (const gunichar  *str,
			    gint             len);

/* Convert the single character C into UTF-8, writing the bytes to
 * OUTBUF.  OUTBUF must have at least 6 bytes of space.  Returns the
 * number of bytes in the result.
 * NOTE(review): whether the output is NUL-terminated is not stated
 * here; the 6-byte requirement suggests it is not -- confirm.
 */
gint      g_unichar_to_utf8 (gunichar    c,
			     char       *outbuf);

#ifdef __cplusplus
}
#endif

#endif /* __GUNICODE_H__ */
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`/* gunicode.h - Unicode manipulation functions`
			`*`
			`* Copyright (C) 1999, 2000 Tom Tromey`
			`* Copyright 2000 Red Hat, Inc.`
			`*`
			`* The Gnome Library is free software; you can redistribute it and/or`
applied patch from Andreas Persenius <ndap@swipnet.se> that updates the Wed Jul 26 12:59:31 2000 Tim Janik <timj@gtk.org> * *.[hc]: applied patch from Andreas Persenius <ndap@swipnet.se> that updates the license headers to the GNU Lesser General Public License, as well as updating the copyright year to 2000. 2000-07-26 13:02:02 +02:00			`* modify it under the terms of the GNU Lesser General Public License as`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`* published by the Free Software Foundation; either version 2 of the`
			`* License, or (at your option) any later version.`
			`*`
			`* The Gnome Library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
applied patch from Andreas Persenius <ndap@swipnet.se> that updates the Wed Jul 26 12:59:31 2000 Tim Janik <timj@gtk.org> * *.[hc]: applied patch from Andreas Persenius <ndap@swipnet.se> that updates the license headers to the GNU Lesser General Public License, as well as updating the copyright year to 2000. 2000-07-26 13:02:02 +02:00			`* Lesser General Public License for more details.`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`*`
applied patch from Andreas Persenius <ndap@swipnet.se> that updates the Wed Jul 26 12:59:31 2000 Tim Janik <timj@gtk.org> * *.[hc]: applied patch from Andreas Persenius <ndap@swipnet.se> that updates the license headers to the GNU Lesser General Public License, as well as updating the copyright year to 2000. 2000-07-26 13:02:02 +02:00			`* You should have received a copy of the GNU Lesser General Public`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`* License along with the Gnome Library; see the file COPYING.LIB. If not,`
			`* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,`
			`* Boston, MA 02111-1307, USA.`
			`*/`

			`#ifndef __GUNICODE_H__`
			`#define __GUNICODE_H__`

Move string.h include into the .c file instead of where it was in the .h Sun Jul 9 21:21:46 2000 Owen Taylor <otaylor@redhat.com> * genums.[ch]: Move string.h include into the .c file instead of where it was in the .h file by mistake. Sun Jul 9 21:20:45 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h: Include stddef.h instead of stdlib.h 2000-07-10 03:28:03 +02:00			`#include <stddef.h> /* For size_t */`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00
			`#ifdef __cplusplus`
			`extern "C"`
			`{`
			`#endif`

			`typedef guint32 gunichar;`
			`typedef guint16 gunichar2;`

			`/* These are the possible character classifications. */`
			`typedef enum {`
			`G_UNICODE_CONTROL,`
			`G_UNICODE_FORMAT,`
			`G_UNICODE_UNASSIGNED,`
			`G_UNICODE_PRIVATE_USE,`
			`G_UNICODE_SURROGATE,`
			`G_UNICODE_LOWERCASE_LETTER,`
			`G_UNICODE_MODIFIER_LETTER,`
			`G_UNICODE_OTHER_LETTER,`
			`G_UNICODE_TITLECASE_LETTER,`
			`G_UNICODE_UPPERCASE_LETTER,`
			`G_UNICODE_COMBINING_MARK,`
			`G_UNICODE_ENCLOSING_MARK,`
			`G_UNICODE_NON_SPACING_MARK,`
			`G_UNICODE_DECIMAL_NUMBER,`
			`G_UNICODE_LETTER_NUMBER,`
			`G_UNICODE_OTHER_NUMBER,`
			`G_UNICODE_CONNECT_PUNCTUATION,`
			`G_UNICODE_DASH_PUNCTUATION,`
			`G_UNICODE_CLOSE_PUNCTUATION,`
			`G_UNICODE_FINAL_PUNCTUATION,`
			`G_UNICODE_INITIAL_PUNCTUATION,`
			`G_UNICODE_OTHER_PUNCTUATION,`
			`G_UNICODE_OPEN_PUNCTUATION,`
			`G_UNICODE_CURRENCY_SYMBOL,`
			`G_UNICODE_MODIFIER_SYMBOL,`
			`G_UNICODE_MATH_SYMBOL,`
			`G_UNICODE_OTHER_SYMBOL,`
			`G_UNICODE_LINE_SEPARATOR,`
			`G_UNICODE_PARAGRAPH_SEPARATOR,`
			`G_UNICODE_SPACE_SEPARATOR`
			`} GUnicodeType;`

			`/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is`
			`* not null, sets *CHARSET to the name of the current locale's`
			`* charset. This value is statically allocated.`
			`*/`
			`gboolean g_get_charset (char **charset);`

			`/* These are all analogs of the <ctype.h> functions.`
			`*/`
			`gboolean g_unichar_isalnum (gunichar c);`
			`gboolean g_unichar_isalpha (gunichar c);`
			`gboolean g_unichar_iscntrl (gunichar c);`
Fix stray character Sun Jul 30 16:54:13 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h: Fix stray character * gutf8.c (g_unichar_to_utf8): Allow outbuf to be NULL, in which case we just compute the length. 2000-07-31 20:52:11 +02:00			`gboolean g_unichar_isdigit (gunichar c);`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`gboolean g_unichar_isgraph (gunichar c);`
			`gboolean g_unichar_islower (gunichar c);`
			`gboolean g_unichar_isprint (gunichar c);`
			`gboolean g_unichar_ispunct (gunichar c);`
			`gboolean g_unichar_isspace (gunichar c);`
			`gboolean g_unichar_isupper (gunichar c);`
			`gboolean g_unichar_isxdigit (gunichar c);`
			`gboolean g_unichar_istitle (gunichar c);`
			`gboolean g_unichar_isdefined (gunichar c);`
			`gboolean g_unichar_iswide (gunichar c);`

			`/* More <ctype.h> functions. These convert between the three cases.`
			`* See the Unicode book to understand title case. */`
			`gunichar g_unichar_toupper (gunichar c);`
			`gunichar g_unichar_tolower (gunichar c);`
			`gunichar g_unichar_totitle (gunichar c);`

			/* If C is a digit (according to `g_unichar_isdigit'), then return its
			`numeric value. Otherwise return -1. */`
			`gint g_unichar_digit_value (gunichar c);`

			`gint g_unichar_xdigit_value (gunichar c);`

			`/* Return the Unicode character type of a given character. */`
			`GUnicodeType g_unichar_type (gunichar c);`



			`/* Compute canonical ordering of a string in-place. This rearranges`
			`decomposed characters in the string according to their combining`
			`classes. See the Unicode manual for more information. */`
			`void g_unicode_canonical_ordering (gunichar *string,`
			`size_t len);`

			`/* Compute canonical decomposition of a character. Returns g_malloc()d`
			`string of Unicode characters. RESULT_LEN is set to the resulting`
			`length of the string. */`
			`gunichar *g_unicode_canonical_decomposition (gunichar ch,`
			`size_t *result_len);`

Rename the GUTILS_C_VAR macro to GLIB_VAR. 2000-07-08 Tor Lillqvist <tml@iki.fi> * glib.h (GLIB_VAR): Rename the GUTILS_C_VAR macro to GLIB_VAR. * gunicode.h: Mark the g_utf8_skip array with GLIB_VAR. * glib.def: Add two missing entry points. 2000-07-08 14:40:09 +02:00			`/* Array of skip-bytes-per-initial character.`
			`* We prefix variable declarations so they can`
			`* properly get exported in windows dlls.`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00			`*/`
Rename the GUTILS_C_VAR macro to GLIB_VAR. 2000-07-08 Tor Lillqvist <tml@iki.fi> * glib.h (GLIB_VAR): Rename the GUTILS_C_VAR macro to GLIB_VAR. * gunicode.h: Mark the g_utf8_skip array with GLIB_VAR. * glib.def: Add two missing entry points. 2000-07-08 14:40:09 +02:00			`#ifndef GLIB_VAR`
			`# ifdef G_OS_WIN32`
			`# ifdef GLIB_COMPILATION`
			`# define GLIB_VAR __declspec(dllexport)`
			`# else /* !GLIB_COMPILATION */`
			`# define GLIB_VAR extern __declspec(dllimport)`
			`# endif /* !GLIB_COMPILATION */`
			`# else /* !G_OS_WIN32 */`
			`# define GLIB_VAR extern`
			`# endif /* !G_OS_WIN32 */`
			`#endif /* !GLIB_VAR */`

			`GLIB_VAR char g_utf8_skip[256];`
Initial pass at adding unicode support functions. A few things still need Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode. 2000-06-21 18:11:21 +02:00
			`#define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)])`

			`gunichar g_utf8_get_char (const gchar *p);`
			`gchar * g_utf8_offset_to_pointer (const gchar *str,`
			`gint offset);`
			`gint g_utf8_pointer_to_offset (const gchar *str,`
			`const gchar *pos);`
			`gchar * g_utf8_prev_char (const gchar *p);`
			`gchar * g_utf8_find_next_char (const gchar *p,`
			`const gchar *bound);`
			`gchar * g_utf8_find_prev_char (const gchar *str,`
			`const gchar *p);`

			`gint g_utf8_strlen (const gchar *p,`
			`gint max);`

			`/* Copies n characters from src to dest */`
			`gchar *g_utf8_strncpy (gchar *dest,`
			`const gchar *src,`
			`size_t n);`

			`/* Find the UTF-8 character corresponding to ch, in string p. These`
			`functions are equivalants to strchr and strrchr */`

			`gchar *g_utf8_strchr (const gchar *p,`
			`gunichar ch);`
			`gchar *g_utf8_strrchr (const gchar *p,`
			`gunichar ch);`

			`gunichar2 *g_utf8_to_utf16 (const gchar *str,`
			`gint len);`
			`gunichar * g_utf8_to_ucs4 (const gchar *str,`
			`gint len);`
			`gunichar * g_utf16_to_ucs4 (const gunichar2 *str,`
			`gint len);`
			`gchar * g_utf16_to_utf8 (const gunichar2 *str,`
			`gint len);`
			`gunichar * g_ucs4_to_utf16 (const gunichar *str,`
			`gint len);`
			`gchar * g_ucs4_to_utf8 (const gunichar *str,`
			`gint len);`

			`/* Convert a single character into UTF-8. outbuf must have at`
			`* least 6 bytes of space. Returns the number of bytes in the`
			`* result.`
			`*/`
			`gint g_unichar_to_utf8 (gunichar c,`
			`char *outbuf);`

			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif /* GUNICODE_H */`