mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-13 07:56:17 +01:00
0891c64816
Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode.
179 lines
5.7 KiB
C
179 lines
5.7 KiB
C
/* gunicode.h - Unicode manipulation functions
|
|
*
|
|
* Copyright (C) 1999, 2000 Tom Tromey
|
|
* Copyright 2000 Red Hat, Inc.
|
|
*
|
|
* The Gnome Library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public License as
|
|
* published by the Free Software Foundation; either version 2 of the
|
|
* License, or (at your option) any later version.
|
|
*
|
|
* The Gnome Library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with the Gnome Library; see the file COPYING.LIB. If not,
|
|
* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#ifndef __GUNICODE_H__
|
|
#define __GUNICODE_H__
|
|
|
|
#include <stdlib.h> /* For size_t */
|
|
|
|
#ifdef __cplusplus
|
|
extern "C"
|
|
{
|
|
#endif
|
|
|
|
/* A single Unicode character as taken and returned by the g_unichar_*
 * functions in this header; an alias for guint32. */
typedef guint32 gunichar;
|
|
/* Element type of the UTF-16 strings handled by g_utf8_to_utf16,
 * g_utf16_to_ucs4 and g_utf16_to_utf8; an alias for guint16. */
typedef guint16 gunichar2;
|
|
|
|
/* These are the possible character classifications.
 *
 * No enumerator has an explicit initializer, so the values run
 * consecutively from 0 (G_UNICODE_CONTROL) to 29
 * (G_UNICODE_SPACE_SEPARATOR).
 * NOTE(review): the numbering presumably has to stay in step with the
 * generated character tables -- confirm before reordering or inserting
 * values.
 */
typedef enum {
  G_UNICODE_CONTROL,
  G_UNICODE_FORMAT,
  G_UNICODE_UNASSIGNED,
  G_UNICODE_PRIVATE_USE,
  G_UNICODE_SURROGATE,
  G_UNICODE_LOWERCASE_LETTER,
  G_UNICODE_MODIFIER_LETTER,
  G_UNICODE_OTHER_LETTER,
  G_UNICODE_TITLECASE_LETTER,
  G_UNICODE_UPPERCASE_LETTER,
  G_UNICODE_COMBINING_MARK,
  G_UNICODE_ENCLOSING_MARK,
  G_UNICODE_NON_SPACING_MARK,
  G_UNICODE_DECIMAL_NUMBER,
  G_UNICODE_LETTER_NUMBER,
  G_UNICODE_OTHER_NUMBER,
  G_UNICODE_CONNECT_PUNCTUATION,
  G_UNICODE_DASH_PUNCTUATION,
  G_UNICODE_CLOSE_PUNCTUATION,
  G_UNICODE_FINAL_PUNCTUATION,
  G_UNICODE_INITIAL_PUNCTUATION,
  G_UNICODE_OTHER_PUNCTUATION,
  G_UNICODE_OPEN_PUNCTUATION,
  G_UNICODE_CURRENCY_SYMBOL,
  G_UNICODE_MODIFIER_SYMBOL,
  G_UNICODE_MATH_SYMBOL,
  G_UNICODE_OTHER_SYMBOL,
  G_UNICODE_LINE_SEPARATOR,
  G_UNICODE_PARAGRAPH_SEPARATOR,
  G_UNICODE_SPACE_SEPARATOR
} GUnicodeType;
|
|
|
|
/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
|
|
* not null, sets *CHARSET to the name of the current locale's
|
|
* charset. This value is statically allocated.
|
|
*/
|
|
gboolean g_get_charset (char **charset);
|
|
|
|
/* These are all analogs of the <ctype.h> functions.
|
|
*/
|
|
gboolean g_unichar_isalnum (gunichar c);
|
|
gboolean g_unichar_isalpha (gunichar c);
|
|
gboolean g_unichar_iscntrl (gunichar c);
|
|
gboolean g_unicphar_isdigit (gunichar c);
|
|
gboolean g_unichar_isgraph (gunichar c);
|
|
gboolean g_unichar_islower (gunichar c);
|
|
gboolean g_unichar_isprint (gunichar c);
|
|
gboolean g_unichar_ispunct (gunichar c);
|
|
gboolean g_unichar_isspace (gunichar c);
|
|
gboolean g_unichar_isupper (gunichar c);
|
|
gboolean g_unichar_isxdigit (gunichar c);
|
|
gboolean g_unichar_istitle (gunichar c);
|
|
gboolean g_unichar_isdefined (gunichar c);
|
|
gboolean g_unichar_iswide (gunichar c);
|
|
|
|
/* More <ctype.h> functions. These convert between the three cases.
|
|
* See the Unicode book to understand title case. */
|
|
gunichar g_unichar_toupper (gunichar c);
|
|
gunichar g_unichar_tolower (gunichar c);
|
|
gunichar g_unichar_totitle (gunichar c);
|
|
|
|
/* If C is a digit (according to `g_unichar_isdigit'), then return its
|
|
numeric value. Otherwise return -1. */
|
|
gint g_unichar_digit_value (gunichar c);
|
|
|
|
/* Hexadecimal counterpart of g_unichar_digit_value().
 * NOTE(review): presumably returns the numeric value of C when
 * g_unichar_isxdigit() accepts it and -1 otherwise -- confirm against
 * the implementation.
 */
gint g_unichar_xdigit_value (gunichar c);
|
|
|
|
/* Return the Unicode character type of a given character. */
|
|
GUnicodeType g_unichar_type (gunichar c);
|
|
|
|
|
|
|
|
/* Compute canonical ordering of a string in-place. This rearranges
|
|
decomposed characters in the string according to their combining
|
|
classes. See the Unicode manual for more information. */
|
|
void g_unicode_canonical_ordering (gunichar *string,
|
|
size_t len);
|
|
|
|
/* Compute canonical decomposition of a character. Returns g_malloc()d
|
|
string of Unicode characters. RESULT_LEN is set to the resulting
|
|
length of the string. */
|
|
gunichar *g_unicode_canonical_decomposition (gunichar ch,
|
|
size_t *result_len);
|
|
|
|
/* Array of skip-bytes-per-initial character: indexed by the first byte
 * of a UTF-8 sequence, each entry is the number of bytes to advance to
 * reach the next character.  g_utf8_next_char() is the intended reader.
 */
extern char g_utf8_skip[256];
|
|
|
|
/* Advance P past the UTF-8 character it points at, using the
 * g_utf8_skip[] table indexed by the first byte of that character.
 * The result is a char * regardless of the constness of P.
 *
 * The whole expansion is parenthesized so that postfix operators applied
 * to the result (e.g. g_utf8_next_char (p)[0]) act on the returned
 * pointer instead of binding tighter than the cast.  The byte is read
 * through a const pointer so that a const P does not lose its qualifier
 * merely to index the table.
 *
 * P is evaluated twice, so it must not have side effects.
 */
#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))
|
|
|
|
gunichar g_utf8_get_char (const gchar *p);
|
|
gchar * g_utf8_offset_to_pointer (const gchar *str,
|
|
gint offset);
|
|
gint g_utf8_pointer_to_offset (const gchar *str,
|
|
const gchar *pos);
|
|
gchar * g_utf8_prev_char (const gchar *p);
|
|
gchar * g_utf8_find_next_char (const gchar *p,
|
|
const gchar *bound);
|
|
gchar * g_utf8_find_prev_char (const gchar *str,
|
|
const gchar *p);
|
|
|
|
/* Length of the UTF-8 string P.
 * NOTE(review): presumably counts characters rather than bytes, with MAX
 * bounding how much of P is examined -- the units of MAX and the meaning
 * of a negative value are not stated in this header; confirm against the
 * implementation.
 */
gint g_utf8_strlen (const gchar *p,
                    gint         max);
|
|
|
|
/* Copies n characters from src to dest */
|
|
gchar *g_utf8_strncpy (gchar *dest,
|
|
const gchar *src,
|
|
size_t n);
|
|
|
|
/* Find the UTF-8 character corresponding to ch, in string p. These
|
|
functions are equivalants to strchr and strrchr */
|
|
|
|
gchar *g_utf8_strchr (const gchar *p,
|
|
gunichar ch);
|
|
gchar *g_utf8_strrchr (const gchar *p,
|
|
gunichar ch);
|
|
|
|
gunichar2 *g_utf8_to_utf16 (const gchar *str,
|
|
gint len);
|
|
gunichar * g_utf8_to_ucs4 (const gchar *str,
|
|
gint len);
|
|
gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
|
|
gint len);
|
|
gchar * g_utf16_to_utf8 (const gunichar2 *str,
|
|
gint len);
|
|
gunichar * g_ucs4_to_utf16 (const gunichar *str,
|
|
gint len);
|
|
gchar * g_ucs4_to_utf8 (const gunichar *str,
|
|
gint len);
|
|
|
|
/* Convert a single character into UTF-8. outbuf must have at
|
|
* least 6 bytes of space. Returns the number of bytes in the
|
|
* result.
|
|
*/
|
|
gint g_unichar_to_utf8 (gunichar c,
|
|
char *outbuf);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* __GUNICODE_H__ */
|