mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-04-14 11:38:05 +02:00
2008-12-07 Behdad Esfahbod <behdad@gnome.org> Bug 563156 – Document printing and scanning gunichar values * glib/tmpl/unicode.sgml: Document printing and scanning gunichar values. svn path=/trunk/; revision=7731
887 lines
18 KiB
Plaintext
887 lines
18 KiB
Plaintext
<!-- ##### SECTION Title ##### -->
|
|
Unicode Manipulation
|
|
|
|
<!-- ##### SECTION Short_Description ##### -->
|
|
functions operating on Unicode characters and UTF-8 strings
|
|
|
|
<!-- ##### SECTION Long_Description ##### -->
|
|
<para>
|
|
This section describes a number of functions for dealing with
|
|
Unicode characters and strings. There are analogues of the
|
|
traditional <filename>ctype.h</filename> character classification
|
|
and case conversion functions, UTF-8 analogues of some string utility
|
|
functions, functions to perform normalization, case conversion and
|
|
collation on UTF-8 strings and finally functions to convert between
|
|
the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
|
|
</para>
|
|
|
|
<para>
|
|
The implementations of the Unicode functions in GLib are based
|
|
on the Unicode Character Data tables, which are available from
|
|
<ulink url="http://www.unicode.org/">www.unicode.org</ulink>.
|
|
GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
|
|
GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1.
|
|
</para>
|
|
|
|
<!-- ##### SECTION See_Also ##### -->
|
|
<para>
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
<term>g_locale_to_utf8(), g_locale_from_utf8()</term>
|
|
<listitem><para>
|
|
Convenience functions for converting between UTF-8 and the locale encoding.
|
|
</para></listitem>
|
|
</varlistentry>
|
|
|
|
</variablelist>
|
|
</para>
|
|
|
|
<!-- ##### SECTION Stability_Level ##### -->
|
|
|
|
|
|
<!-- ##### TYPEDEF gunichar ##### -->
|
|
<para>
|
|
A type which can hold any UTF-32 or UCS-4 character code, also known
|
|
as a Unicode code point.
|
|
</para>
|
|
<para>
|
|
To print/scan values of this type to/from text you need to convert
|
|
to/from UTF-8, using g_ucs4_to_utf8()/g_utf8_to_ucs4().
|
|
</para>
|
|
<para>
|
|
To print/scan values of this type as integer, use
|
|
%G_GINT32_MODIFIER and/or %G_GUINT32_FORMAT.
|
|
</para>
|
|
<para>
|
|
The notation to express a Unicode code point in running text is as a
|
|
hexadecimal number with four to six digits and uppercase letters, prefixed
|
|
by the string "U+". Leading zeros are omitted, unless the code point would
|
|
have fewer than four hexadecimal digits.
|
|
For example, "U+0041 LATIN CAPITAL LETTER A".
|
|
To print a code point in the U+-notation, use the format string
|
|
"U+%04"G_GINT32_MODIFIER"X".
|
|
To scan, use the format string "U+%06"G_GINT32_MODIFIER"X".
|
|
<informalexample>
|
|
<programlisting>
|
|
gunichar c;
|
|
sscanf ("U+0041", "U+%06"G_GINT32_MODIFIER"X", &amp;c);
|
|
g_print ("Read U+%04"G_GINT32_MODIFIER"X", c);
|
|
</programlisting>
|
|
</informalexample>
|
|
</para>
|
|
|
|
|
|
<!-- ##### TYPEDEF gunichar2 ##### -->
|
|
<para>
|
|
A type which can hold any UTF-16 code
|
|
point<footnote id="utf16_surrogate_pairs">UTF-16 also has so-called
|
|
<firstterm>surrogate pairs</firstterm> to encode characters beyond the
|
|
BMP as pairs of 16-bit numbers. Surrogate pairs cannot be stored in a
|
|
single gunichar2 field, but all GLib functions accepting gunichar2 arrays
|
|
will correctly interpret surrogate pairs.</footnote>.
|
|
</para>
|
|
<para>
|
|
To print/scan values of this type to/from text you need to convert
|
|
to/from UTF-8, using g_utf16_to_utf8()/g_utf8_to_utf16().
|
|
</para>
|
|
<para>
|
|
To print/scan values of this type as integer, use
|
|
%G_GINT16_MODIFIER and/or %G_GUINT16_FORMAT.
|
|
</para>
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_validate ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@ch:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isalnum ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isalpha ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_iscntrl ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isdefined ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isdigit ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isgraph ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_islower ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_ismark ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isprint ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_ispunct ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isspace ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_istitle ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isupper ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_isxdigit ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_iswide ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_iswide_cjk ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_iszerowidth ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_toupper ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_tolower ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_totitle ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_digit_value ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_xdigit_value ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### ENUM GUnicodeType ##### -->
|
|
<para>
|
|
These are the possible character classifications from the
|
|
Unicode specification.
|
|
See <ulink url="http://www.unicode.org/Public/UNIDATA/UnicodeData.html"
|
|
>http://www.unicode.org/Public/UNIDATA/UnicodeData.html</ulink>.
|
|
</para>
|
|
|
|
@G_UNICODE_CONTROL: General category "Other, Control" (Cc)
|
|
@G_UNICODE_FORMAT: General category "Other, Format" (Cf)
|
|
@G_UNICODE_UNASSIGNED: General category "Other, Not Assigned" (Cn)
|
|
@G_UNICODE_PRIVATE_USE: General category "Other, Private Use" (Co)
|
|
@G_UNICODE_SURROGATE: General category "Other, Surrogate" (Cs)
|
|
@G_UNICODE_LOWERCASE_LETTER: General category "Letter, Lowercase" (Ll)
|
|
@G_UNICODE_MODIFIER_LETTER: General category "Letter, Modifier" (Lm)
|
|
@G_UNICODE_OTHER_LETTER: General category "Letter, Other" (Lo)
|
|
@G_UNICODE_TITLECASE_LETTER: General category "Letter, Titlecase" (Lt)
|
|
@G_UNICODE_UPPERCASE_LETTER: General category "Letter, Uppercase" (Lu)
|
|
@G_UNICODE_COMBINING_MARK: General category "Mark, Spacing Combining" (Mc)
|
|
@G_UNICODE_ENCLOSING_MARK: General category "Mark, Enclosing" (Me)
|
|
@G_UNICODE_NON_SPACING_MARK: General category "Mark, Nonspacing" (Mn)
|
|
@G_UNICODE_DECIMAL_NUMBER: General category "Number, Decimal Digit" (Nd)
|
|
@G_UNICODE_LETTER_NUMBER: General category "Number, Letter" (Nl)
|
|
@G_UNICODE_OTHER_NUMBER: General category "Number, Other" (No)
|
|
@G_UNICODE_CONNECT_PUNCTUATION: General category "Punctuation, Connector" (Pc)
|
|
@G_UNICODE_DASH_PUNCTUATION: General category "Punctuation, Dash" (Pd)
|
|
@G_UNICODE_CLOSE_PUNCTUATION: General category "Punctuation, Close" (Pe)
|
|
@G_UNICODE_FINAL_PUNCTUATION: General category "Punctuation, Final quote" (Pf)
|
|
@G_UNICODE_INITIAL_PUNCTUATION: General category "Punctuation, Initial quote" (Pi)
|
|
@G_UNICODE_OTHER_PUNCTUATION: General category "Punctuation, Other" (Po)
|
|
@G_UNICODE_OPEN_PUNCTUATION: General category "Punctuation, Open" (Ps)
|
|
@G_UNICODE_CURRENCY_SYMBOL: General category "Symbol, Currency" (Sc)
|
|
@G_UNICODE_MODIFIER_SYMBOL: General category "Symbol, Modifier" (Sk)
|
|
@G_UNICODE_MATH_SYMBOL: General category "Symbol, Math" (Sm)
|
|
@G_UNICODE_OTHER_SYMBOL: General category "Symbol, Other" (So)
|
|
@G_UNICODE_LINE_SEPARATOR: General category "Separator, Line" (Zl)
|
|
@G_UNICODE_PARAGRAPH_SEPARATOR: General category "Separator, Paragraph" (Zp)
|
|
@G_UNICODE_SPACE_SEPARATOR: General category "Separator, Space" (Zs)
|
|
|
|
<!-- ##### FUNCTION g_unichar_type ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### ENUM GUnicodeBreakType ##### -->
|
|
<para>
|
|
These are the possible line break classifications.
|
|
The five Hangul types were added in Unicode 4.1, so they have been
|
|
introduced in GLib 2.10. Note that new types may be added in the future.
|
|
Applications should be ready to handle unknown values.
|
|
They may be regarded as %G_UNICODE_BREAK_UNKNOWN.
|
|
See <ulink url="http://www.unicode.org/unicode/reports/tr14/"
|
|
>http://www.unicode.org/unicode/reports/tr14/</ulink>.
|
|
</para>
|
|
|
|
@G_UNICODE_BREAK_MANDATORY: Mandatory Break (BK)
|
|
@G_UNICODE_BREAK_CARRIAGE_RETURN: Carriage Return (CR)
|
|
@G_UNICODE_BREAK_LINE_FEED: Line Feed (LF)
|
|
@G_UNICODE_BREAK_COMBINING_MARK: Attached Characters and Combining Marks (CM)
|
|
@G_UNICODE_BREAK_SURROGATE: Surrogates (SG)
|
|
@G_UNICODE_BREAK_ZERO_WIDTH_SPACE: Zero Width Space (ZW)
|
|
@G_UNICODE_BREAK_INSEPARABLE: Inseparable (IN)
|
|
@G_UNICODE_BREAK_NON_BREAKING_GLUE: Non-breaking ("Glue") (GL)
|
|
@G_UNICODE_BREAK_CONTINGENT: Contingent Break Opportunity (CB)
|
|
@G_UNICODE_BREAK_SPACE: Space (SP)
|
|
@G_UNICODE_BREAK_AFTER: Break Opportunity After (BA)
|
|
@G_UNICODE_BREAK_BEFORE: Break Opportunity Before (BB)
|
|
@G_UNICODE_BREAK_BEFORE_AND_AFTER: Break Opportunity Before and After (B2)
|
|
@G_UNICODE_BREAK_HYPHEN: Hyphen (HY)
|
|
@G_UNICODE_BREAK_NON_STARTER: Nonstarter (NS)
|
|
@G_UNICODE_BREAK_OPEN_PUNCTUATION: Opening Punctuation (OP)
|
|
@G_UNICODE_BREAK_CLOSE_PUNCTUATION: Closing Punctuation (CL)
|
|
@G_UNICODE_BREAK_QUOTATION: Ambiguous Quotation (QU)
|
|
@G_UNICODE_BREAK_EXCLAMATION: Exclamation/Interrogation (EX)
|
|
@G_UNICODE_BREAK_IDEOGRAPHIC: Ideographic (ID)
|
|
@G_UNICODE_BREAK_NUMERIC: Numeric (NU)
|
|
@G_UNICODE_BREAK_INFIX_SEPARATOR: Infix Separator (Numeric) (IS)
|
|
@G_UNICODE_BREAK_SYMBOL: Symbols Allowing Break After (SY)
|
|
@G_UNICODE_BREAK_ALPHABETIC: Ordinary Alphabetic and Symbol Characters (AL)
|
|
@G_UNICODE_BREAK_PREFIX: Prefix (Numeric) (PR)
|
|
@G_UNICODE_BREAK_POSTFIX: Postfix (Numeric) (PO)
|
|
@G_UNICODE_BREAK_COMPLEX_CONTEXT: Complex Context Dependent (South East Asian) (SA)
|
|
@G_UNICODE_BREAK_AMBIGUOUS: Ambiguous (Alphabetic or Ideographic) (AI)
|
|
@G_UNICODE_BREAK_UNKNOWN: Unknown (XX)
|
|
@G_UNICODE_BREAK_NEXT_LINE: Next Line (NL)
|
|
@G_UNICODE_BREAK_WORD_JOINER: Word Joiner (WJ)
|
|
@G_UNICODE_BREAK_HANGUL_L_JAMO: Hangul L Jamo (JL)
|
|
@G_UNICODE_BREAK_HANGUL_V_JAMO: Hangul V Jamo (JV)
|
|
@G_UNICODE_BREAK_HANGUL_T_JAMO: Hangul T Jamo (JT)
|
|
@G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: Hangul LV Syllable (H2)
|
|
@G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: Hangul LVT Syllable (H3)
|
|
|
|
<!-- ##### FUNCTION g_unichar_break_type ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_combining_class ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@uc:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unicode_canonical_ordering ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@string:
|
|
@len:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unicode_canonical_decomposition ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@ch:
|
|
@result_len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_get_mirror_char ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@ch:
|
|
@mirrored_ch:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### ENUM GUnicodeScript ##### -->
|
|
<para>
|
|
The #GUnicodeScript enumeration identifies different writing
|
|
systems. The values correspond to the names as defined in the
|
|
Unicode standard. The enumeration has been added in GLib 2.14,
|
|
and is interchangeable with #PangoScript.
|
|
Note that new types may be added in the future. Applications
|
|
should be ready to handle unknown values.
|
|
See <ulink
|
|
url="http://www.unicode.org/reports/tr24/">Unicode Standard Annex
|
|
#24: Script names</ulink>.
|
|
</para>
|
|
|
|
@G_UNICODE_SCRIPT_INVALID_CODE: a value never returned from g_unichar_get_script()
|
|
@G_UNICODE_SCRIPT_COMMON: a character used by multiple different scripts
|
|
@G_UNICODE_SCRIPT_INHERITED: a mark glyph that takes its script from the
|
|
base glyph to which it is attached
|
|
@G_UNICODE_SCRIPT_ARABIC: Arabic
|
|
@G_UNICODE_SCRIPT_ARMENIAN: Armenian
|
|
@G_UNICODE_SCRIPT_BENGALI: Bengali
|
|
@G_UNICODE_SCRIPT_BOPOMOFO: Bopomofo
|
|
@G_UNICODE_SCRIPT_CHEROKEE: Cherokee
|
|
@G_UNICODE_SCRIPT_COPTIC: Coptic
|
|
@G_UNICODE_SCRIPT_CYRILLIC: Cyrillic
|
|
@G_UNICODE_SCRIPT_DESERET: Deseret
|
|
@G_UNICODE_SCRIPT_DEVANAGARI: Devanagari
|
|
@G_UNICODE_SCRIPT_ETHIOPIC: Ethiopic
|
|
@G_UNICODE_SCRIPT_GEORGIAN: Georgian
|
|
@G_UNICODE_SCRIPT_GOTHIC: Gothic
|
|
@G_UNICODE_SCRIPT_GREEK: Greek
|
|
@G_UNICODE_SCRIPT_GUJARATI: Gujarati
|
|
@G_UNICODE_SCRIPT_GURMUKHI: Gurmukhi
|
|
@G_UNICODE_SCRIPT_HAN: Han
|
|
@G_UNICODE_SCRIPT_HANGUL: Hangul
|
|
@G_UNICODE_SCRIPT_HEBREW: Hebrew
|
|
@G_UNICODE_SCRIPT_HIRAGANA: Hiragana
|
|
@G_UNICODE_SCRIPT_KANNADA: Kannada
|
|
@G_UNICODE_SCRIPT_KATAKANA: Katakana
|
|
@G_UNICODE_SCRIPT_KHMER: Khmer
|
|
@G_UNICODE_SCRIPT_LAO: Lao
|
|
@G_UNICODE_SCRIPT_LATIN: Latin
|
|
@G_UNICODE_SCRIPT_MALAYALAM: Malayalam
|
|
@G_UNICODE_SCRIPT_MONGOLIAN: Mongolian
|
|
@G_UNICODE_SCRIPT_MYANMAR: Myanmar
|
|
@G_UNICODE_SCRIPT_OGHAM: Ogham
|
|
@G_UNICODE_SCRIPT_OLD_ITALIC: Old Italic
|
|
@G_UNICODE_SCRIPT_ORIYA: Oriya
|
|
@G_UNICODE_SCRIPT_RUNIC: Runic
|
|
@G_UNICODE_SCRIPT_SINHALA: Sinhala
|
|
@G_UNICODE_SCRIPT_SYRIAC: Syriac
|
|
@G_UNICODE_SCRIPT_TAMIL: Tamil
|
|
@G_UNICODE_SCRIPT_TELUGU: Telugu
|
|
@G_UNICODE_SCRIPT_THAANA: Thaana
|
|
@G_UNICODE_SCRIPT_THAI: Thai
|
|
@G_UNICODE_SCRIPT_TIBETAN: Tibetan
|
|
@G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL:
|
|
Canadian Aboriginal
|
|
@G_UNICODE_SCRIPT_YI: Yi
|
|
@G_UNICODE_SCRIPT_TAGALOG: Tagalog
|
|
@G_UNICODE_SCRIPT_HANUNOO: Hanunoo
|
|
@G_UNICODE_SCRIPT_BUHID: Buhid
|
|
@G_UNICODE_SCRIPT_TAGBANWA: Tagbanwa
|
|
@G_UNICODE_SCRIPT_BRAILLE: Braille
|
|
@G_UNICODE_SCRIPT_CYPRIOT: Cypriot
|
|
@G_UNICODE_SCRIPT_LIMBU: Limbu
|
|
@G_UNICODE_SCRIPT_OSMANYA: Osmanya
|
|
@G_UNICODE_SCRIPT_SHAVIAN: Shavian
|
|
@G_UNICODE_SCRIPT_LINEAR_B: Linear B
|
|
@G_UNICODE_SCRIPT_TAI_LE: Tai Le
|
|
@G_UNICODE_SCRIPT_UGARITIC: Ugaritic
|
|
@G_UNICODE_SCRIPT_NEW_TAI_LUE: New Tai Lue
|
|
@G_UNICODE_SCRIPT_BUGINESE: Buginese
|
|
@G_UNICODE_SCRIPT_GLAGOLITIC: Glagolitic
|
|
@G_UNICODE_SCRIPT_TIFINAGH: Tifinagh
|
|
@G_UNICODE_SCRIPT_SYLOTI_NAGRI: Syloti Nagri
|
|
@G_UNICODE_SCRIPT_OLD_PERSIAN: Old Persian
|
|
@G_UNICODE_SCRIPT_KHAROSHTHI: Kharoshthi
|
|
@G_UNICODE_SCRIPT_UNKNOWN: an unassigned code point
|
|
@G_UNICODE_SCRIPT_BALINESE: Balinese
|
|
@G_UNICODE_SCRIPT_CUNEIFORM: Cuneiform
|
|
@G_UNICODE_SCRIPT_PHOENICIAN: Phoenician
|
|
@G_UNICODE_SCRIPT_PHAGS_PA: Phags-pa
|
|
@G_UNICODE_SCRIPT_NKO: N'Ko
|
|
@G_UNICODE_SCRIPT_KAYAH_LI: Kayah Li. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_LEPCHA: Lepcha. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_REJANG: Rejang. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_SUNDANESE: Sundanese. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_SAURASHTRA: Saurashtra. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_CHAM: Cham. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_OL_CHIKI: Ol Chiki. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_VAI: Vai. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_CARIAN: Carian. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_LYCIAN: Lycian. Since 2.16.3
|
|
@G_UNICODE_SCRIPT_LYDIAN: Lydian. Since 2.16.3
|
|
|
|
<!-- ##### FUNCTION g_unichar_get_script ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@ch:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### MACRO g_utf8_next_char ##### -->
|
|
<para>
|
|
Skips to the next character in a UTF-8 string. The string must be
|
|
valid; this macro is as fast as possible, and has no error-checking.
|
|
You would use this macro to iterate over a string character by
|
|
character. The macro returns the start of the next UTF-8 character.
|
|
Before using this macro, use g_utf8_validate() to validate strings
|
|
that may contain invalid UTF-8.
|
|
</para>
|
|
|
|
@p: Pointer to the start of a valid UTF-8 character.
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_get_char ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_get_char_validated ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@max_len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_offset_to_pointer ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@offset:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_pointer_to_offset ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@pos:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_prev_char ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_find_next_char ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@end:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_find_prev_char ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@p:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strlen ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@max:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strncpy ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@dest:
|
|
@src:
|
|
@n:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strchr ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@len:
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strrchr ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@p:
|
|
@len:
|
|
@c:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strreverse ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_validate ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@max_len:
|
|
@end:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strup ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_strdown ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_casefold ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_normalize ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@mode:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### ENUM GNormalizeMode ##### -->
|
|
<para>
|
|
Defines how a Unicode string is transformed into a canonical
|
|
form, standardizing such issues as whether a character with an accent is
|
|
represented as a base character and combining accent or as a single precomposed
|
|
character. Unicode strings should generally be normalized before comparing them.
|
|
</para>
|
|
|
|
@G_NORMALIZE_DEFAULT: standardize differences that do not affect the
|
|
text content, such as the above-mentioned accent representation.
|
|
@G_NORMALIZE_NFD: another name for %G_NORMALIZE_DEFAULT.
|
|
@G_NORMALIZE_DEFAULT_COMPOSE: like %G_NORMALIZE_DEFAULT, but with composed
|
|
forms rather than a maximally decomposed form.
|
|
@G_NORMALIZE_NFC: another name for %G_NORMALIZE_DEFAULT_COMPOSE.
|
|
@G_NORMALIZE_ALL: beyond %G_NORMALIZE_DEFAULT also standardize the
|
|
"compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the
|
|
standard forms (in this case DIGIT THREE). Formatting information may be
|
|
lost but for most text operations such characters should be considered the
|
|
same.
|
|
@G_NORMALIZE_NFKD: another name for %G_NORMALIZE_ALL.
|
|
@G_NORMALIZE_ALL_COMPOSE: like %G_NORMALIZE_ALL, but with composed
|
|
forms rather than a maximally decomposed form.
|
|
@G_NORMALIZE_NFKC: another name for %G_NORMALIZE_ALL_COMPOSE.
|
|
|
|
<!-- ##### FUNCTION g_utf8_collate ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str1:
|
|
@str2:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_collate_key ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_collate_key_for_filename ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_to_utf16 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_to_ucs4 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf8_to_ucs4_fast ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_written:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf16_to_ucs4 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_utf16_to_utf8 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_ucs4_to_utf16 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_ucs4_to_utf8 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@str:
|
|
@len:
|
|
@items_read:
|
|
@items_written:
|
|
@error:
|
|
@Returns:
|
|
|
|
|
|
<!-- ##### FUNCTION g_unichar_to_utf8 ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
@c:
|
|
@outbuf:
|
|
@Returns:
|
|
|
|
|