Use G_N_ELEMENTS rather than a custom macro.

Sun Jul  1 20:16:25 2001  Owen Taylor  <otaylor@redhat.com>

	* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
	rather than a custom macro.

	* glib/gen-unicode-tables.pl: Adapt to changes in table
	formats for Unicode 3.1

	* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
	  glib/gen-unicode-tables.pl: Add case conversion functions
        g_utf8_casefold, g_utf8_strup, g_utf8_strdown.

	* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
	  tests/gen-casemap-txt.pl tests/casefold.txt
	  tests/casemap.txt: Test cases for case conversion.

	* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
	  glib/gen-unicode-tables.pl: Add function to do Unicode
        normalization g_utf8_normalize().

	* tests/unicode-normalize.c: Test program for normalization.

	* glib/gunicode.h glib/gunicollate.c: Add collation functions
	g_utf8_collate, g_utf8_collate_key.

	* tests/unicode-collate.c: Test program for collation.

	* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
	variable.

	* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
	Make work with UTF-8 even if the locale isn't UTF-8 based.
	Still somewhat broken: if the format string contains
	characters not representable in the current locale, it will
	warn and not work.

	* glib/gdate.c: Use UTF-8 normalization and casefolding.
This commit is contained in:
Owen Taylor 2001-07-02 00:49:21 +00:00 committed by Owen Taylor
parent b37e7bbb53
commit 4f96a13cba
32 changed files with 10042 additions and 3119 deletions

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -1,3 +1,41 @@
Sun Jul 1 20:16:25 2001 Owen Taylor <otaylor@redhat.com>
* glib/guniprop.c (g_unichar_totitle): Use G_N_ELEMENTS
rather than a custom macro.
* glib/gen-unicode-tables.pl: Adapt to changes in table
formats for Unicode 3.1
* glib/gunicode.h glib/guniprop.c glib/gunichartables.h
glib/gen-unicode-tables.pl: Add case conversion functions
g_utf8_casefold, g_utf8_strup, g_utf8_strdown.
* tests/unicode-caseconv.c tests/gen-casefold-txt.pl
tests/gen-casemap-txt.pl tests/casefold.txt
tests/casemap.txt: Test cases for case conversion.
* glib/gunicode.h glib/gunidecomp.[ch] glib/gunicomp.h
glib/gen-unicode-tables.pl: Add function to do Unicode
normalization g_utf8_normalize().
* tests/unicode-normalize.c: Test program for case conversion.
* glib/gunicode.h glib/gunicollate.c: Add collation functions
g_utf8_collate, g_utf8_collate_key.
* test/unicode-collate.c: Test program for collation.
* glib/gdate.c (g_date_fill_parse_tokens): Fix uninitialized
variable.
* glib/gdate.c (g_date_strftime) docs/Changes-2.0.txt:
Make work with UTF-8 even if the locale isn't UTF-8 based.
Still somewhat of broken, if the format string contains
characters not representable in the current locale, will warn
and not work.
* glib/gdate.c: Use UTF-8 normalization and casefolding.
Sat Jun 30 16:03:16 2001 Owen Taylor <otaylor@redhat.com>
* glib/giowin32.c glib/giounix.c glib/gmain.[ch]:

View File

@ -27,4 +27,8 @@
are now safe against removal of the current item, not the next item.
It's not recommended to mutate the list in the callback to these
functions in any case.
functions in any case.
* GDate now works in UTF-8, not in the current locale. If you
want to use it with the encoding of the locale, you need
to convert strings using g_locale_to_utf8 first.

View File

@ -120,6 +120,14 @@ the results may be needed.
See #G_PRIORITY_DEFAULT, #G_PRIORITY_DEFAULT_IDLE, #G_PRIORITY_HIGH,
#G_PRIORITY_HIGH_IDLE, and #G_PRIORITY_LOW.
<!-- ##### FUNCTION g_main_context_get ##### -->
<para>
</para>
@thread:
@Returns:
<!-- ##### FUNCTION g_main_loop_destroy ##### -->
<para>

View File

@ -259,15 +259,6 @@ It is not used within GLib or GTK+.
</para>
<!-- ##### FUNCTION g_main_context_get ##### -->
<para>
</para>
@thread:
@Returns:
<!-- ##### FUNCTION g_main_context_default ##### -->
<para>
@ -644,7 +635,7 @@ the required condition has been met, and returns TRUE if so.
@prepare:
@check:
@dispatch:
@destroy:
@finalize:
<!-- ##### STRUCT GSourceCallbackFuncs ##### -->
<para>

View File

@ -49,6 +49,7 @@ libglib_1_3_la_SOURCES = \
gunibreak.h \
gunibreak.c \
gunichartables.h \
gunicollate.c \
gunidecomp.h \
gunidecomp.c \
gutils.c

View File

@ -512,16 +512,19 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
if (pt->num_ints < 3)
{
gchar lcstr[128];
strncpy (lcstr, str, 127);
g_strdown (lcstr);
gchar *casefold;
gchar *normalized;
casefold = g_utf8_casefold (str);
normalized = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
g_free (casefold);
i = 1;
while (i < 13)
{
if (long_month_names[i] != NULL)
{
const gchar *found = strstr (lcstr, long_month_names[i]);
const gchar *found = strstr (normalized, long_month_names[i]);
if (found != NULL)
{
@ -532,7 +535,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
if (short_month_names[i] != NULL)
{
const gchar *found = strstr (lcstr, short_month_names[i]);
const gchar *found = strstr (normalized, short_month_names[i]);
if (found != NULL)
{
@ -575,21 +578,24 @@ g_date_prepare_to_parse (const gchar *str, GDateParseTokens *pt)
while (i < 13)
{
gchar *casefold;
g_date_set_dmy (&d, 1, i, 1);
g_return_if_fail (g_date_valid (&d));
g_date_strftime (buf, 127, "%b", &d);
casefold = g_utf8_casefold (buf);
g_free (short_month_names[i]);
g_strdown (buf);
short_month_names[i] = g_strdup (buf);
short_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
g_free (casefold);
g_date_strftime (buf, 127, "%B", &d);
casefold = g_utf8_casefold (buf);
g_free (long_month_names[i]);
g_strdown (buf);
long_month_names[i] = g_strdup (buf);
long_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL);
g_free (casefold);
++i;
}
@ -1331,7 +1337,7 @@ g_date_strftime (gchar *s,
const GDate *d)
{
struct tm tm;
gsize retval;
const gchar *charset;
g_return_val_if_fail (d != NULL, 0);
g_return_val_if_fail (g_date_valid (d), 0);
@ -1340,14 +1346,100 @@ g_date_strftime (gchar *s,
g_return_val_if_fail (s != 0, 0);
g_date_to_struct_tm (d, &tm);
retval = strftime (s, slen, format, &tm);
if (retval == 0)
if (g_get_charset (&charset))
{
/* If retval == 0, the contents of s are undefined. We define
* them.
*/
s[0] = '\0';
gint retval = strftime (s, slen, format, &tm);
if (retval == 0)
{
/* If retval == 0, the contents of s are undefined. We define
* them.
*/
s[0] = '\0';
}
return retval;
}
else
{
gchar *locale_format;
gsize tmplen;
gchar *tmpbuf;
gsize tmpbufsize;
gsize convlen = 0;
gchar *convbuf;
GError *error = NULL;
locale_format = g_convert (format, -1 , "UTF-8", charset,
NULL, NULL, &error);
if (error)
{
g_warning (G_STRLOC "Error converting format to %s: %s\n",
charset, error->message);
g_error_free (error);
return 0;
}
tmpbufsize = MAX (128, strlen (locale_format) * 2);
while (TRUE)
{
tmpbuf = g_malloc (tmpbufsize + 1);
tmplen = strftime (tmpbuf, tmpbufsize + 1, locale_format, &tm);
if (tmplen == tmpbufsize + 1)
{
g_free (tmpbuf);
tmpbufsize *= 2;
}
else
break;
}
g_free (locale_format);
if (tmplen == 0)
{
/* If retval == 0, the contents of s are undefined. We define
* them.
*/
g_free (locale_format);
s[0] = '\0';
return 0;
}
convbuf = g_convert (tmpbuf, tmplen, "UTF-8", charset, NULL, &convlen, &error);
g_free (tmpbuf);
if (error)
{
g_warning (G_STRLOC "Error converting results of strftime to UTF-8: %s\n", error->message);
g_error_free (error);
}
else
{
/* Only copy whole characters into the buffer
*/
gchar *in = convbuf;
gchar *out = s;
gchar *end = s + slen - 1;
while (*in)
{
int len = g_utf8_skip[*(guchar *)in];
if (out + len < end)
{
out += len;
in += len;
}
else
break;
}
memcpy (s, convbuf, out - s);
*out = '\0';
}
g_free (convbuf);
return convlen;
}
return retval;
}

View File

@ -1,6 +1,7 @@
#! /usr/bin/perl -w
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -19,7 +20,7 @@
# gen-unicode-tables.pl - Generate tables for libunicode from Unicode data.
# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
# Usage: gen-unicode-tables.pl [-decomp | -both] UNICODE-VERSION UnicodeData.txt LineBreak.txt
# Usage: gen-unicode-tables.pl [-decomp | -both] UNICODE-VERSION UnicodeData.txt LineBreak.txt SpecialCasing.txt CaseFolding.txt
# I consider the output of this program to be unrestricted. Use it as
# you will.
@ -29,7 +30,7 @@
# * For decomp table it might make sense to use a shift count other
# than 8. We could easily compute the perfect shift count.
use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME);
use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION);
# Names of fields in Unicode data table.
$CODE = 0;
@ -51,7 +52,18 @@ $TITLE = 14;
# Names of fields in the line break table
$BREAK_CODE = 0;
$BREAK_PROPERTY = 1;
$BREAK_NAME = 2;
# Names of fields in the SpecialCasing table
$CASE_CODE = 0;
$CASE_LOWER = 1;
$CASE_TITLE = 2;
$CASE_UPPER = 3;
$CASE_CONDITION = 4;
# Names of fields in the CaseFolding table
$FOLDING_CODE = 0;
$FOLDING_STATUS = 1;
$FOLDING_MAPPING = 2;
# Map general category code onto symbolic name.
%mappings =
@ -128,23 +140,54 @@ $BREAK_NAME = 2;
%title_to_lower = ();
%title_to_upper = ();
# Maximum length of special-case strings
my $special_case_len = 0;
my @special_cases;
$do_decomp = 0;
$do_props = 1;
if ($ARGV[0] eq '-decomp')
if (@ARGV && $ARGV[0] eq '-decomp')
{
$do_decomp = 1;
$do_props = 0;
shift @ARGV;
}
elsif ($ARGV[0] eq '-both')
elsif (@ARGV && $ARGV[0] eq '-both')
{
$do_decomp = 1;
shift @ARGV;
}
if (@ARGV != 6) {
$0 =~ s@.*/@@;
die "Usage: $0 [-decomp | -both] UNICODE-VERSION UnicodeData.txt LineBreak.txt SpecialCasing.txt CaseFolding.txt CompositionExclusions.txt\n";
}
print "Creating decomp table\n" if ($do_decomp);
print "Creating property table\n" if ($do_props);
print "Composition exlusions from $ARGV[5]\n";
open (INPUT, "< $ARGV[5]") || exit 1;
while (<INPUT>) {
chop;
next if /^#/;
next if /^\s*$/;
s/\s*#.*//;
s/^\s*//;
s/\s*$//;
$composition_exclusions{hex($_)} = 1;
}
close INPUT;
print "Unicode data from $ARGV[1]\n";
open (INPUT, "< $ARGV[1]") || exit 1;
@ -189,6 +232,8 @@ while (<INPUT>)
$last_code = $code;
}
close INPUT;
@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
'', '', '', '');
for (++$last_code; $last_code < 0x10000; ++$last_code)
@ -207,54 +252,59 @@ open (INPUT, "< $ARGV[2]") || exit 1;
$last_code = -1;
while (<INPUT>)
{
my ($start_code, $end_code);
chop;
next if /^#/;
s/\s*#.*//;
@fields = split (';', $_, 30);
if ($#fields != 2)
if ($#fields != 1)
{
printf STDERR ("Entry for $fields[$CODE] has wrong number of fields (%d)\n", $#fields);
next;
}
$code = hex ($fields[$CODE]);
last if ($code > 0xFFFF); # ignore characters out of the basic plane
if ($code > $last_code + 1)
if ($fields[$CODE] =~ /([A-F0-9]{4})..([A-F0-9]{4})/)
{
# Found a gap.
if ($fields[$NAME] =~ /Last>/)
$start_code = hex ($1);
$end_code = hex ($2);
} else {
$start_code = $end_code = hex ($fields[$CODE]);
}
last if ($start_code > 0xFFFF); # FIXME ignore characters out of the basic plane
if ($start_code > $last_code + 1)
{
# The gap represents undefined characters. If assigned,
# they are AL, if not assigned, XX
for (++$last_code; $last_code < $start_code; ++$last_code)
{
# Fill the gap with the last character read,
# since this was a range specified in the char database
$gap_break_prop = $fields[$BREAK_PROPERTY];
for (++$last_code; $last_code < $code; ++$last_code)
{
$break_props[$last_code] = $gap_break_prop;
}
}
else
{
# The gap represents undefined characters. If assigned,
# they are AL, if not assigned, XX
for (++$last_code; $last_code < $code; ++$last_code)
{
if ($type[$last_code] eq 'Cn')
{
$break_props[$last_code] = 'XX';
}
else
{
$break_props[$last_code] = 'AL';
}
}
if ($type[$last_code] eq 'Cn')
{
$break_props[$last_code] = 'XX';
}
else
{
$break_props[$last_code] = 'AL';
}
}
}
$break_props[$code] = $fields[$BREAK_PROPERTY];
$last_code = $code;
for ($last_code = $start_code; $last_code <= $end_code; $last_code++)
{
$break_props[$last_code] = $fields[$BREAK_PROPERTY];
}
$last_code = $end_code;
}
close INPUT;
for (++$last_code; $last_code < 0x10000; ++$last_code)
{
if ($type[$last_code] eq 'Cn')
@ -270,10 +320,142 @@ for (++$last_code; $last_code < 0x10000; ++$last_code)
print STDERR "Last code is not 0xFFFF" if ($last_code != 0xFFFF);
&print_tables ($last_code)
if $do_props;
&print_decomp ($last_code)
if $do_decomp;
print "Reading special-casing table for case conversion\n";
open (INPUT, "< $ARGV[3]") || exit 1;
while (<INPUT>)
{
my $code;
chop;
next if /^#/;
next if /^\s*$/;
s/\s*#.*//;
@fields = split ('\s*;\s*', $_, 30);
$raw_code = $fields[$CASE_CODE];
$code = hex ($raw_code);
if ($#fields != 4 && $#fields != 5)
{
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
next;
}
if (!defined $type[$code])
{
printf STDERR "Special case for code point: $code, which has no defined type\n";
next;
}
if (defined $fields[5]) {
# Ignore conditional special cases - we'll handle them in code
next;
}
if ($type[$code] eq 'Lu')
{
(hex $fields[$CASE_UPPER] == $code) || die "$raw_code is Lu and UCD_Upper($raw_code) != $raw_code";
&add_special_case ($code, $value[$code],$fields[$CASE_LOWER], $fields[$CASE_TITLE]);
} elsif ($type[$code] eq 'Lt')
{
(hex $fields[$CASE_TITLE] == $code) || die "$raw_code is Lt and UCD_Title($raw_code) != $raw_code";
&add_special_case ($code, undef,$fields[$CASE_LOWER], $fields[$CASE_UPPER]);
} elsif ($type[$code] eq 'Ll')
{
(hex $fields[$CASE_LOWER] == $code) || die "$raw_code is Ll and UCD_Lower($raw_code) != $raw_code";
&add_special_case ($code, $value[$code],$fields[$CASE_UPPER], $fields[$CASE_TITLE]);
} else {
printf STDERR "Special case for non-alphabetic code point: $raw_code\n";
next;
}
}
close INPUT;
open (INPUT, "< $ARGV[4]") || exit 1;
my $casefoldlen = 0;
my @casefold;
while (<INPUT>)
{
my $code;
chop;
next if /^#/;
next if /^\s*$/;
s/\s*#.*//;
@fields = split ('\s*;\s*', $_, 30);
$raw_code = $fields[$FOLDING_CODE];
$code = hex ($raw_code);
next if $code > 0xffff; # FIXME!
if ($#fields != 3)
{
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
next;
}
next if ($fields[$FOLDING_STATUS] eq 'S');
@values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
# Check simple case
if (@values == 1 &&
!(defined $value[$code] && $value[$code] >= 0xd800 && $value[$code] < 0xdc00) &&
defined $type[$code]) {
my $lower;
if ($type[$code] eq 'Ll')
{
$lower = $code;
} elsif ($type[$code] eq 'Lt')
{
$lower = $title_to_lower{$code};
} elsif ($type[$code] eq 'Lu')
{
$lower = $value[$code];
} else {
$lower = $code;
}
if ($lower == $values[0]) {
next;
}
}
my $string = pack ("U*", @values);
if (1 + length $string > $casefoldlen) {
$casefoldlen = 1 + length $string;
}
push @casefold, [ $code, $string ];
}
close INPUT;
if ($do_props) {
&print_tables ($last_code)
}
if ($do_decomp) {
&print_decomp ($last_code);
&output_composition_table;
}
&print_line_break ($last_code);
@ -307,9 +489,17 @@ sub process_one
$cclass[$code] = $fields[$COMBINING_CLASSES];
# Handle decompositions.
if ($fields[$DECOMPOSITION] ne ''
&& $fields[$DECOMPOSITION] !~ /\<.*\>/)
if ($fields[$DECOMPOSITION] ne '')
{
if ($fields[$DECOMPOSITION] =~ s/\<.*\>\s*//) {
$decompose_compat[$code] = 1;
} else {
$decompose_compat[$code] = 0;
if (!exists $composition_exclusions{$code}) {
$compositions{$code} = $fields[$DECOMPOSITION];
}
}
$decompositions[$code] = $fields[$DECOMPOSITION];
}
}
@ -369,6 +559,10 @@ sub print_tables
}
print OUT "\n};\n\n";
#
# print title case table
#
# FIXME: type.
print OUT "static unsigned short title_table[][3] = {\n";
my ($item);
@ -383,6 +577,12 @@ sub print_tables
}
print OUT "\n};\n\n";
#
# And special case conversion table -- conversions that change length
#
&output_special_case_table (\*OUT);
&output_casefold_table (\*OUT);
print OUT "#endif /* CHARTABLES_H */\n";
close (OUT);
@ -494,6 +694,8 @@ sub print_decomp
print OUT "typedef struct\n{\n";
# FIXME: type.
print OUT " unsigned short ch;\n";
print OUT " unsigned char canon_offset;\n";
print OUT " unsigned char compat_offset;\n";
print OUT " unsigned char *expansion;\n";
print OUT "} decomposition;\n\n";
@ -507,16 +709,43 @@ sub print_decomp
print OUT ",\n"
if ! $first;
$first = 0;
printf OUT " { 0x%04x, \"", $count;
$bytes_out += 2;
foreach $iter (&expand_decomp ($count))
{
printf OUT "\\x%02x\\x%02x", $iter / 256, $iter & 0xff;
$bytes_out += 2;
my $canon_decomp;
my $compat_decomp;
if (!$decompose_compat[$count]) {
$canon_decomp = make_decomp ($count, 0);
}
$compat_decomp = make_decomp ($count, 1);
if (defined $canon_decomp && $compat_decomp eq $canon_decomp) {
undef $compat_decomp;
}
my $string = "";
my $canon_offset = 0xff;
my $compat_offset = 0xff;
if (defined $canon_decomp) {
$canon_offset = 0;
$string .= $canon_decomp;
}
if (defined $compat_decomp) {
if (defined $canon_decomp) {
$string .= "\\x00\\x00";
}
$compat_offset = (length $string) / 4;
$string .= $compat_decomp;
}
$bytes_out += (length $string) / 4; # "\x20"
# Only a single terminator because one is implied in the string.
print OUT "\\0\" }";
$bytes_out += 2;
printf OUT qq( { 0x%04x, %u, %u, "%s\\0" }),
$count, $canon_offset, $compat_offset, $string;
$bytes_out += 6;
}
}
print OUT "\n};\n\n";
@ -588,16 +817,17 @@ sub fetch_cclass
# Expand a character decomposition recursively.
sub expand_decomp
{
my ($code) = @_;
my ($code, $compat) = @_;
my ($iter, $val);
my (@result) = ();
foreach $iter (split (' ', $decompositions[$code]))
{
$val = hex ($iter);
if (defined $decompositions[$val])
if (defined $decompositions[$val] &&
($compat || !$decompose_compat[$val]))
{
push (@result, &expand_decomp ($val));
push (@result, &expand_decomp ($val, $compat));
}
else
{
@ -607,3 +837,309 @@ sub expand_decomp
return @result;
}
# Return the decomposition of $code as C string-literal escape text.
#
# The code points produced by expand_decomp ($code, $compat) are each
# written as two "\xHH" escapes, high byte first, and concatenated in
# order.  $compat is passed through to expand_decomp unchanged.
sub make_decomp
{
    my ($code, $compat) = @_;

    return join ('',
                 map { sprintf "\\x%02x\\x%02x", $_ / 256, $_ & 0xff }
                 &expand_decomp ($code, $compat));
}
# Generate special case data string from two fields and register it.
#
#   $code    - code point the special case belongs to
#   $single  - single-character mapping to store first in the record,
#              or undef to store none
#   $field1,
#   $field2  - whitespace-separated lists of hexadecimal code points
#
# The record is the escape text for: $single (if defined), the values of
# $field1, a zero separator, the values of $field2, and a trailing "\0".
# Every value is written as two "\xHH" escapes, high byte first.
#
# Side effects: appends the record to @special_cases, raises
# $special_case_len if this record is the longest so far, and replaces
# $value[$code] with 0xD800 + the record's index in @special_cases.
sub add_special_case
{
    my ($code, $single, $field1, $field2) = @_;

    # Keep the scratch variables lexical: @values in particular is also
    # used as a package global by other parts of this script, and must
    # not be clobbered from here.
    my @values = (defined $single ? $single : (),
                  (map { hex ($_) } split /\s+/, $field1),
                  0,
                  (map { hex ($_) } split /\s+/, $field2));

    my $result = "";
    for my $v (@values) {
        $result .= sprintf ("\\x%02x\\x%02x", $v / 256, $v & 0xff);
    }
    $result .= "\\0";

    # Two bytes per value plus two more for termination (the explicit
    # "\0" above and, presumably, the NUL the C compiler appends to the
    # string literal).
    my $record_len = 2 * @values + 2;
    if ($record_len > $special_case_len) {
        $special_case_len = $record_len;
    }

    push @special_cases, $result;

    #
    # We encode special cases in the surrogate pair space
    #
    $value[$code] = 0xD800 + scalar(@special_cases) - 1;
}
# Write the C definition of special_case_table to the filehandle passed
# as the only argument: an explanatory comment, the array declaration
# sized by $special_case_len, one string literal per entry of
# @special_cases, and the closing brace.  A size summary goes to STDERR.
sub output_special_case_table
{
    my ($fh) = @_;

    # The heredoc bodies and their EOT terminators must stay in column 0.
    print $fh <<EOT;
/* Table of special cases for case conversion; each record contains
* First, the best single character mapping to lowercase if Lu,
* and to uppercase if Ll, followed by the output mapping for the two cases
* other than the case of the codepoint, in the order [Ll],[Lu],[Lt],
* separated and terminated by a double NUL.
*/
guchar special_case_table[][$special_case_len] = {
EOT

    foreach my $entry (@special_cases) {
        print $fh qq( "$entry",\n);
    }

    print $fh <<EOT;
};
EOT

    my $table_bytes = $special_case_len * scalar @special_cases;
    print STDERR "Generated ", $table_bytes, " bytes in special case table\n";
}
# Renumber the entries of the hash referenced by $array, in place.
#
# Keys are visited in ascending numeric order.  An entry whose value is
# exactly 1 is deleted; every other entry has its value replaced by the
# next consecutive index, starting at 0.  Returns the number of indices
# handed out, i.e. the number of entries kept.
sub enumerate_ordered
{
    my ($array) = @_;
    my $next_index = 0;

    # The sorted key list is built before the loop runs, so deleting
    # entries while walking it is safe.
    foreach my $key (sort { $a <=> $b } keys %$array) {
        if ($array->{$key} != 1) {
            $array->{$key} = $next_index;
            $next_index++;
        } else {
            delete $array->{$key};
        }
    }

    return $next_index;
}
sub output_composition_table
{
print STDERR "Generating composition table\n";
local ($bytes_out) = 0;
my %first;
my %second;
# First we need to go through and remove decompositions
# starting with a non-starter, and single-character
# decompositions. At the same time, record
# the first and second character of each decomposition
for $code (keys %compositions) {
@values = map { hex ($_) } split /\s+/, $compositions{$code};
if ($cclass[$values[0]]) {
delete $compositions{$code};
next;
}
if (@values == 1) {
delete $compositions{$code};
next;
}
if (@values != 2) {
die "$code has more than two elements in its decomposition!\n";
}
if (exists $first{$values[0]}) {
$first{$values[0]}++;
} else {
$first{$values[0]} = 1;
}
}
# Assign integer indices, removing singletons
my $n_first = enumerate_ordered (\%first);
# Now record the second character of each (non-singleton) decomposition
for $code (keys %compositions) {
@values = map { hex ($_) } split /\s+/, $compositions{$code};
if (exists $first{$values[0]}) {
if (exists $second{$values[1]}) {
$second{$values[1]}++;
} else {
$second{$values[1]} = 1;
}
}
}
# Assign integer indices, removing duplicate
my $n_second = enumerate_ordered (\%second);
# Build reverse table
my @first_singletons;
my @second_singletons;
my %reverse;
for $code (keys %compositions) {
@values = map { hex ($_) } split /\s+/, $compositions{$code};
my $first = $first{$values[0]};
my $second = $second{$values[1]};
if (defined $first && defined $second) {
$reverse{"$first|$second"} = $code;
} elsif (!defined $first) {
push @first_singletons, [ $values[0], $values[1], $code ];
} else {
push @second_singletons, [ $values[1], $values[0], $code ];
}
}
@first_singletons = sort { $a->[0] <=> $b->[0] } @first_singletons;
@second_singletons = sort { $a->[0] <=> $b->[0] } @second_singletons;
my %vals;
open OUT, ">gunicomp.h" or die "Cannot open gunicomp.h: $!\n";
# Assign values in lookup table for all code points involved
my $total = 1;
my $last = 0;
printf OUT "#define COMPOSE_FIRST_START %d\n", $total;
for $code (keys %first) {
$vals{$code} = $first{$code} + $total;
$last = $code if $code > $last;
}
$total += $n_first;
$i = 0;
printf OUT "#define COMPOSE_FIRST_SINGLE_START %d\n", $total;
for $record (@first_singletons) {
my $code = $record->[0];
$vals{$code} = $i++ + $total;
$last = $code if $code > $last;
}
$total += @first_singletons;
printf OUT "#define COMPOSE_SECOND_START %d\n", $total;
for $code (keys %second) {
$vals{$code} = $second{$code} + $total;
$last = $code if $code > $last;
}
$total += $n_second;
$i = 0;
printf OUT "#define COMPOSE_SECOND_SINGLE_START %d\n\n", $total;
for $record (@second_singletons) {
my $code = $record->[0];
$vals{$code} = $i++ + $total;
$last = $code if $code > $last;
}
# Output lookup table
my @row;
for (my $count = 0; $count <= $last; $count += 256)
{
$row[$count / 256] = &print_row ($count, '(gushort *) ', 'gushort', 2,
'compose_page',
sub { exists $vals{$_[0]} ? $vals{$_[0]} : 0; });
}
print OUT "static unsigned short *compose_table[256] = {\n";
for (my $count = 0; $count <= $last; $count += 256)
{
print OUT ",\n" if $count > 0;
print OUT " ", $row[$count / 256];
$bytes_out += 4;
}
print OUT "\n};\n\n";
# Output first singletons
print OUT "gushort compose_first_single[][2] = {\n";
$i = 0;
for $record (@first_singletons) {
print OUT ",\n" if $i++ > 0;
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
}
print OUT "\n};\n";
$bytes_out += @first_singletons * 4;
# Output second singletons
print OUT "gushort compose_second_single[][2] = {\n";
$i = 0;
for $record (@second_singletons) {
print OUT ",\n" if $i++ > 0;
printf OUT " { %#06x, %#06x }", $record->[1], $record->[2];
}
print OUT "\n};\n";
$bytes_out += @second_singletons * 4;
# Output array of composition pairs
print OUT <<EOT;
gushort compose_array[$n_first][$n_second] = {
EOT
for (my $i = 0; $i < $n_first; $i++) {
print OUT ",\n" if $i;
print OUT " { ";
for (my $j = 0; $j < $n_second; $j++) {
print OUT ", " if $j;
if (exists $reverse{"$i|$j"}) {
printf OUT "%#06x", $reverse{"$i|$j"};
} else {
print OUT " 0";
}
}
print OUT " }";
}
print OUT "\n";
print OUT <<EOT;
};
EOT
$bytes_out += $n_first * $n_second * 2;
printf STDERR "Generated %d bytes in compose tables\n", $bytes_out;
}
# Writes the C definition of casefold_table to the filehandle passed as the
# only argument.
#
# Reads the package globals @casefold (records of [ code point, folded
# string ]) and $casefoldlen (size of the data[] member).  @casefold is
# sorted in place by code point as a side effect.  The size of the
# generated table is reported on STDERR, like the other table generators.
sub output_casefold_table
{
    my $out = shift;

    print $out <<EOT;
/* Table of casefolding cases that can't be derived by lowercasing
*/
struct {
guint16 ch;
gchar data[$casefoldlen];
} casefold_table[] = {
EOT

    @casefold = sort { $a->[0] <=> $b->[0] } @casefold;

    for my $case (@casefold) {
        my ($code, $string) = @$case;

        # Pass the folded string as an argument instead of interpolating it
        # into the format, so a '%' in the data cannot be taken for a
        # conversion specification.
        printf $out qq({ %#04x, "%s" },\n), $code, $string;
    }

    print $out <<EOT;
};
EOT

    # Each record is a 2-byte code point plus data[] and its terminating
    # NUL, rounded down to an even size by the mask.
    my $recordlen = (2+$casefoldlen+1) & ~1;
    printf STDERR "Generated %d bytes for casefold table\n", $recordlen * @casefold;
}

View File

@ -706,7 +706,6 @@ g_io_channel_seek_position (GIOChannel* channel,
}
}
status = channel->funcs->io_seek (channel, offset, type, error);
if ((status == G_IO_STATUS_NORMAL) && (channel->use_buffer))

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@
#ifndef CHARTABLES_H
#define CHARTABLES_H
#define G_UNICODE_DATA_VERSION "3.0.1"
#define G_UNICODE_DATA_VERSION "3.1"
#define G_UNICODE_LAST_CHAR 0xffff
@ -487,11 +487,11 @@ static char page3[256] = {
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER,
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER,
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER,
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER,
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED
};
static char page4[256] = {
@ -2142,7 +2142,7 @@ static char page22[256] = {
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER,
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION,
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION,
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER,
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED,
@ -4982,7 +4982,7 @@ static unsigned short attrpage0[256] = {
0x0000, 0x0000, 0x0000, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5,
0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee,
0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0000,
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0000, 0x00c0,
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0xd800, 0x00c0,
0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9,
0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2,
0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, 0x00d8, 0x00d9, 0x00da, 0x00db,
@ -4998,7 +4998,7 @@ static unsigned short attrpage1[256] = {
0x012c, 0x012f, 0x012e, 0x0069, 0x0049, 0x0133, 0x0132, 0x0135, 0x0134,
0x0137, 0x0136, 0x0000, 0x013a, 0x0139, 0x013c, 0x013b, 0x013e, 0x013d,
0x0140, 0x013f, 0x0142, 0x0141, 0x0144, 0x0143, 0x0146, 0x0145, 0x0148,
0x0147, 0x0000, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, 0x014e, 0x0151,
0x0147, 0xd80e, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, 0x014e, 0x0151,
0x0150, 0x0153, 0x0152, 0x0155, 0x0154, 0x0157, 0x0156, 0x0159, 0x0158,
0x015b, 0x015a, 0x015d, 0x015c, 0x015f, 0x015e, 0x0161, 0x0160, 0x0163,
0x0162, 0x0165, 0x0164, 0x0167, 0x0166, 0x0169, 0x0168, 0x016b, 0x016a,
@ -5016,7 +5016,7 @@ static unsigned short attrpage1[256] = {
0x01d0, 0x01cf, 0x01d2, 0x01d1, 0x01d4, 0x01d3, 0x01d6, 0x01d5, 0x01d8,
0x01d7, 0x01da, 0x01d9, 0x01dc, 0x01db, 0x018e, 0x01df, 0x01de, 0x01e1,
0x01e0, 0x01e3, 0x01e2, 0x01e5, 0x01e4, 0x01e7, 0x01e6, 0x01e9, 0x01e8,
0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, 0x0000, 0x01f3, 0x0000,
0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, 0xd811, 0x01f3, 0x0000,
0x01f1, 0x01f5, 0x01f4, 0x0195, 0x01bf, 0x01f9, 0x01f8, 0x01fb, 0x01fa,
0x01fd, 0x01fc, 0x01ff, 0x01fe
};
@ -5070,10 +5070,10 @@ static unsigned short attrpage3[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ac,
0x0000, 0x03ad, 0x03ae, 0x03af, 0x0000, 0x03cc, 0x0000, 0x03cd, 0x03ce,
0x0000, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8,
0xd80f, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8,
0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1,
0x0000, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, 0x03ca,
0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0x0000, 0x0391, 0x0392, 0x0393,
0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0xd810, 0x0391, 0x0392, 0x0393,
0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c,
0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, 0x03a3, 0x03a3, 0x03a4, 0x03a5,
0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f,
@ -5081,7 +5081,7 @@ static unsigned short attrpage3[256] = {
0x0000, 0x0000, 0x03db, 0x03da, 0x03dd, 0x03dc, 0x03df, 0x03de, 0x03e1,
0x03e0, 0x03e3, 0x03e2, 0x03e5, 0x03e4, 0x03e7, 0x03e6, 0x03e9, 0x03e8,
0x03eb, 0x03ea, 0x03ed, 0x03ec, 0x03ef, 0x03ee, 0x039a, 0x03a1, 0x03a3,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x03b8, 0x0395, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000
};
@ -5133,7 +5133,7 @@ static unsigned short attrpage5[256] = {
0x053c, 0x053d, 0x053e, 0x053f, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544,
0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d,
0x054e, 0x054f, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0xd808, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@ -5550,8 +5550,8 @@ static unsigned short attrpage30[256] = {
0x1e74, 0x1e77, 0x1e76, 0x1e79, 0x1e78, 0x1e7b, 0x1e7a, 0x1e7d, 0x1e7c,
0x1e7f, 0x1e7e, 0x1e81, 0x1e80, 0x1e83, 0x1e82, 0x1e85, 0x1e84, 0x1e87,
0x1e86, 0x1e89, 0x1e88, 0x1e8b, 0x1e8a, 0x1e8d, 0x1e8c, 0x1e8f, 0x1e8e,
0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x1e60, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ea1, 0x1ea0,
0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0xd812, 0xd813, 0xd814,
0xd815, 0xd816, 0x1e60, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ea1, 0x1ea0,
0x1ea3, 0x1ea2, 0x1ea5, 0x1ea4, 0x1ea7, 0x1ea6, 0x1ea9, 0x1ea8, 0x1eab,
0x1eaa, 0x1ead, 0x1eac, 0x1eaf, 0x1eae, 0x1eb1, 0x1eb0, 0x1eb3, 0x1eb2,
0x1eb5, 0x1eb4, 0x1eb7, 0x1eb6, 0x1eb9, 0x1eb8, 0x1ebb, 0x1eba, 0x1ebd,
@ -5574,27 +5574,27 @@ static unsigned short attrpage31[256] = {
0x1f25, 0x1f26, 0x1f27, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d,
0x1f3e, 0x1f3f, 0x1f30, 0x1f31, 0x1f32, 0x1f33, 0x1f34, 0x1f35, 0x1f36,
0x1f37, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000,
0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x0000,
0x1f59, 0x0000, 0x1f5b, 0x0000, 0x1f5d, 0x0000, 0x1f5f, 0x0000, 0x1f51,
0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, 0xd817,
0x1f59, 0xd818, 0x1f5b, 0xd819, 0x1f5d, 0xd81a, 0x1f5f, 0x0000, 0x1f51,
0x0000, 0x1f53, 0x0000, 0x1f55, 0x0000, 0x1f57, 0x1f68, 0x1f69, 0x1f6a,
0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f60, 0x1f61, 0x1f62, 0x1f63,
0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca,
0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb,
0x0000, 0x0000, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e,
0x1f8f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fa8, 0x1fa9,
0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fb8, 0x1fb9, 0x0000, 0x1fbc,
0x0000, 0x0000, 0x0000, 0x0000, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x0000,
0x0000, 0x0399, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fcc, 0x0000, 0x0000,
0x0000, 0x0000, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x0000, 0x0000, 0x0000,
0x0000, 0x1fd8, 0x1fd9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0xd827, 0xd828, 0xd829, 0xd82a, 0xd82b, 0xd82c, 0xd82d,
0xd82e, 0xd82f, 0xd830, 0xd831, 0xd832, 0xd833, 0xd834, 0xd835, 0xd836,
0xd837, 0xd838, 0xd839, 0xd83a, 0xd83b, 0xd83c, 0xd83d, 0xd83e, 0xd83f,
0xd840, 0xd841, 0xd842, 0xd843, 0xd844, 0xd845, 0xd846, 0xd847, 0xd848,
0xd849, 0xd84a, 0xd84b, 0xd84c, 0xd84d, 0xd84e, 0xd84f, 0xd850, 0xd851,
0xd852, 0xd853, 0xd854, 0xd855, 0xd856, 0x1fb8, 0x1fb9, 0xd85d, 0xd857,
0xd85e, 0x0000, 0xd81b, 0xd863, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0xd858,
0x0000, 0x0399, 0x0000, 0x0000, 0x0000, 0xd85f, 0xd859, 0xd860, 0x0000,
0xd81c, 0xd864, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0xd85a, 0x0000, 0x0000,
0x0000, 0x1fd8, 0x1fd9, 0xd81d, 0xd81e, 0x0000, 0x0000, 0xd81f, 0xd820,
0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fe8,
0x1fe9, 0x0000, 0x0000, 0x0000, 0x1fec, 0x0000, 0x0000, 0x1fe0, 0x1fe1,
0x1f7a, 0x1f7b, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x1ffc, 0x0000, 0x0000, 0x0000, 0x0000, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d,
0x0000, 0x0000, 0x0000, 0x0000
0x1fe9, 0xd821, 0xd822, 0xd823, 0x1fec, 0xd824, 0xd825, 0x1fe0, 0x1fe1,
0x1f7a, 0x1f7b, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xd861,
0xd85b, 0xd862, 0x0000, 0xd826, 0xd865, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d,
0xd85c, 0x0000, 0x0000, 0x0000
};
static unsigned short attrpage33[256] = {
@ -5629,6 +5629,38 @@ static unsigned short attrpage33[256] = {
0x0000, 0x0000, 0x0000, 0x0000
};
static unsigned short attrpage251[256] = {
0xd801, 0xd802, 0xd803, 0xd804, 0xd805, 0xd806, 0xd807, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0xd809, 0xd80a, 0xd80b, 0xd80c, 0xd80d, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000
};
static unsigned short attrpage255[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001,
@ -5913,7 +5945,7 @@ static unsigned short *attr_table[256] = {
0x0000,
0x0000,
0x0000,
0x0000,
attrpage251,
0x0000,
0x0000,
0x0000,
@ -5954,4 +5986,284 @@ static unsigned short title_table[][3] = {
{ 0x1ffc, 0x0000, 0x1ff3 }
};
/* Table of special cases for case conversion. Each record contains,
* first, the best single-character mapping to lowercase if Lu,
* and to uppercase if Ll, followed by the output mappings for the two cases
* other than the case of the codepoint, in the order [Ll],[Lu],[Lt],
* separated and terminated by a double NUL.
*/
guchar special_case_table[][18] = {
"\x00\x00\x00\x53\x00\x53\x00\x00\x00\x53\x00\x73\0",
"\x00\x00\x00\x46\x00\x46\x00\x00\x00\x46\x00\x66\0",
"\x00\x00\x00\x46\x00\x49\x00\x00\x00\x46\x00\x69\0",
"\x00\x00\x00\x46\x00\x4c\x00\x00\x00\x46\x00\x6c\0",
"\x00\x00\x00\x46\x00\x46\x00\x49\x00\x00\x00\x46\x00\x66\x00\x69\0",
"\x00\x00\x00\x46\x00\x46\x00\x4c\x00\x00\x00\x46\x00\x66\x00\x6c\0",
"\x00\x00\x00\x53\x00\x54\x00\x00\x00\x53\x00\x74\0",
"\x00\x00\x00\x53\x00\x54\x00\x00\x00\x53\x00\x74\0",
"\x00\x00\x05\x35\x05\x52\x00\x00\x05\x35\x05\x82\0",
"\x00\x00\x05\x44\x05\x46\x00\x00\x05\x44\x05\x76\0",
"\x00\x00\x05\x44\x05\x35\x00\x00\x05\x44\x05\x65\0",
"\x00\x00\x05\x44\x05\x3b\x00\x00\x05\x44\x05\x6b\0",
"\x00\x00\x05\x4e\x05\x46\x00\x00\x05\x4e\x05\x76\0",
"\x00\x00\x05\x44\x05\x3d\x00\x00\x05\x44\x05\x6d\0",
"\x00\x00\x02\xbc\x00\x4e\x00\x00\x02\xbc\x00\x4e\0",
"\x00\x00\x03\x99\x03\x08\x03\x01\x00\x00\x03\x99\x03\x08\x03\x01\0",
"\x00\x00\x03\xa5\x03\x08\x03\x01\x00\x00\x03\xa5\x03\x08\x03\x01\0",
"\x00\x00\x00\x4a\x03\x0c\x00\x00\x00\x4a\x03\x0c\0",
"\x00\x00\x00\x48\x03\x31\x00\x00\x00\x48\x03\x31\0",
"\x00\x00\x00\x54\x03\x08\x00\x00\x00\x54\x03\x08\0",
"\x00\x00\x00\x57\x03\x0a\x00\x00\x00\x57\x03\x0a\0",
"\x00\x00\x00\x59\x03\x0a\x00\x00\x00\x59\x03\x0a\0",
"\x00\x00\x00\x41\x02\xbe\x00\x00\x00\x41\x02\xbe\0",
"\x00\x00\x03\xa5\x03\x13\x00\x00\x03\xa5\x03\x13\0",
"\x00\x00\x03\xa5\x03\x13\x03\x00\x00\x00\x03\xa5\x03\x13\x03\x00\0",
"\x00\x00\x03\xa5\x03\x13\x03\x01\x00\x00\x03\xa5\x03\x13\x03\x01\0",
"\x00\x00\x03\xa5\x03\x13\x03\x42\x00\x00\x03\xa5\x03\x13\x03\x42\0",
"\x00\x00\x03\x91\x03\x42\x00\x00\x03\x91\x03\x42\0",
"\x00\x00\x03\x97\x03\x42\x00\x00\x03\x97\x03\x42\0",
"\x00\x00\x03\x99\x03\x08\x03\x00\x00\x00\x03\x99\x03\x08\x03\x00\0",
"\x00\x00\x03\x99\x03\x08\x03\x01\x00\x00\x03\x99\x03\x08\x03\x01\0",
"\x00\x00\x03\x99\x03\x42\x00\x00\x03\x99\x03\x42\0",
"\x00\x00\x03\x99\x03\x08\x03\x42\x00\x00\x03\x99\x03\x08\x03\x42\0",
"\x00\x00\x03\xa5\x03\x08\x03\x00\x00\x00\x03\xa5\x03\x08\x03\x00\0",
"\x00\x00\x03\xa5\x03\x08\x03\x01\x00\x00\x03\xa5\x03\x08\x03\x01\0",
"\x00\x00\x03\xa1\x03\x13\x00\x00\x03\xa1\x03\x13\0",
"\x00\x00\x03\xa5\x03\x42\x00\x00\x03\xa5\x03\x42\0",
"\x00\x00\x03\xa5\x03\x08\x03\x42\x00\x00\x03\xa5\x03\x08\x03\x42\0",
"\x00\x00\x03\xa9\x03\x42\x00\x00\x03\xa9\x03\x42\0",
"\x1f\x88\x1f\x08\x03\x99\x00\x00\x1f\x88\0",
"\x1f\x89\x1f\x09\x03\x99\x00\x00\x1f\x89\0",
"\x1f\x8a\x1f\x0a\x03\x99\x00\x00\x1f\x8a\0",
"\x1f\x8b\x1f\x0b\x03\x99\x00\x00\x1f\x8b\0",
"\x1f\x8c\x1f\x0c\x03\x99\x00\x00\x1f\x8c\0",
"\x1f\x8d\x1f\x0d\x03\x99\x00\x00\x1f\x8d\0",
"\x1f\x8e\x1f\x0e\x03\x99\x00\x00\x1f\x8e\0",
"\x1f\x8f\x1f\x0f\x03\x99\x00\x00\x1f\x8f\0",
"\x1f\x80\x00\x00\x1f\x08\x03\x99\0",
"\x1f\x81\x00\x00\x1f\x09\x03\x99\0",
"\x1f\x82\x00\x00\x1f\x0a\x03\x99\0",
"\x1f\x83\x00\x00\x1f\x0b\x03\x99\0",
"\x1f\x84\x00\x00\x1f\x0c\x03\x99\0",
"\x1f\x85\x00\x00\x1f\x0d\x03\x99\0",
"\x1f\x86\x00\x00\x1f\x0e\x03\x99\0",
"\x1f\x87\x00\x00\x1f\x0f\x03\x99\0",
"\x1f\x98\x1f\x28\x03\x99\x00\x00\x1f\x98\0",
"\x1f\x99\x1f\x29\x03\x99\x00\x00\x1f\x99\0",
"\x1f\x9a\x1f\x2a\x03\x99\x00\x00\x1f\x9a\0",
"\x1f\x9b\x1f\x2b\x03\x99\x00\x00\x1f\x9b\0",
"\x1f\x9c\x1f\x2c\x03\x99\x00\x00\x1f\x9c\0",
"\x1f\x9d\x1f\x2d\x03\x99\x00\x00\x1f\x9d\0",
"\x1f\x9e\x1f\x2e\x03\x99\x00\x00\x1f\x9e\0",
"\x1f\x9f\x1f\x2f\x03\x99\x00\x00\x1f\x9f\0",
"\x1f\x90\x00\x00\x1f\x28\x03\x99\0",
"\x1f\x91\x00\x00\x1f\x29\x03\x99\0",
"\x1f\x92\x00\x00\x1f\x2a\x03\x99\0",
"\x1f\x93\x00\x00\x1f\x2b\x03\x99\0",
"\x1f\x94\x00\x00\x1f\x2c\x03\x99\0",
"\x1f\x95\x00\x00\x1f\x2d\x03\x99\0",
"\x1f\x96\x00\x00\x1f\x2e\x03\x99\0",
"\x1f\x97\x00\x00\x1f\x2f\x03\x99\0",
"\x1f\xa8\x1f\x68\x03\x99\x00\x00\x1f\xa8\0",
"\x1f\xa9\x1f\x69\x03\x99\x00\x00\x1f\xa9\0",
"\x1f\xaa\x1f\x6a\x03\x99\x00\x00\x1f\xaa\0",
"\x1f\xab\x1f\x6b\x03\x99\x00\x00\x1f\xab\0",
"\x1f\xac\x1f\x6c\x03\x99\x00\x00\x1f\xac\0",
"\x1f\xad\x1f\x6d\x03\x99\x00\x00\x1f\xad\0",
"\x1f\xae\x1f\x6e\x03\x99\x00\x00\x1f\xae\0",
"\x1f\xaf\x1f\x6f\x03\x99\x00\x00\x1f\xaf\0",
"\x1f\xa0\x00\x00\x1f\x68\x03\x99\0",
"\x1f\xa1\x00\x00\x1f\x69\x03\x99\0",
"\x1f\xa2\x00\x00\x1f\x6a\x03\x99\0",
"\x1f\xa3\x00\x00\x1f\x6b\x03\x99\0",
"\x1f\xa4\x00\x00\x1f\x6c\x03\x99\0",
"\x1f\xa5\x00\x00\x1f\x6d\x03\x99\0",
"\x1f\xa6\x00\x00\x1f\x6e\x03\x99\0",
"\x1f\xa7\x00\x00\x1f\x6f\x03\x99\0",
"\x1f\xbc\x03\x91\x03\x99\x00\x00\x1f\xbc\0",
"\x1f\xb3\x00\x00\x03\x91\x03\x99\0",
"\x1f\xcc\x03\x97\x03\x99\x00\x00\x1f\xcc\0",
"\x1f\xc3\x00\x00\x03\x97\x03\x99\0",
"\x1f\xfc\x03\xa9\x03\x99\x00\x00\x1f\xfc\0",
"\x1f\xf3\x00\x00\x03\xa9\x03\x99\0",
"\x00\x00\x1f\xba\x03\x99\x00\x00\x1f\xba\x03\x45\0",
"\x00\x00\x03\x86\x03\x99\x00\x00\x03\x86\x03\x45\0",
"\x00\x00\x1f\xca\x03\x99\x00\x00\x1f\xca\x03\x45\0",
"\x00\x00\x03\x89\x03\x99\x00\x00\x03\x89\x03\x45\0",
"\x00\x00\x1f\xfa\x03\x99\x00\x00\x1f\xfa\x03\x45\0",
"\x00\x00\x03\x8f\x03\x99\x00\x00\x03\x8f\x03\x45\0",
"\x00\x00\x03\x91\x03\x42\x03\x99\x00\x00\x03\x91\x03\x42\x03\x45\0",
"\x00\x00\x03\x97\x03\x42\x03\x99\x00\x00\x03\x97\x03\x42\x03\x45\0",
"\x00\x00\x03\xa9\x03\x42\x03\x99\x00\x00\x03\xa9\x03\x42\x03\x45\0",
};
/* Table of casefolding cases that can't be derived by lowercasing
*/
struct {
guint16 ch;
gchar data[7];
} casefold_table[] = {
{ 0xb5, "μ" },
{ 0xdf, "ss" },
{ 0x131, "i" },
{ 0x149, "ʼn" },
{ 0x17f, "s" },
{ 0x1f0, "" },
{ 0x345, "ι" },
{ 0x390, "ΐ" },
{ 0x3b0, "ΰ" },
{ 0x3c2, "σ" },
{ 0x3d0, "β" },
{ 0x3d1, "θ" },
{ 0x3d5, "φ" },
{ 0x3d6, "π" },
{ 0x3f0, "κ" },
{ 0x3f1, "ρ" },
{ 0x3f2, "σ" },
{ 0x3f5, "ε" },
{ 0x587, "եւ" },
{ 0x1e96, "" },
{ 0x1e97, "" },
{ 0x1e98, "" },
{ 0x1e99, "" },
{ 0x1e9a, "aʾ" },
{ 0x1e9b, "" },
{ 0x1f50, "ὐ" },
{ 0x1f52, "ὒ" },
{ 0x1f54, "ὔ" },
{ 0x1f56, "ὖ" },
{ 0x1f80, "ἀι" },
{ 0x1f81, "ἁι" },
{ 0x1f82, "ἂι" },
{ 0x1f83, "ἃι" },
{ 0x1f84, "ἄι" },
{ 0x1f85, "ἅι" },
{ 0x1f86, "ἆι" },
{ 0x1f87, "ἇι" },
{ 0x1f88, "ἀι" },
{ 0x1f89, "ἁι" },
{ 0x1f8a, "ἂι" },
{ 0x1f8b, "ἃι" },
{ 0x1f8c, "ἄι" },
{ 0x1f8d, "ἅι" },
{ 0x1f8e, "ἆι" },
{ 0x1f8f, "ἇι" },
{ 0x1f90, "ἠι" },
{ 0x1f91, "ἡι" },
{ 0x1f92, "ἢι" },
{ 0x1f93, "ἣι" },
{ 0x1f94, "ἤι" },
{ 0x1f95, "ἥι" },
{ 0x1f96, "ἦι" },
{ 0x1f97, "ἧι" },
{ 0x1f98, "ἠι" },
{ 0x1f99, "ἡι" },
{ 0x1f9a, "ἢι" },
{ 0x1f9b, "ἣι" },
{ 0x1f9c, "ἤι" },
{ 0x1f9d, "ἥι" },
{ 0x1f9e, "ἦι" },
{ 0x1f9f, "ἧι" },
{ 0x1fa0, "ὠι" },
{ 0x1fa1, "ὡι" },
{ 0x1fa2, "ὢι" },
{ 0x1fa3, "ὣι" },
{ 0x1fa4, "ὤι" },
{ 0x1fa5, "ὥι" },
{ 0x1fa6, "ὦι" },
{ 0x1fa7, "ὧι" },
{ 0x1fa8, "ὠι" },
{ 0x1fa9, "ὡι" },
{ 0x1faa, "ὢι" },
{ 0x1fab, "ὣι" },
{ 0x1fac, "ὤι" },
{ 0x1fad, "ὥι" },
{ 0x1fae, "ὦι" },
{ 0x1faf, "ὧι" },
{ 0x1fb2, "ὰι" },
{ 0x1fb3, "αι" },
{ 0x1fb4, "άι" },
{ 0x1fb6, "ᾶ" },
{ 0x1fb7, "ᾶι" },
{ 0x1fbc, "αι" },
{ 0x1fbe, "ι" },
{ 0x1fc2, "ὴι" },
{ 0x1fc3, "ηι" },
{ 0x1fc4, "ήι" },
{ 0x1fc6, "ῆ" },
{ 0x1fc7, "ῆι" },
{ 0x1fcc, "ηι" },
{ 0x1fd2, "ῒ" },
{ 0x1fd3, "ΐ" },
{ 0x1fd6, "ῖ" },
{ 0x1fd7, "ῗ" },
{ 0x1fe2, "ῢ" },
{ 0x1fe3, "ΰ" },
{ 0x1fe4, "ῤ" },
{ 0x1fe6, "ῦ" },
{ 0x1fe7, "ῧ" },
{ 0x1ff2, "ὼι" },
{ 0x1ff3, "ωι" },
{ 0x1ff4, "ώι" },
{ 0x1ff6, "ῶ" },
{ 0x1ff7, "ῶι" },
{ 0x1ffc, "ωι" },
{ 0x2160, "" },
{ 0x2161, "" },
{ 0x2162, "" },
{ 0x2163, "" },
{ 0x2164, "" },
{ 0x2165, "" },
{ 0x2166, "" },
{ 0x2167, "" },
{ 0x2168, "" },
{ 0x2169, "" },
{ 0x216a, "" },
{ 0x216b, "" },
{ 0x216c, "" },
{ 0x216d, "" },
{ 0x216e, "" },
{ 0x216f, "" },
{ 0x24b6, "" },
{ 0x24b7, "" },
{ 0x24b8, "" },
{ 0x24b9, "" },
{ 0x24ba, "" },
{ 0x24bb, "" },
{ 0x24bc, "" },
{ 0x24bd, "" },
{ 0x24be, "" },
{ 0x24bf, "" },
{ 0x24c0, "" },
{ 0x24c1, "" },
{ 0x24c2, "" },
{ 0x24c3, "" },
{ 0x24c4, "" },
{ 0x24c5, "" },
{ 0x24c6, "" },
{ 0x24c7, "" },
{ 0x24c8, "" },
{ 0x24c9, "" },
{ 0x24ca, "" },
{ 0x24cb, "" },
{ 0x24cc, "" },
{ 0x24cd, "" },
{ 0x24ce, "" },
{ 0x24cf, "" },
{ 0xfb00, "ff" },
{ 0xfb01, "fi" },
{ 0xfb02, "fl" },
{ 0xfb03, "ffi" },
{ 0xfb04, "ffl" },
{ 0xfb05, "st" },
{ 0xfb06, "st" },
{ 0xfb13, "մն" },
{ 0xfb14, "մե" },
{ 0xfb15, "մի" },
{ 0xfb16, "վն" },
{ 0xfb17, "մխ" },
};
#endif /* CHARTABLES_H */

View File

@ -247,6 +247,28 @@ gboolean g_utf8_validate (const gchar *str,
/* Validate a Unicode character */
gboolean g_unichar_validate (gunichar ch);
gchar *g_utf8_strup (const gchar *str);
gchar *g_utf8_strdown (const gchar *str);
gchar *g_utf8_casefold (const gchar *str);
/* Modes accepted by g_utf8_normalize(). There are four distinct values;
* each one has a descriptive name and an alias (NFD, NFC, NFKD, NFKC).
* NOTE(review): the aliases presumably name the corresponding Unicode
* normalization forms -- confirm against the g_utf8_normalize() docs.
*/
typedef enum {
G_NORMALIZE_DEFAULT,
G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, /* alias of G_NORMALIZE_DEFAULT */
G_NORMALIZE_DEFAULT_COMPOSE,
G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE, /* alias of G_NORMALIZE_DEFAULT_COMPOSE */
G_NORMALIZE_ALL,
G_NORMALIZE_NFKD = G_NORMALIZE_ALL, /* alias of G_NORMALIZE_ALL */
G_NORMALIZE_ALL_COMPOSE,
G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE /* alias of G_NORMALIZE_ALL_COMPOSE */
} GNormalizeMode;
gchar *g_utf8_normalize (const gchar *str,
GNormalizeMode mode);
gint g_utf8_collate (const gchar *str1,
const gchar *str2);
gchar *g_utf8_collate_key (const gchar *str);
G_END_DECLS
#endif /* __G_UNICODE_H__ */

233
glib/gunicollate.c Normal file
View File

@ -0,0 +1,233 @@
/* gunicollate.c - Collation
*
* Copyright 2001 Red Hat, Inc.
*
* The Gnome Library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* The Gnome Library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with the Gnome Library; see the file COPYING.LIB. If not,
* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <locale.h>
#include <string.h>
#ifdef __STDC_ISO_10646__
#include <wchar.h>
#endif
#include "glib.h"
extern gunichar *_g_utf8_normalize_wc (const gchar *str,
GNormalizeMode mode);
/**
 * g_utf8_collate:
 * @str1: a UTF-8 encoded string
 * @str2: a UTF-8 encoded string
 *
 * Compares two strings for ordering using the linguistically
 * correct rules for the current locale. When sorting a large
 * number of strings, it will be significantly faster to
 * obtain collation keys with g_utf8_collate_key() and
 * compare the keys with strcmp() when sorting instead of
 * sorting the original strings.
 *
 * Both strings are normalized with %G_NORMALIZE_ALL_COMPOSE before
 * they are compared.
 *
 * Return value: a negative value if @str1 compares before @str2,
 *   0 if they compare equal, a positive value if @str1 compares
 *   after @str2. Only the sign is meaningful.
 **/
gint
g_utf8_collate (const gchar *str1,
                const gchar *str2)
{
  gint result;

#ifdef __STDC_ISO_10646__

  /* The normalized UCS-4 strings are handed to wcscoll() as wchar_t
   * strings, so no charset conversion is needed in this branch.
   */
  gunichar *str1_norm = _g_utf8_normalize_wc (str1, G_NORMALIZE_ALL_COMPOSE);
  gunichar *str2_norm = _g_utf8_normalize_wc (str2, G_NORMALIZE_ALL_COMPOSE);

  result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm);

  g_free (str1_norm);
  g_free (str2_norm);

#else /* !__STDC_ISO_10646__ */

  const gchar *charset;
  gchar *str1_norm = g_utf8_normalize (str1, G_NORMALIZE_ALL_COMPOSE);
  gchar *str2_norm = g_utf8_normalize (str2, G_NORMALIZE_ALL_COMPOSE);

  if (g_get_charset (&charset))
    {
      /* The locale charset is UTF-8: strcoll() can take the strings as is. */
      result = strcoll (str1_norm, str2_norm);
    }
  else
    {
      /* strcoll() expects the locale's charset.  g_convert() takes the
       * target codeset first and the source codeset second.
       */
      gchar *str1_locale = g_convert (str1_norm, -1, charset, "UTF-8", NULL, NULL, NULL);
      gchar *str2_locale = g_convert (str2_norm, -1, charset, "UTF-8", NULL, NULL, NULL);

      if (str1_locale && str2_locale)
        result = strcoll (str1_locale, str2_locale);
      else if (str1_locale)
        result = -1;            /* convertible strings sort before unconvertible ones */
      else if (str2_locale)
        result = 1;
      else
        result = strcmp (str1_norm, str2_norm);   /* neither converts: byte order */

      g_free (str1_locale);
      g_free (str2_locale);
    }

  g_free (str1_norm);
  g_free (str2_norm);

#endif /* __STDC_ISO_10646__ */

  return result;
}
#ifdef __STDC_ISO_10646__
/* We need UTF-8 encoding of numbers to encode the weights if
* we are using wcsxfrm. However, we aren't encoding Unicode
* characters, so we can't simply use g_unichar_to_utf8.
*
* The following routine is taken (with modification) from GNU
* libc's strxfrm routine:
*
* Copyright (C) 1995-1999,2000,2001 Free Software Foundation, Inc.
* Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
*/
/* Stores @val at @buf using the UTF-8 bit layout and returns the number
 * of bytes that layout needs (1 to 6). When @buf is NULL nothing is
 * written and only the length is returned. No terminating NUL is added.
 */
static inline int
utf8_encode (char *buf, wchar_t val)
{
  int n_bytes;
  int pos;

  /* Values below 0x80 are stored as a single byte. */
  if (val < 0x80)
    {
      if (buf)
        *buf = (char) val;
      return 1;
    }

  /* An n-byte sequence carries 5 * n + 1 payload bits; take the
   * shortest one that leaves no bits of val above that, capped at 6.
   */
  n_bytes = 2;
  while (n_bytes < 6 && (val & (~(guint32)0 << (5 * n_bytes + 1))) != 0)
    n_bytes++;

  if (!buf)
    return n_bytes;

  /* Length marker of the lead byte; its payload bits are OR-ed in last. */
  *buf = (unsigned char) (~0xff >> n_bytes);

  /* Fill the continuation bytes from the last one backwards, six bits each. */
  for (pos = n_bytes - 1; pos > 0; pos--)
    {
      buf[pos] = 0x80 | (val & 0x3f);
      val >>= 6;
    }

  *buf |= val;

  return n_bytes;
}
#endif /* __STDC_ISO_10646__ */
/**
 * g_utf8_collate_key:
 * @str: a UTF-8 encoded string.
 *
 * Converts a string into a collation key that can be compared
 * with other collation keys using strcmp(). The results of
 * comparing the collation keys of two strings with strcmp()
 * will always be the same as comparing the two original
 * strings with g_utf8_collate().
 *
 * The key is computed for the collation locale that is in effect
 * when the function is called; this function does not change the
 * locale itself.
 *
 * Return value: a newly allocated string. This string should
 *   be freed with g_free() when you are done with it.
 **/
gchar *
g_utf8_collate_key (const gchar *str)
{
  gchar *result;
  size_t len;

#ifdef __STDC_ISO_10646__

  gunichar *str_norm = _g_utf8_normalize_wc (str, G_NORMALIZE_ALL_COMPOSE);
  wchar_t *result_wc;
  size_t i;
  size_t result_len = 0;

  /* No setlocale() here: a library function must not change the
   * process-wide locale, and g_utf8_collate() compares under whatever
   * locale the application has selected, so the key has to as well.
   */
  len = wcsxfrm (NULL, (wchar_t *)str_norm, 0);
  result_wc = g_new (wchar_t, len + 1);
  wcsxfrm (result_wc, (wchar_t *)str_norm, len + 1);

  /* The weights are wide values; store each of them with the UTF-8 bit
   * layout so that the key is a NUL-terminated byte string.  The first
   * pass only measures, the second one writes.
   */
  for (i = 0; i < len; i++)
    result_len += utf8_encode (NULL, result_wc[i]);

  result = g_malloc (result_len + 1);
  result_len = 0;
  for (i = 0; i < len; i++)
    result_len += utf8_encode (result + result_len, result_wc[i]);

  result[result_len] = '\0';

  g_free (result_wc);
  g_free (str_norm);

#else /* !__STDC_ISO_10646__ */

  const gchar *charset;
  gchar *str_norm = g_utf8_normalize (str, G_NORMALIZE_ALL_COMPOSE);

  if (g_get_charset (&charset))
    {
      /* The locale charset is UTF-8: transform the string as is. */
      len = strxfrm (NULL, str_norm, 0);
      result = g_malloc (len + 1);
      strxfrm (result, str_norm, len + 1);
    }
  else
    {
      /* strxfrm() expects the locale's charset.  g_convert() takes the
       * target codeset first and the source codeset second.
       */
      gchar *str_locale = g_convert (str_norm, -1, charset, "UTF-8", NULL, NULL, NULL);

      if (str_locale)
        {
          /* 'A' prefix: keys of convertible strings sort before the
           * 'B'-prefixed keys of unconvertible ones, matching the order
           * g_utf8_collate() uses.
           */
          len = strxfrm (NULL, str_locale, 0);
          result = g_malloc (len + 2);
          result[0] = 'A';
          strxfrm (result + 1, str_locale, len + 1);

          g_free (str_locale);
        }
      else
        {
          /* Not representable in the locale charset: fall back to the
           * normalized UTF-8 bytes themselves.
           */
          len = strlen (str_norm);
          result = g_malloc (len + 2);
          result[0] = 'B';
          memcpy (result + 1, str_norm, len);
          result[len+1] = '\0';
        }
    }

  g_free (str_norm);

#endif /* __STDC_ISO_10646__ */

  return result;
}

667
glib/gunicomp.h Normal file
View File

@ -0,0 +1,667 @@
#define COMPOSE_FIRST_START 1
#define COMPOSE_FIRST_SINGLE_START 147
#define COMPOSE_SECOND_START 357
#define COMPOSE_SECOND_SINGLE_START 388
static gushort compose_page0[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 147, 148, 149, 0, 0, 1, 2, 3, 4, 5, 150, 6,
7, 8, 151, 9, 10, 11, 12, 13, 14, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0,
0, 0, 0, 0, 0, 24, 25, 26, 27, 28, 152, 29, 30, 31, 32, 33, 34, 35, 36, 37,
38, 0, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 153, 154, 50, 155, 0, 0, 51, 0, 0, 0,
0, 156, 0, 0, 0, 0, 52, 53, 157, 0, 158, 0, 0, 0, 54, 0, 0, 0, 0, 0, 55, 0,
159, 160, 56, 161, 0, 0, 57, 0, 0, 0, 0, 162, 0, 0, 0, 0, 58, 59, 163, 0,
164, 0, 0, 0, 60, 0, 0, 0
};
static gushort compose_page1[256] = {
0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 64, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 65, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 165, 166, 0, 0, 0, 0,
167, 168, 0, 0, 0, 0, 0, 0, 169, 170, 171, 172, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 68, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 70, 0, 0, 0, 0, 0, 0, 174, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175, 176, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page2[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 178, 179, 180, 0, 0, 0, 0, 181,
182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
183, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page3[256] = {
357, 358, 359, 360, 361, 0, 362, 363, 364, 365, 366, 367, 368, 0, 0, 369,
0, 370, 0, 371, 372, 0, 0, 0, 0, 0, 0, 373, 0, 0, 0, 0, 0, 0, 0, 374, 375,
376, 377, 378, 379, 0, 0, 0, 0, 380, 381, 0, 382, 383, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 384, 0, 0, 385, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 72, 0, 73, 0, 74, 0,
0, 0, 0, 0, 75, 0, 184, 0, 0, 0, 76, 0, 0, 0, 77, 0, 0, 185, 0, 186, 0, 0,
78, 0, 0, 0, 79, 0, 80, 0, 81, 0, 0, 0, 0, 0, 82, 0, 83, 0, 0, 0, 84, 0, 0,
0, 85, 86, 87, 0, 0, 187, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page4[256] = {
0, 0, 0, 0, 0, 0, 188, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 189, 0, 90, 91,
190, 92, 0, 191, 0, 0, 0, 192, 0, 0, 0, 0, 93, 0, 0, 0, 193, 0, 0, 0, 194,
0, 195, 0, 0, 94, 0, 0, 196, 0, 95, 96, 197, 97, 0, 198, 0, 0, 0, 199, 0,
0, 0, 0, 98, 0, 0, 0, 200, 0, 0, 0, 201, 0, 202, 0, 0, 0, 0, 0, 0, 0, 0,
203, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 204, 205, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 206, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
208, 209, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page6[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 0,
211, 0, 0, 0, 0, 0, 0, 0, 0, 388, 389, 390, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 213, 0, 0, 214, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page9[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 215, 0, 0, 0, 0, 0, 0, 0, 216,
0, 0, 217, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 391, 0, 0, 0, 0, 0, 0,
0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 392, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page11[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 393, 0, 0, 0, 0, 0, 0, 0, 0, 101, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 394, 395, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 218,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 396, 0, 0, 0, 0, 0,
0, 0, 102, 219, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 397, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page12[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 398, 0, 0, 0,
103, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 399, 400, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page13[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 401, 0, 0, 0, 0, 0, 0, 0, 104, 223, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 402, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 403, 0, 0, 0, 0, 404, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105, 0, 0,
224, 0, 0, 405, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page16[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page30[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 226, 227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 228, 229, 0, 0, 0, 0, 0, 0,
230, 231, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 234, 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page31[256] = {
108, 109, 236, 237, 238, 239, 240, 241, 110, 111, 242, 243, 244, 245, 246,
247, 112, 113, 0, 0, 0, 0, 0, 0, 114, 115, 0, 0, 0, 0, 0, 0, 116, 117, 248,
249, 250, 251, 252, 253, 118, 119, 254, 255, 256, 257, 258, 259, 120, 121,
0, 0, 0, 0, 0, 0, 122, 123, 0, 0, 0, 0, 0, 0, 124, 125, 0, 0, 0, 0, 0, 0,
126, 127, 0, 0, 0, 0, 0, 0, 128, 129, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0,
0, 0, 131, 132, 260, 261, 262, 263, 264, 265, 133, 134, 266, 267, 268, 269,
270, 271, 272, 0, 0, 0, 273, 0, 0, 0, 0, 0, 0, 0, 274, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275, 0, 0, 0, 0, 0, 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 276, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 277, 0, 0, 0, 0, 0, 0, 0,
136, 0
};
static gushort compose_page33[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 278, 0, 279, 0,
280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281, 0, 282, 0, 283, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static gushort compose_page34[256] = {
0, 0, 0, 284, 0, 0, 0, 0, 285, 0, 0, 286, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 287, 0, 288, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 289, 0, 0, 0, 0, 0, 0, 290, 0, 291,
0, 0, 292, 0, 0, 0, 0, 293, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 294, 0, 0, 295, 296, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 297, 298,
0, 0, 299, 300, 0, 0, 301, 302, 303, 304, 0, 0, 0, 0, 305, 306, 0, 0, 307,
308, 0, 0, 0, 0, 0, 0, 0, 0, 0, 309, 310, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 311, 0, 0, 0, 0, 0, 312, 313, 0, 314, 0, 0, 0, 0, 0, 0, 315,
316, 317, 318, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
};
static gushort compose_page48[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319, 0, 0, 0,
0, 320, 0, 321, 0, 322, 0, 323, 0, 324, 0, 325, 0, 326, 0, 327, 0, 328, 0,
329, 0, 330, 0, 331, 0, 0, 332, 0, 333, 0, 334, 0, 0, 0, 0, 0, 0, 137, 0,
0, 138, 0, 0, 139, 0, 0, 140, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 386, 387, 0, 0, 335,
0, 0, 0, 0, 0, 0, 0, 0, 336, 0, 0, 0, 0, 337, 0, 338, 0, 339, 0, 340, 0,
341, 0, 342, 0, 343, 0, 344, 0, 345, 0, 346, 0, 347, 0, 348, 0, 0, 349, 0,
350, 0, 351, 0, 0, 0, 0, 0, 0, 142, 0, 0, 143, 0, 0, 144, 0, 0, 145, 0, 0,
146, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 352, 353,
354, 355, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 356, 0, 0
};
static unsigned short *compose_table[256] = {
compose_page0,
compose_page1,
compose_page2,
compose_page3,
compose_page4,
(gushort *) 0,
compose_page6,
(gushort *) 0,
(gushort *) 0,
compose_page9,
(gushort *) 0,
compose_page11,
compose_page12,
compose_page13,
(gushort *) 0,
(gushort *) 0,
compose_page16,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
compose_page30,
compose_page31,
(gushort *) 0,
compose_page33,
compose_page34,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
(gushort *) 0,
compose_page48
};
gushort compose_first_single[][2] = {
{ 0x0338, 0x226e },
{ 0x0338, 0x2260 },
{ 0x0338, 0x226f },
{ 0x0307, 0x1e1e },
{ 0x0302, 0x0134 },
{ 0x0307, 0x1e1f },
{ 0x0304, 0x01de },
{ 0x0301, 0x01fa },
{ 0x0301, 0x1e08 },
{ 0x0301, 0x1e2e },
{ 0x0304, 0x022a },
{ 0x0301, 0x01fe },
{ 0x0304, 0x01df },
{ 0x0301, 0x01fb },
{ 0x0301, 0x1e09 },
{ 0x0301, 0x1e2f },
{ 0x0304, 0x022b },
{ 0x0301, 0x01ff },
{ 0x0307, 0x1e64 },
{ 0x0307, 0x1e65 },
{ 0x0307, 0x1e66 },
{ 0x0307, 0x1e67 },
{ 0x0301, 0x1e78 },
{ 0x0301, 0x1e79 },
{ 0x0308, 0x1e7a },
{ 0x0308, 0x1e7b },
{ 0x0307, 0x1e9b },
{ 0x030c, 0x01ee },
{ 0x0304, 0x01ec },
{ 0x0304, 0x01ed },
{ 0x0304, 0x01e0 },
{ 0x0304, 0x01e1 },
{ 0x0306, 0x1e1c },
{ 0x0306, 0x1e1d },
{ 0x0304, 0x0230 },
{ 0x0304, 0x0231 },
{ 0x030c, 0x01ef },
{ 0x0314, 0x1fec },
{ 0x0345, 0x1fb4 },
{ 0x0345, 0x1fc4 },
{ 0x0345, 0x1ff4 },
{ 0x0308, 0x0407 },
{ 0x0301, 0x0403 },
{ 0x0308, 0x04de },
{ 0x0301, 0x040c },
{ 0x0308, 0x04e6 },
{ 0x0308, 0x04f4 },
{ 0x0308, 0x04f8 },
{ 0x0308, 0x04ec },
{ 0x0301, 0x0453 },
{ 0x0308, 0x04df },
{ 0x0301, 0x045c },
{ 0x0308, 0x04e7 },
{ 0x0308, 0x04f5 },
{ 0x0308, 0x04f9 },
{ 0x0308, 0x04ed },
{ 0x0308, 0x0457 },
{ 0x030f, 0x0476 },
{ 0x030f, 0x0477 },
{ 0x0308, 0x04da },
{ 0x0308, 0x04db },
{ 0x0308, 0x04ea },
{ 0x0308, 0x04eb },
{ 0x0654, 0x0624 },
{ 0x0654, 0x0626 },
{ 0x0654, 0x06c2 },
{ 0x0654, 0x06d3 },
{ 0x0654, 0x06c0 },
{ 0x093c, 0x0929 },
{ 0x093c, 0x0931 },
{ 0x093c, 0x0934 },
{ 0x0bd7, 0x0b94 },
{ 0x0bbe, 0x0bcb },
{ 0x0c56, 0x0c48 },
{ 0x0cd5, 0x0cc0 },
{ 0x0cd5, 0x0ccb },
{ 0x0d3e, 0x0d4b },
{ 0x0dca, 0x0ddd },
{ 0x102e, 0x1026 },
{ 0x0304, 0x1e38 },
{ 0x0304, 0x1e39 },
{ 0x0304, 0x1e5c },
{ 0x0304, 0x1e5d },
{ 0x0307, 0x1e68 },
{ 0x0307, 0x1e69 },
{ 0x0302, 0x1ec6 },
{ 0x0302, 0x1ec7 },
{ 0x0302, 0x1ed8 },
{ 0x0302, 0x1ed9 },
{ 0x0345, 0x1f82 },
{ 0x0345, 0x1f83 },
{ 0x0345, 0x1f84 },
{ 0x0345, 0x1f85 },
{ 0x0345, 0x1f86 },
{ 0x0345, 0x1f87 },
{ 0x0345, 0x1f8a },
{ 0x0345, 0x1f8b },
{ 0x0345, 0x1f8c },
{ 0x0345, 0x1f8d },
{ 0x0345, 0x1f8e },
{ 0x0345, 0x1f8f },
{ 0x0345, 0x1f92 },
{ 0x0345, 0x1f93 },
{ 0x0345, 0x1f94 },
{ 0x0345, 0x1f95 },
{ 0x0345, 0x1f96 },
{ 0x0345, 0x1f97 },
{ 0x0345, 0x1f9a },
{ 0x0345, 0x1f9b },
{ 0x0345, 0x1f9c },
{ 0x0345, 0x1f9d },
{ 0x0345, 0x1f9e },
{ 0x0345, 0x1f9f },
{ 0x0345, 0x1fa2 },
{ 0x0345, 0x1fa3 },
{ 0x0345, 0x1fa4 },
{ 0x0345, 0x1fa5 },
{ 0x0345, 0x1fa6 },
{ 0x0345, 0x1fa7 },
{ 0x0345, 0x1faa },
{ 0x0345, 0x1fab },
{ 0x0345, 0x1fac },
{ 0x0345, 0x1fad },
{ 0x0345, 0x1fae },
{ 0x0345, 0x1faf },
{ 0x0345, 0x1fb2 },
{ 0x0345, 0x1fc2 },
{ 0x0345, 0x1ff2 },
{ 0x0345, 0x1fb7 },
{ 0x0345, 0x1fc7 },
{ 0x0345, 0x1ff7 },
{ 0x0338, 0x219a },
{ 0x0338, 0x219b },
{ 0x0338, 0x21ae },
{ 0x0338, 0x21cd },
{ 0x0338, 0x21cf },
{ 0x0338, 0x21ce },
{ 0x0338, 0x2204 },
{ 0x0338, 0x2209 },
{ 0x0338, 0x220c },
{ 0x0338, 0x2224 },
{ 0x0338, 0x2226 },
{ 0x0338, 0x2241 },
{ 0x0338, 0x2244 },
{ 0x0338, 0x2247 },
{ 0x0338, 0x2249 },
{ 0x0338, 0x226d },
{ 0x0338, 0x2262 },
{ 0x0338, 0x2270 },
{ 0x0338, 0x2271 },
{ 0x0338, 0x2274 },
{ 0x0338, 0x2275 },
{ 0x0338, 0x2278 },
{ 0x0338, 0x2279 },
{ 0x0338, 0x2280 },
{ 0x0338, 0x2281 },
{ 0x0338, 0x22e0 },
{ 0x0338, 0x22e1 },
{ 0x0338, 0x2284 },
{ 0x0338, 0x2285 },
{ 0x0338, 0x2288 },
{ 0x0338, 0x2289 },
{ 0x0338, 0x22e2 },
{ 0x0338, 0x22e3 },
{ 0x0338, 0x22ac },
{ 0x0338, 0x22ad },
{ 0x0338, 0x22ae },
{ 0x0338, 0x22af },
{ 0x0338, 0x22ea },
{ 0x0338, 0x22eb },
{ 0x0338, 0x22ec },
{ 0x0338, 0x22ed },
{ 0x3099, 0x3094 },
{ 0x3099, 0x304c },
{ 0x3099, 0x304e },
{ 0x3099, 0x3050 },
{ 0x3099, 0x3052 },
{ 0x3099, 0x3054 },
{ 0x3099, 0x3056 },
{ 0x3099, 0x3058 },
{ 0x3099, 0x305a },
{ 0x3099, 0x305c },
{ 0x3099, 0x305e },
{ 0x3099, 0x3060 },
{ 0x3099, 0x3062 },
{ 0x3099, 0x3065 },
{ 0x3099, 0x3067 },
{ 0x3099, 0x3069 },
{ 0x3099, 0x309e },
{ 0x3099, 0x30f4 },
{ 0x3099, 0x30ac },
{ 0x3099, 0x30ae },
{ 0x3099, 0x30b0 },
{ 0x3099, 0x30b2 },
{ 0x3099, 0x30b4 },
{ 0x3099, 0x30b6 },
{ 0x3099, 0x30b8 },
{ 0x3099, 0x30ba },
{ 0x3099, 0x30bc },
{ 0x3099, 0x30be },
{ 0x3099, 0x30c0 },
{ 0x3099, 0x30c2 },
{ 0x3099, 0x30c5 },
{ 0x3099, 0x30c7 },
{ 0x3099, 0x30c9 },
{ 0x3099, 0x30f7 },
{ 0x3099, 0x30f8 },
{ 0x3099, 0x30f9 },
{ 0x3099, 0x30fa },
{ 0x3099, 0x30fe }
};
gushort compose_second_single[][2] = {
{ 0x0627, 0x0622 },
{ 0x0627, 0x0623 },
{ 0x0627, 0x0625 },
{ 0x09c7, 0x09cb },
{ 0x09c7, 0x09cc },
{ 0x0b47, 0x0b4b },
{ 0x0b47, 0x0b48 },
{ 0x0b47, 0x0b4c },
{ 0x0bc6, 0x0bca },
{ 0x0bc6, 0x0bcc },
{ 0x0cc6, 0x0cca },
{ 0x0cc6, 0x0cc7 },
{ 0x0cc6, 0x0cc8 },
{ 0x0d46, 0x0d4a },
{ 0x0d46, 0x0d4c },
{ 0x0dd9, 0x0dda },
{ 0x0dd9, 0x0ddc },
{ 0x0dd9, 0x0dde }
};
gushort compose_array[146][31] = {
{ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0100, 0x0102, 0x0226, 0x00c4, 0x1ea2, 0x00c5, 0, 0x01cd, 0x0200, 0x0202, 0, 0, 0, 0x1ea0, 0, 0x1e00, 0, 0, 0x0104, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e04, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e06, 0, 0, 0, 0 },
{ 0, 0x0106, 0x0108, 0, 0, 0, 0x010a, 0, 0, 0, 0, 0x010c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00c7, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e0a, 0, 0, 0, 0, 0x010e, 0, 0, 0, 0, 0, 0x1e0c, 0, 0, 0, 0x1e10, 0, 0x1e12, 0, 0, 0x1e0e, 0, 0, 0, 0 },
{ 0x00c8, 0x00c9, 0x00ca, 0x1ebc, 0x0112, 0x0114, 0x0116, 0x00cb, 0x1eba, 0, 0, 0x011a, 0x0204, 0x0206, 0, 0, 0, 0x1eb8, 0, 0, 0, 0x0228, 0x0118, 0x1e18, 0, 0x1e1a, 0, 0, 0, 0, 0 },
{ 0, 0x01f4, 0x011c, 0, 0x1e20, 0x011e, 0x0120, 0, 0, 0, 0, 0x01e6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0122, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0x0124, 0, 0, 0, 0x1e22, 0x1e26, 0, 0, 0, 0x021e, 0, 0, 0, 0, 0, 0x1e24, 0, 0, 0, 0x1e28, 0, 0, 0x1e2a, 0, 0, 0, 0, 0, 0 },
{ 0x00cc, 0x00cd, 0x00ce, 0x0128, 0x012a, 0x012c, 0x0130, 0x00cf, 0x1ec8, 0, 0, 0x01cf, 0x0208, 0x020a, 0, 0, 0, 0x1eca, 0, 0, 0, 0, 0x012e, 0, 0, 0x1e2c, 0, 0, 0, 0, 0 },
{ 0, 0x1e30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01e8, 0, 0, 0, 0, 0, 0x1e32, 0, 0, 0, 0x0136, 0, 0, 0, 0, 0x1e34, 0, 0, 0, 0 },
{ 0, 0x0139, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x013d, 0, 0, 0, 0, 0, 0x1e36, 0, 0, 0, 0x013b, 0, 0x1e3c, 0, 0, 0x1e3a, 0, 0, 0, 0 },
{ 0, 0x1e3e, 0, 0, 0, 0, 0x1e40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x01f8, 0x0143, 0, 0x00d1, 0, 0, 0x1e44, 0, 0, 0, 0, 0x0147, 0, 0, 0, 0, 0, 0x1e46, 0, 0, 0, 0x0145, 0, 0x1e4a, 0, 0, 0x1e48, 0, 0, 0, 0 },
{ 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x014c, 0x014e, 0x022e, 0x00d6, 0x1ece, 0, 0x0150, 0x01d1, 0x020c, 0x020e, 0, 0, 0x01a0, 0x1ecc, 0, 0, 0, 0, 0x01ea, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x1e54, 0, 0, 0, 0, 0x1e56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x0154, 0, 0, 0, 0, 0x1e58, 0, 0, 0, 0, 0x0158, 0x0210, 0x0212, 0, 0, 0, 0x1e5a, 0, 0, 0, 0x0156, 0, 0, 0, 0, 0x1e5e, 0, 0, 0, 0 },
{ 0, 0x015a, 0x015c, 0, 0, 0, 0x1e60, 0, 0, 0, 0, 0x0160, 0, 0, 0, 0, 0, 0x1e62, 0, 0, 0x0218, 0x015e, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e6a, 0, 0, 0, 0, 0x0164, 0, 0, 0, 0, 0, 0x1e6c, 0, 0, 0x021a, 0x0162, 0, 0x1e70, 0, 0, 0x1e6e, 0, 0, 0, 0 },
{ 0x00d9, 0x00da, 0x00db, 0x0168, 0x016a, 0x016c, 0, 0x00dc, 0x1ee6, 0x016e, 0x0170, 0x01d3, 0x0214, 0x0216, 0, 0, 0x01af, 0x1ee4, 0x1e72, 0, 0, 0, 0x0172, 0x1e76, 0, 0x1e74, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0x1e7c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e7e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e80, 0x1e82, 0x0174, 0, 0, 0, 0x1e86, 0x1e84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e8a, 0x1e8c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ef2, 0x00dd, 0x0176, 0x1ef8, 0x0232, 0, 0x1e8e, 0x0178, 0x1ef6, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ef4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x0179, 0x1e90, 0, 0, 0, 0x017b, 0, 0, 0, 0, 0x017d, 0, 0, 0, 0, 0, 0x1e92, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e94, 0, 0, 0, 0 },
{ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x0101, 0x0103, 0x0227, 0x00e4, 0x1ea3, 0x00e5, 0, 0x01ce, 0x0201, 0x0203, 0, 0, 0, 0x1ea1, 0, 0x1e01, 0, 0, 0x0105, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e03, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e05, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e07, 0, 0, 0, 0 },
{ 0, 0x0107, 0x0109, 0, 0, 0, 0x010b, 0, 0, 0, 0, 0x010d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00e7, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e0b, 0, 0, 0, 0, 0x010f, 0, 0, 0, 0, 0, 0x1e0d, 0, 0, 0, 0x1e11, 0, 0x1e13, 0, 0, 0x1e0f, 0, 0, 0, 0 },
{ 0x00e8, 0x00e9, 0x00ea, 0x1ebd, 0x0113, 0x0115, 0x0117, 0x00eb, 0x1ebb, 0, 0, 0x011b, 0x0205, 0x0207, 0, 0, 0, 0x1eb9, 0, 0, 0, 0x0229, 0x0119, 0x1e19, 0, 0x1e1b, 0, 0, 0, 0, 0 },
{ 0, 0x01f5, 0x011d, 0, 0x1e21, 0x011f, 0x0121, 0, 0, 0, 0, 0x01e7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0123, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0x0125, 0, 0, 0, 0x1e23, 0x1e27, 0, 0, 0, 0x021f, 0, 0, 0, 0, 0, 0x1e25, 0, 0, 0, 0x1e29, 0, 0, 0x1e2b, 0, 0x1e96, 0, 0, 0, 0 },
{ 0x00ec, 0x00ed, 0x00ee, 0x0129, 0x012b, 0x012d, 0, 0x00ef, 0x1ec9, 0, 0, 0x01d0, 0x0209, 0x020b, 0, 0, 0, 0x1ecb, 0, 0, 0, 0, 0x012f, 0, 0, 0x1e2d, 0, 0, 0, 0, 0 },
{ 0, 0, 0x0135, 0, 0, 0, 0, 0, 0, 0, 0, 0x01f0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x1e31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01e9, 0, 0, 0, 0, 0, 0x1e33, 0, 0, 0, 0x0137, 0, 0, 0, 0, 0x1e35, 0, 0, 0, 0 },
{ 0, 0x013a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x013e, 0, 0, 0, 0, 0, 0x1e37, 0, 0, 0, 0x013c, 0, 0x1e3d, 0, 0, 0x1e3b, 0, 0, 0, 0 },
{ 0, 0x1e3f, 0, 0, 0, 0, 0x1e41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x01f9, 0x0144, 0, 0x00f1, 0, 0, 0x1e45, 0, 0, 0, 0, 0x0148, 0, 0, 0, 0, 0, 0x1e47, 0, 0, 0, 0x0146, 0, 0x1e4b, 0, 0, 0x1e49, 0, 0, 0, 0 },
{ 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x014d, 0x014f, 0x022f, 0x00f6, 0x1ecf, 0, 0x0151, 0x01d2, 0x020d, 0x020f, 0, 0, 0x01a1, 0x1ecd, 0, 0, 0, 0, 0x01eb, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x1e55, 0, 0, 0, 0, 0x1e57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x0155, 0, 0, 0, 0, 0x1e59, 0, 0, 0, 0, 0x0159, 0x0211, 0x0213, 0, 0, 0, 0x1e5b, 0, 0, 0, 0x0157, 0, 0, 0, 0, 0x1e5f, 0, 0, 0, 0 },
{ 0, 0x015b, 0x015d, 0, 0, 0, 0x1e61, 0, 0, 0, 0, 0x0161, 0, 0, 0, 0, 0, 0x1e63, 0, 0, 0x0219, 0x015f, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e6b, 0x1e97, 0, 0, 0, 0x0165, 0, 0, 0, 0, 0, 0x1e6d, 0, 0, 0x021b, 0x0163, 0, 0x1e71, 0, 0, 0x1e6f, 0, 0, 0, 0 },
{ 0x00f9, 0x00fa, 0x00fb, 0x0169, 0x016b, 0x016d, 0, 0x00fc, 0x1ee7, 0x016f, 0x0171, 0x01d4, 0x0215, 0x0217, 0, 0, 0x01b0, 0x1ee5, 0x1e73, 0, 0, 0, 0x0173, 0x1e77, 0, 0x1e75, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0x1e7d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e7f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e81, 0x1e83, 0x0175, 0, 0, 0, 0x1e87, 0x1e85, 0, 0x1e98, 0, 0, 0, 0, 0, 0, 0, 0x1e89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0x1e8b, 0x1e8d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ef3, 0x00fd, 0x0177, 0x1ef9, 0x0233, 0, 0x1e8f, 0x00ff, 0x1ef7, 0x1e99, 0, 0, 0, 0, 0, 0, 0, 0x1ef5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x017a, 0x1e91, 0, 0, 0, 0x017c, 0, 0, 0, 0, 0x017e, 0, 0, 0, 0, 0, 0x1e93, 0, 0, 0, 0, 0, 0, 0, 0, 0x1e95, 0, 0, 0, 0 },
{ 0x1fed, 0x0385, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fc1, 0, 0, 0 },
{ 0x1ea6, 0x1ea4, 0, 0x1eaa, 0, 0, 0, 0, 0x1ea8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x01fc, 0, 0, 0x01e2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ec0, 0x1ebe, 0, 0x1ec4, 0, 0, 0, 0, 0x1ec2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ed2, 0x1ed0, 0, 0x1ed6, 0, 0, 0, 0, 0x1ed4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x1e4c, 0, 0, 0x022c, 0, 0, 0x1e4e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x01db, 0x01d7, 0, 0, 0x01d5, 0, 0, 0, 0, 0, 0, 0x01d9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ea7, 0x1ea5, 0, 0x1eab, 0, 0, 0, 0, 0x1ea9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x01fd, 0, 0, 0x01e3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ec1, 0x1ebf, 0, 0x1ec5, 0, 0, 0, 0, 0x1ec3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ed3, 0x1ed1, 0, 0x1ed7, 0, 0, 0, 0, 0x1ed5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0x1e4d, 0, 0, 0x022d, 0, 0, 0x1e4f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x01dc, 0x01d8, 0, 0, 0x01d6, 0, 0, 0, 0, 0, 0, 0x01da, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1eb0, 0x1eae, 0, 0x1eb4, 0, 0, 0, 0, 0x1eb2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1eb1, 0x1eaf, 0, 0x1eb5, 0, 0, 0, 0, 0x1eb3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e14, 0x1e16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e15, 0x1e17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e50, 0x1e52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1e51, 0x1e53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1edc, 0x1eda, 0, 0x1ee0, 0, 0, 0, 0, 0x1ede, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ee2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1edd, 0x1edb, 0, 0x1ee1, 0, 0, 0, 0, 0x1edf, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ee3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1eea, 0x1ee8, 0, 0x1eee, 0, 0, 0, 0, 0x1eec, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ef0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1eeb, 0x1ee9, 0, 0x1eef, 0, 0, 0, 0, 0x1eed, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ef1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1fba, 0x0386, 0, 0, 0x1fb9, 0x1fb8, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f08, 0x1f09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fbc, 0, 0 },
{ 0x1fc8, 0x0388, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f18, 0x1f19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1fca, 0x0389, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f28, 0x1f29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fcc, 0, 0 },
{ 0x1fda, 0x038a, 0, 0, 0x1fd9, 0x1fd8, 0, 0x03aa, 0, 0, 0, 0, 0, 0, 0x1f38, 0x1f39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ff8, 0x038c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f48, 0x1f49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1fea, 0x038e, 0, 0, 0x1fe9, 0x1fe8, 0, 0x03ab, 0, 0, 0, 0, 0, 0, 0, 0x1f59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1ffa, 0x038f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f68, 0x1f69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ffc, 0, 0 },
{ 0x1f70, 0x03ac, 0, 0, 0x1fb1, 0x1fb0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f00, 0x1f01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fb6, 0x1fb3, 0, 0 },
{ 0x1f72, 0x03ad, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f10, 0x1f11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f74, 0x03ae, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f20, 0x1f21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fc6, 0x1fc3, 0, 0 },
{ 0x1f76, 0x03af, 0, 0, 0x1fd1, 0x1fd0, 0, 0x03ca, 0, 0, 0, 0, 0, 0, 0x1f30, 0x1f31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fd6, 0, 0, 0 },
{ 0x1f78, 0x03cc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f40, 0x1f41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fe4, 0x1fe5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f7a, 0x03cd, 0, 0, 0x1fe1, 0x1fe0, 0, 0x03cb, 0, 0, 0, 0, 0, 0, 0x1f50, 0x1f51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fe6, 0, 0, 0 },
{ 0x1f7c, 0x03ce, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f60, 0x1f61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1ff6, 0x1ff3, 0, 0 },
{ 0x1fd2, 0x0390, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fd7, 0, 0, 0 },
{ 0x1fe2, 0x03b0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fe7, 0, 0, 0 },
{ 0, 0x03d3, 0, 0, 0, 0, 0, 0x03d4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0x04d0, 0, 0x04d2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x0400, 0, 0, 0, 0, 0x04d6, 0, 0x0401, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0x04c1, 0, 0x04dc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x040d, 0, 0, 0, 0x04e2, 0x0419, 0, 0x04e4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0x04ee, 0x040e, 0, 0x04f0, 0, 0, 0x04f2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0x04d1, 0, 0x04d3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x0450, 0, 0, 0, 0, 0x04d7, 0, 0x0451, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0x04c2, 0, 0x04dd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x045d, 0, 0, 0, 0x04e3, 0x0439, 0, 0x04e5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0x04ef, 0x045e, 0, 0x04f1, 0, 0, 0x04f3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0x1eac, 0, 0, 0x1eb6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0x1ead, 0, 0, 0x1eb7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f02, 0x1f04, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f06, 0x1f80, 0, 0 },
{ 0x1f03, 0x1f05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f07, 0x1f81, 0, 0 },
{ 0x1f0a, 0x1f0c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f0e, 0x1f88, 0, 0 },
{ 0x1f0b, 0x1f0d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f0f, 0x1f89, 0, 0 },
{ 0x1f12, 0x1f14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f13, 0x1f15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f1a, 0x1f1c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f1b, 0x1f1d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f22, 0x1f24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f26, 0x1f90, 0, 0 },
{ 0x1f23, 0x1f25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f27, 0x1f91, 0, 0 },
{ 0x1f2a, 0x1f2c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f2e, 0x1f98, 0, 0 },
{ 0x1f2b, 0x1f2d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f2f, 0x1f99, 0, 0 },
{ 0x1f32, 0x1f34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f36, 0, 0, 0 },
{ 0x1f33, 0x1f35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f37, 0, 0, 0 },
{ 0x1f3a, 0x1f3c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f3e, 0, 0, 0 },
{ 0x1f3b, 0x1f3d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f3f, 0, 0, 0 },
{ 0x1f42, 0x1f44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f43, 0x1f45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f4a, 0x1f4c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f4b, 0x1f4d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0x1f52, 0x1f54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f56, 0, 0, 0 },
{ 0x1f53, 0x1f55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f57, 0, 0, 0 },
{ 0x1f5b, 0x1f5d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f5f, 0, 0, 0 },
{ 0x1f62, 0x1f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f66, 0x1fa0, 0, 0 },
{ 0x1f63, 0x1f65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f67, 0x1fa1, 0, 0 },
{ 0x1f6a, 0x1f6c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f6e, 0x1fa8, 0, 0 },
{ 0x1f6b, 0x1f6d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1f6f, 0x1fa9, 0, 0 },
{ 0x1fcd, 0x1fce, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fcf, 0, 0, 0 },
{ 0x1fdd, 0x1fde, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1fdf, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x3070, 0x3071 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x3073, 0x3074 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x3076, 0x3077 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x3079, 0x307a },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x307c, 0x307d },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x30d0, 0x30d1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x30d3, 0x30d4 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x30d6, 0x30d7 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x30d9, 0x30da },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x30dc, 0x30dd }
};

View File

@ -21,6 +21,7 @@
#include "glib.h"
#include "gunidecomp.h"
#include "gunicomp.h"
#include <config.h>
@ -76,41 +77,40 @@ g_unicode_canonical_ordering (gunichar *string,
}
}
gunichar *
g_unicode_canonical_decomposition (gunichar ch,
size_t *result_len)
guchar *
find_decomposition (gunichar ch,
gboolean compat)
{
gunichar *r = NULL;
if (ch <= 0xffff)
int start = 0;
int end = G_N_ELEMENTS (decomp_table);
if (ch >= decomp_table[start].ch &&
ch <= decomp_table[end - 1].ch)
{
int start = 0;
int end = G_N_ELEMENTS (decomp_table);
while (start != end)
while (TRUE)
{
int half = (start + end) / 2;
if (ch == decomp_table[half].ch)
{
/* Found it. */
int i, len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp_table[half].expansion[len]
|| decomp_table[half].expansion[len + 1]);
len += 2)
;
int offset;
/* We've counted twice as many bytes as there are
characters. */
*result_len = len / 2;
r = malloc (len / 2 * sizeof (gunichar));
for (i = 0; i < len; i += 2)
if (compat)
{
r[i / 2] = (decomp_table[half].expansion[i] << 8
| decomp_table[half].expansion[i + 1]);
offset = decomp_table[half].compat_offset;
if (offset == 0xff)
offset = decomp_table[half].canon_offset;
}
break;
else
{
offset = decomp_table[half].canon_offset;
if (offset == 0xff)
return NULL;
}
return decomp_table[half].expansion + offset;
}
else if (half == start)
break;
else if (ch > decomp_table[half].ch)
start = half;
else
@ -118,7 +118,36 @@ g_unicode_canonical_decomposition (gunichar ch,
}
}
if (r == NULL)
return NULL;
}
gunichar *
g_unicode_canonical_decomposition (gunichar ch,
size_t *result_len)
{
guchar *decomp = find_decomposition (ch, FALSE);
gunichar *r;
if (decomp)
{
/* Found it. */
int i, len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
;
/* We've counted twice as many bytes as there are
characters. */
*result_len = len / 2;
r = malloc (len / 2 * sizeof (gunichar));
for (i = 0; i < len; i += 2)
{
r[i / 2] = (decomp[i] << 8 | decomp[i + 1]);
}
}
else
{
/* Not in our table. */
r = malloc (sizeof (gunichar));
@ -131,3 +160,231 @@ g_unicode_canonical_decomposition (gunichar ch,
this, but we rely on it here. */
return r;
}
/* Look up the composition index of a character given its page (high bits)
 * and its offset within the page.  An entry of compose_table whose pointer
 * value fits in 0..0xff is not dereferenced: that small integer is used
 * directly as the index for every character of the page.  Any other entry
 * is treated as a per-page array indexed by Char.
 */
#define CI(Page, Char) \
((((GPOINTER_TO_INT(compose_table[Page])) & 0xff) \
== GPOINTER_TO_INT(compose_table[Page])) \
? GPOINTER_TO_INT(compose_table[Page]) \
: (compose_table[Page][Char]))
/* Composition index for a full code point: 0 for anything above
 * G_UNICODE_LAST_CHAR, otherwise the CI() entry for (Char >> 8, Char & 0xff).
 * Note that Char is evaluated more than once.
 */
#define COMPOSE_INDEX(Char) \
(((Char) > (G_UNICODE_LAST_CHAR)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
/*
 * combine:
 * @a: first character of the candidate pair
 * @b: second character of the candidate pair
 * @result: location to store the composed character
 *
 * Tries to compose @a followed by @b into a single character using the
 * generated composition tables.  Three cases are handled, selected by the
 * COMPOSE_INDEX() of each character:
 *
 *  - @a is in the "first single" range: @a composes with exactly one
 *    second character, stored in compose_first_single.
 *  - @b is in the "second single" range: @b composes with exactly one
 *    first character, stored in compose_second_single.
 *  - otherwise both indices must fall in the general first/second ranges
 *    and the result is looked up in the two-dimensional compose_array
 *    (a zero entry means "no composition").
 *
 * Return value: %TRUE if a composition exists (and *@result was written),
 *   %FALSE otherwise (*@result is left untouched).
 */
gboolean
combine (gunichar  a,
         gunichar  b,
         gunichar *result)
{
  gushort index_a, index_b;

  index_a = COMPOSE_INDEX(a);
  if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
    {
      if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
        {
          *result = compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
          return TRUE;
        }
      else
        return FALSE;
    }

  index_b = COMPOSE_INDEX(b);
  if (index_b >= COMPOSE_SECOND_SINGLE_START)
    {
      if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
        {
          *result = compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
          return TRUE;
        }
      else
        return FALSE;
    }

  /* The upper bound on the second index must be checked against index_b
   * (the original tested index_a here by mistake).
   */
  if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&
      index_b >= COMPOSE_SECOND_START && index_b < COMPOSE_SECOND_SINGLE_START)
    {
      gunichar res = compose_array[index_a - COMPOSE_FIRST_START][index_b - COMPOSE_SECOND_START];

      if (res)
        {
          *result = res;
          return TRUE;
        }
    }

  return FALSE;
}
/*
 * _g_utf8_normalize_wc:
 * @str: nul-terminated UTF-8 input
 * @mode: normalization mode; NFKC/NFKD select compatibility
 *   decompositions, NFC/NFKC additionally run the composition step
 *
 * Decomposes every character of @str via find_decomposition(), puts the
 * result into canonical order with g_unicode_canonical_ordering() and,
 * for the composing modes, recombines pairs with combine().
 *
 * Return value: a buffer allocated with g_new() holding the resulting
 *   code points followed by a terminating 0.
 */
gunichar *
_g_utf8_normalize_wc (const gchar *str,
GNormalizeMode mode)
{
gsize n_wc;
gunichar *wc_buffer;
const char *p;
gsize last_start;
gboolean do_compat = (mode == G_NORMALIZE_NFKC ||
mode == G_NORMALIZE_NFKD);
gboolean do_compose = (mode == G_NORMALIZE_NFC ||
mode == G_NORMALIZE_NFKC);
/* Pass 1: count how many code points the decomposed string needs. */
n_wc = 0;
p = str;
while (*p)
{
gunichar wc = g_utf8_get_char (p);
guchar *decomp = find_decomposition (wc, do_compat);
if (decomp)
{
int len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
;
/* len counts bytes; each code point occupies two of them. */
n_wc += len / 2;
}
else
n_wc++;
p = g_utf8_next_char (p);
}
/* One extra slot for the terminating 0. */
wc_buffer = g_new (gunichar, n_wc + 1);
/* Pass 2: write the decomposition, reordering as we go.  last_start is
 * the index where the run currently awaiting canonical ordering begins.
 */
last_start = 0;
n_wc = 0;
p = str;
while (*p)
{
gunichar wc = g_utf8_get_char (p);
guchar *decomp;
int cc;
/* Index of the first code point emitted for this input character. */
size_t old_n_wc = n_wc;
decomp = find_decomposition (wc, do_compat);
if (decomp)
{
int len;
/* We store as a double-nul terminated string. */
for (len = 0; (decomp[len] || decomp[len + 1]);
len += 2)
wc_buffer[n_wc++] = (decomp[len] << 8 | decomp[len + 1]);
}
else
wc_buffer[n_wc++] = wc;
if (n_wc > 0)
{
/* When the first code point just emitted has combining class 0,
 * order everything from last_start up to n_wc and begin the next
 * run at that code point.
 */
cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
if (cc == 0)
{
g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
last_start = old_n_wc;
}
}
p = g_utf8_next_char (p);
}
/* Order the final run, which no later class-0 character flushed. */
if (n_wc > 0)
{
g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
last_start = n_wc;
}
wc_buffer[n_wc] = 0;
/* All decomposed and reordered */
if (do_compose && n_wc > 0)
{
gsize i, j;
int last_cc = 0;
/* From here on last_start is the index of the character that later
 * characters are combined into; last_cc is the combining class of the
 * previously kept character.
 */
last_start = 0;
for (i = 0; i < n_wc; i++)
{
int cc = COMBINING_CLASS (wc_buffer[i]);
/* Attempt a composition only when the previous class was 0 or differs
 * from this one; on success combine() overwrites wc_buffer[last_start].
 */
if (i > 0 &&
(last_cc == 0 || last_cc != cc) &&
combine (wc_buffer[last_start], wc_buffer[i],
&wc_buffer[last_start]))
{
/* Remove wc_buffer[i] by shifting the tail left one slot. */
for (j = i + 1; j < n_wc; j++)
wc_buffer[j-1] = wc_buffer[j];
n_wc--;
/* Step back so the loop increment revisits the slot that now holds
 * the next character.
 */
i--;
if (i == last_start)
last_cc = 0;
else
last_cc = COMBINING_CLASS (wc_buffer[i-1]);
continue;
}
/* No composition: a class-0 character becomes the new combining target. */
if (cc == 0)
last_start = i;
last_cc = cc;
}
}
/* Re-terminate: composition may have shortened the string. */
wc_buffer[n_wc] = 0;
return wc_buffer;
}
/**
 * g_utf8_normalize:
 * @str: a UTF-8 encoded string.
 * @mode: the type of normalization to perform.
 *
 * Converts a string into canonical form, standardizing
 * such issues as whether a character with an accent
 * is represented as a base character and combining
 * accent or as a single precomposed character. You
 * should generally call g_utf8_normalize() before
 * comparing two Unicode strings.
 *
 * The normalization mode %G_NORMALIZE_DEFAULT only
 * standardizes differences that do not affect the
 * text content, such as the above-mentioned accent
 * representation. %G_NORMALIZE_ALL also standardizes
 * the "compatibility" characters in Unicode, such
 * as SUPERSCRIPT THREE to the standard forms
 * (in this case DIGIT THREE). Formatting information
 * may be lost, but for most text operations such
 * characters should be considered the same.
 * For example, g_utf8_collate() normalizes
 * with %G_NORMALIZE_ALL as its first step.
 *
 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
 * but return a result with composed forms rather
 * than a maximally decomposed form. This is often
 * useful if you intend to convert the string to
 * a legacy encoding or pass it to a system with
 * less capable Unicode handling.
 *
 * Return value: the string in normalized form
 **/
gchar *
g_utf8_normalize (const gchar    *str,
                  GNormalizeMode  mode)
{
  /* Normalize in UCS-4, then re-encode; the intermediate buffer is ours. */
  gunichar *normalized_ucs4 = _g_utf8_normalize_wc (str, mode);
  gchar *normalized_utf8 = g_ucs4_to_utf8 (normalized_ucs4, -1, NULL, NULL, NULL);

  g_free (normalized_ucs4);

  return normalized_utf8;
}

File diff suppressed because it is too large Load Diff

View File

@ -25,8 +25,8 @@
#include <config.h>
#include <stddef.h>
#define asize(x) ((sizeof (x)) / sizeof (x[0]))
#include <string.h>
#include <locale.h>
#define ATTTABLE(Page, Char) \
((attr_table[Page] == 0) ? 0 : (attr_table[Page][Char]))
@ -40,16 +40,21 @@
#define TYPE(Char) (((Char) > (G_UNICODE_LAST_CHAR)) ? G_UNICODE_UNASSIGNED : TTYPE ((Char) >> 8, (Char) & 0xff))
#define ISDIGIT(Type) ((Type) == G_UNICODE_DECIMAL_NUMBER \
|| (Type) == G_UNICODE_LETTER_NUMBER \
#define ISDIGIT(Type) ((Type) == G_UNICODE_DECIMAL_NUMBER \
|| (Type) == G_UNICODE_LETTER_NUMBER \
|| (Type) == G_UNICODE_OTHER_NUMBER)
#define ISALPHA(Type) ((Type) == G_UNICODE_LOWERCASE_LETTER \
|| (Type) == G_UNICODE_UPPERCASE_LETTER \
|| (Type) == G_UNICODE_TITLECASE_LETTER \
|| (Type) == G_UNICODE_MODIFIER_LETTER \
#define ISALPHA(Type) ((Type) == G_UNICODE_LOWERCASE_LETTER \
|| (Type) == G_UNICODE_UPPERCASE_LETTER \
|| (Type) == G_UNICODE_TITLECASE_LETTER \
|| (Type) == G_UNICODE_MODIFIER_LETTER \
|| (Type) == G_UNICODE_OTHER_LETTER)
#define ISMARK(Type) ((Type) == G_UNICODE_NON_SPACING_MARK || \
(Type) == G_UNICODE_COMBINING_MARK || \
(Type) == G_UNICODE_ENCLOSING_MARK)
/**
* g_unichar_isalnum:
* @c: a Unicode character
@ -270,7 +275,7 @@ gboolean
g_unichar_istitle (gunichar c)
{
unsigned int i;
for (i = 0; i < asize (title_table); ++i)
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
if (title_table[i][0] == c)
return 1;
return 0;
@ -350,11 +355,20 @@ g_unichar_toupper (gunichar c)
{
int t = TYPE (c);
if (t == G_UNICODE_LOWERCASE_LETTER)
return ATTTABLE (c >> 8, c & 0xff);
{
gunichar val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
{
guchar *p = special_case_table[val - 0xd800];
return p[0] * 256 + p[1];
}
else
return val;
}
else if (t == G_UNICODE_TITLECASE_LETTER)
{
unsigned int i;
for (i = 0; i < asize (title_table); ++i)
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
{
if (title_table[i][0] == c)
return title_table[i][1];
@ -378,11 +392,20 @@ g_unichar_tolower (gunichar c)
{
int t = TYPE (c);
if (t == G_UNICODE_UPPERCASE_LETTER)
return ATTTABLE (c >> 8, c & 0xff);
{
gunichar val = ATTTABLE (c >> 8, c & 0xff);
if (val >= 0xd800 && val < 0xdc00)
{
guchar *p = special_case_table[val - 0xd800];
return p[0] * 256 + p[1];
}
else
return val;
}
else if (t == G_UNICODE_TITLECASE_LETTER)
{
unsigned int i;
for (i = 0; i < asize (title_table); ++i)
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
{
if (title_table[i][0] == c)
return title_table[i][2];
@ -405,7 +428,7 @@ gunichar
g_unichar_totitle (gunichar c)
{
unsigned int i;
for (i = 0; i < asize (title_table); ++i)
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
{
if (title_table[i][0] == c || title_table[i][1] == c
|| title_table[i][2] == c)
@ -469,3 +492,416 @@ g_unichar_type (gunichar c)
{
return TYPE (c);
}
/*
 * Case mapping functions
 */

/* Locale families that get dedicated handling in the case mapping code. */
typedef enum {
  LOCALE_NORMAL,
  LOCALE_TURKIC,
  LOCALE_LITHUANIAN
} LocaleType;

/*
 * get_locale_type:
 *
 * Classifies the current LC_CTYPE locale by the first two characters of
 * its name: "az" and "tr" are Turkic, "lt" is Lithuanian, anything else
 * (including the case where the locale name cannot be queried) is normal.
 *
 * Return value: the #LocaleType for the current LC_CTYPE locale.
 */
static LocaleType
get_locale_type (void)
{
  const char *locale = setlocale (LC_CTYPE, NULL);

  /* setlocale() is allowed to return NULL; don't dereference it. */
  if (locale == NULL)
    return LOCALE_NORMAL;

  switch (locale[0])
    {
    case 'a':
      if (locale[1] == 'z')
        return LOCALE_TURKIC;
      break;
    case 'l':
      if (locale[1] == 't')
        return LOCALE_LITHUANIAN;
      break;
    case 't':
      if (locale[1] == 'r')
        return LOCALE_TURKIC;
      break;
    default:
      break;
    }

  return LOCALE_NORMAL;
}
/*
 * output_marks:
 * @p_inout: in: position to start scanning at; out: position of the first
 *   character that is not a mark (or of the terminating nul)
 * @out_buffer: destination buffer, or %NULL to only compute the length
 * @len: number of bytes already produced
 * @remove_dot: if %TRUE, U+0307 COMBINING DOT ABOVE is consumed but
 *   not emitted
 *
 * Copies the run of mark characters (as classified by ISMARK()) found at
 * *@p_inout to @out_buffer at offset @len.
 *
 * Return value: @len plus the number of bytes emitted.
 */
static int
output_marks (const char **p_inout,
              char        *out_buffer,
              int          len,
              gboolean     remove_dot)
{
  const char *cursor;

  for (cursor = *p_inout; *cursor; cursor = g_utf8_next_char (cursor))
    {
      gunichar mark = g_utf8_get_char (cursor);
      int mark_type = TYPE (mark);

      /* The run ends at the first non-mark; leave it unconsumed. */
      if (!ISMARK (mark_type))
        break;

      /* A suppressed dot is skipped over without producing output. */
      if (remove_dot && mark == 0x307 /* COMBINING DOT ABOVE */)
        continue;

      len += g_unichar_to_utf8 (mark, out_buffer ? out_buffer + len : NULL);
    }

  *p_inout = cursor;

  return len;
}
/*
 * output_special_case:
 * @out_buffer: destination buffer, or %NULL to only compute the length
 * @len: number of bytes already produced
 * @index: index into special_case_table
 * @type: Unicode type of the character being converted
 * @which: 0 to emit the first stored sequence, 1 to emit the second
 *
 * Emits a multi-character case mapping.  Each table entry is read as a
 * series of 16-bit big-endian values; a 0x0000 value ends a sequence.
 * For non-titlecase characters the first value is the "best single match"
 * and is skipped.
 *
 * Return value: the new total length, i.e. @len plus the number of bytes
 *   emitted (NOT just the number of bytes added).
 */
static gsize
output_special_case (gchar *out_buffer,
                     gsize  len,
                     int    index,
                     int    type,
                     int    which)
{
  guchar *p = special_case_table[index];

  if (type != G_UNICODE_TITLECASE_LETTER)
    p += 2; /* +2 to skip over "best single match" */

  if (which == 1)
    {
      /* Skip the first sequence.  A value is the terminator only when
       * BOTH of its bytes are zero, so keep going while either is set;
       * testing with && would stop early on values such as 0x0053.
       */
      while (p[0] || p[1])
        p += 2;
      p += 2; /* step over the 0x0000 separator */
    }

  while (TRUE)
    {
      gunichar ch = p[0] * 256 + p[1];

      if (!ch)
        break;

      len += g_unichar_to_utf8 (ch, out_buffer ? out_buffer + len : NULL);
      p += 2;
    }

  return len;
}
/*
 * real_toupper:
 * @str: nul-terminated UTF-8 input
 * @out_buffer: destination buffer, or %NULL to only compute the length
 * @locale_type: locale family controlling the Turkic and Lithuanian rules
 *
 * Worker for g_utf8_strup().  It is run twice by the caller: once with a
 * %NULL buffer to measure, once to write, so both runs must produce the
 * same length.
 *
 * Return value: number of bytes produced, not counting any terminator
 *   (none is written).
 */
static gsize
real_toupper (const gchar *str,
              gchar       *out_buffer,
              LocaleType   locale_type)
{
  const gchar *p = str;
  const char *last = NULL;
  gsize len = 0;
  gboolean last_was_i = FALSE;

  while (*p)
    {
      gunichar c = g_utf8_get_char (p);
      int t = TYPE (c);
      gunichar val;

      last = p;
      p = g_utf8_next_char (p);

      if (locale_type == LOCALE_LITHUANIAN)
        {
          if (c == 'i')
            last_was_i = TRUE;
          else
            {
              if (last_was_i)
                {
                  /* Nasty, need to remove any dot above. Though
                   * I think only E WITH DOT ABOVE occurs in practice
                   * which could simplify this considerably.
                   */
                  gsize decomp_len, i;
                  gunichar *decomp;

                  decomp = g_unicode_canonical_decomposition (c, &decomp_len);
                  for (i=0; i < decomp_len; i++)
                    {
                      if (decomp[i] != 0x307 /* COMBINING DOT ABOVE */)
                        len += g_unichar_to_utf8 (g_unichar_toupper (decomp[i]), out_buffer ? out_buffer + len : NULL);
                    }
                  g_free (decomp);

                  len = output_marks (&p, out_buffer, len, TRUE);

                  /* NOTE(review): last_was_i is not cleared on this path,
                   * so later characters keep taking it - confirm intended.
                   */
                  continue;
                }

              if (!ISMARK(t))
                last_was_i = FALSE;
            }
        }

      if (locale_type == LOCALE_TURKIC && c == 'i')
        {
          /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE */
          len += g_unichar_to_utf8 (0x130, out_buffer ? out_buffer + len : NULL);
        }
      else if (c == 0x0345) /* COMBINING GREEK YPOGEGRAMMENI */
        {
          /* Nasty, need to move it after other combining marks .. this would go away if
           * we normalized first.
           */
          len = output_marks (&p, out_buffer, len, FALSE);

          /* And output as GREEK CAPITAL LETTER IOTA */
          len += g_unichar_to_utf8 (0x399, out_buffer ? out_buffer + len : NULL);
        }
      else if (t == G_UNICODE_LOWERCASE_LETTER || t == G_UNICODE_TITLECASE_LETTER)
        {
          val = ATTTABLE (c >> 8, c & 0xff);

          if (val >= 0xd800 && val < 0xdc00)
            {
              /* output_special_case() returns the new running total, so
               * assign it; adding it to len would count the bytes already
               * written twice.
               */
              len = output_special_case (out_buffer, len, val - 0xd800, t,
                                         t == G_UNICODE_LOWERCASE_LETTER ? 0 : 1);
            }
          else
            {
              if (t == G_UNICODE_TITLECASE_LETTER)
                {
                  unsigned int i;

                  /* Titlecase letters take their uppercase form from
                   * column 1 of title_table.
                   */
                  for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
                    {
                      if (title_table[i][0] == c)
                        val = title_table[i][1];
                    }
                }

              len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
            }
        }
      else
        {
          /* Not a cased letter: copy the original bytes through unchanged. */
          gsize char_len = g_utf8_skip[*(guchar *)last];

          if (out_buffer)
            memcpy (out_buffer + len, last, char_len);

          len += char_len;
        }
    }

  return len;
}
/**
 * g_utf8_strup:
 * @str: a UTF-8 encoded string
 *
 * Converts all Unicode characters in the string that have a case
 * to uppercase. The exact manner that this is done depends
 * on the current locale, and may result in the number of
 * characters in the string increasing. (For instance, the
 * German ess-zet will be changed to SS.)
 *
 * Return value: a newly allocated string, with all characters
 *    converted to uppercase.
 **/
gchar *
g_utf8_strup (const gchar *str)
{
  LocaleType locale;
  gsize n_bytes;
  gchar *upper;

  g_return_val_if_fail (str != NULL, NULL);

  locale = get_locale_type ();

  /* Two passes: measure with a NULL buffer first, then fill a buffer
   * of exactly that size (plus the terminator).
   */
  n_bytes = real_toupper (str, NULL, locale);
  upper = g_malloc (n_bytes + 1);
  real_toupper (str, upper, locale);
  upper[n_bytes] = '\0';

  return upper;
}
/*
 * real_tolower:
 * @str: nul-terminated UTF-8 input
 * @out_buffer: destination buffer, or %NULL to only compute the length
 * @locale_type: locale family controlling the Turkic rule
 *
 * Worker for g_utf8_strdown().  It is run twice by the caller: once with
 * a %NULL buffer to measure, once to write, so both runs must produce the
 * same length.
 *
 * Return value: number of bytes produced, not counting any terminator
 *   (none is written).
 */
static gsize
real_tolower (const gchar *str,
              gchar       *out_buffer,
              LocaleType   locale_type)
{
  const gchar *p = str;
  const char *last = NULL;
  gsize len = 0;

  while (*p)
    {
      gunichar c = g_utf8_get_char (p);
      int t = TYPE (c);
      gunichar val;

      last = p;
      p = g_utf8_next_char (p);

      if (locale_type == LOCALE_TURKIC && c == 'I')
        {
          /* I => LATIN SMALL LETTER DOTLESS I */
          len += g_unichar_to_utf8 (0x131, out_buffer ? out_buffer + len : NULL);
        }
      else if (c == 0x03A3) /* GREEK CAPITAL LETTER SIGMA */
        {
          gunichar next_c = g_utf8_get_char (p);
          int next_t = TYPE(next_c);

          /* SIGMA maps differently depending on whether it is
           * final or not. The following simplified test would
           * fail in the case of combining marks following the
           * sigma, but I don't think that occurs in real text.
           * The test here matches that in ICU.
           */
          if (ISALPHA(next_t)) /* Lu,Ll,Lt,Lm,Lo */
            val = 0x3c3; /* GREEK SMALL SIGMA */
          else
            val = 0x3c2; /* GREEK SMALL FINAL SIGMA */

          len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
        }
      else if (t == G_UNICODE_UPPERCASE_LETTER || t == G_UNICODE_TITLECASE_LETTER)
        {
          val = ATTTABLE (c >> 8, c & 0xff);

          if (val >= 0xd800 && val < 0xdc00)
            {
              /* output_special_case() returns the new running total, so
               * assign it; adding it to len would count the bytes already
               * written twice.
               */
              len = output_special_case (out_buffer, len, val - 0xd800, t, 0);
            }
          else
            {
              if (t == G_UNICODE_TITLECASE_LETTER)
                {
                  unsigned int i;

                  /* Titlecase letters take their lowercase form from
                   * column 2 of title_table.
                   */
                  for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
                    {
                      if (title_table[i][0] == c)
                        val = title_table[i][2];
                    }
                }

              len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
            }
        }
      else
        {
          /* Not a cased letter: copy the original bytes through unchanged. */
          gsize char_len = g_utf8_skip[*(guchar *)last];

          if (out_buffer)
            memcpy (out_buffer + len, last, char_len);

          len += char_len;
        }
    }

  return len;
}
/**
 * g_utf8_strdown:
 * @str: a UTF-8 encoded string
 *
 * Converts all Unicode characters in the string that have a case
 * to lowercase. The exact manner that this is done depends
 * on the current locale, and may result in the number of
 * characters in the string changing.
 *
 * Return value: a newly allocated string, with all characters
 *    converted to lowercase.
 **/
gchar *
g_utf8_strdown (const gchar *str)
{
  LocaleType locale;
  gsize n_bytes;
  gchar *lower;

  g_return_val_if_fail (str != NULL, NULL);

  locale = get_locale_type ();

  /* Two passes: measure with a NULL buffer first, then fill a buffer
   * of exactly that size (plus the terminator).
   */
  n_bytes = real_tolower (str, NULL, locale);
  lower = g_malloc (n_bytes + 1);
  real_tolower (str, lower, locale);
  lower[n_bytes] = '\0';

  return lower;
}
/**
 * g_utf8_casefold:
 * @str: a UTF-8 encoded string
 *
 * Converts a string into a form that is independent of case. The
 * result will not correspond to any particular case, but can be
 * compared for equality or ordered with the results of calling
 * g_utf8_casefold() on other strings.
 *
 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
 * only an approximation to the correct linguistic case insensitive
 * ordering, though it is a fairly good one. Getting this exactly
 * right would require a more sophisticated collation function that
 * takes case sensitivity into account. GLib does not currently
 * provide such a function.
 *
 * Return value: a newly allocated string, that is a
 *   case independent form of @str.
 **/
gchar *
g_utf8_casefold (const gchar *str)
{
  GString *result;
  const char *p;
  gchar buf[6];
  int len;

  /* Same argument check as g_utf8_strup() / g_utf8_strdown(); done
   * before allocating so nothing leaks on the failure path.
   */
  g_return_val_if_fail (str != NULL, NULL);

  result = g_string_new (NULL);

  p = str;
  while (*p)
    {
      gunichar ch = g_utf8_get_char (p);

      int start = 0;
      int end = G_N_ELEMENTS (casefold_table);

      /* Only search when ch lies within the range the table covers. */
      if (ch >= casefold_table[start].ch &&
          ch <= casefold_table[end - 1].ch)
        {
          /* Binary search over casefold_table; the interval collapsing
           * to a single entry (half == start) without a match means ch
           * has no entry in the table.
           */
          while (TRUE)
            {
              int half = (start + end) / 2;

              if (ch == casefold_table[half].ch)
                {
                  g_string_append (result, casefold_table[half].data);
                  goto next;
                }
              else if (half == start)
                break;
              else if (ch > casefold_table[half].ch)
                start = half;
              else
                end = half;
            }
        }

      /* No table entry: fall back to simple lowercasing. */
      ch = g_unichar_tolower (ch);
      len = g_unichar_to_utf8 (ch, buf);
      g_string_append_len (result, buf, len);

    next:
      p = g_utf8_next_char (p);
    }

  return g_string_free (result, FALSE);
}

View File

@ -44,3 +44,6 @@ testgdateparser
testglib
timeloop
timeloop-basic
unicode-caseconv
unicode-collate
unicode-normalize

View File

@ -34,39 +34,42 @@ endif
if ENABLE_TIMELOOP
timeloop = timeloop
endif
noinst_PROGRAMS = testglib testgdate testgdateparser $(timeloop)
noinst_PROGRAMS = testglib testgdate testgdateparser unicode-normalize unicode-collate $(timeloop)
testglib_LDADD = $(libglib)
testgdate_LDADD = $(libglib)
testgdateparser_LDADD = $(libglib)
unicode_normalize_LDADD = $(libglib)
unicode_collate_LDADD = $(libglib)
if ENABLE_TIMELOOP
timeloop_LDADD = $(libglib)
endif
test_programs = \
array-test \
$(CXX_TEST) \
date-test \
dirname-test \
gio-test \
hash-test \
iochannel-test \
list-test \
mainloop-test \
module-test \
node-test \
queue-test \
qsort-test \
rand-test \
relation-test \
shell-test \
slist-test \
spawn-test \
strfunc-test \
string-test \
thread-test \
threadpool-test \
tree-test \
type-test \
test_programs = \
array-test \
$(CXX_TEST) \
date-test \
dirname-test \
gio-test \
hash-test \
iochannel-test \
list-test \
mainloop-test \
module-test \
node-test \
queue-test \
qsort-test \
rand-test \
relation-test \
shell-test \
slist-test \
spawn-test \
strfunc-test \
string-test \
thread-test \
threadpool-test \
tree-test \
type-test \
unicode-caseconv \
unicode-encoding
test_scripts = run-markup-tests.sh
@ -108,6 +111,7 @@ threadpool_test_LDADD = $(thread_LDADD)
tree_test_LDADD = $(progs_LDADD)
type_test_LDADD = $(progs_LDADD)
unicode_encoding_LDADD = $(progs_LDADD)
unicode_caseconv_LDADD = $(progs_LDADD)
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la

796
tests/casefold.txt Normal file
View File

@ -0,0 +1,796 @@
# Test cases generated from Unicode 3.1 data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@ aabbcc@@
#
# Now the automatic tests
#
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
µ μ
À à
Á á
 â
à ã
Ä ä
Å å
Æ æ
Ç ç
È è
É é
Ê ê
Ë ë
Ì ì
Í í
Î î
Ï ï
Ð ð
Ñ ñ
Ò ò
Ó ó
Ô ô
Õ õ
Ö ö
Ø ø
Ù ù
Ú ú
Û û
Ü ü
Ý ý
Þ þ
ß ss
Ā ā
Ă ă
Ą ą
Ć ć
Ĉ ĉ
Ċ ċ
Č č
Ď ď
Đ đ
Ē ē
Ĕ ĕ
Ė ė
Ę ę
Ě ě
Ĝ ĝ
Ğ ğ
Ġ ġ
Ģ ģ
Ĥ ĥ
Ħ ħ
Ĩ ĩ
Ī ī
Ĭ ĭ
Į į
İ i
ı i
IJ ij
Ĵ ĵ
Ķ ķ
Ĺ ĺ
Ļ ļ
Ľ ľ
Ŀ ŀ
Ł ł
Ń ń
Ņ ņ
Ň ň
ʼn ʼn
Ŋ ŋ
Ō ō
Ŏ ŏ
Ő ő
Œ œ
Ŕ ŕ
Ŗ ŗ
Ř ř
Ś ś
Ŝ ŝ
Ş ş
Š š
Ţ ţ
Ť ť
Ŧ ŧ
Ũ ũ
Ū ū
Ŭ ŭ
Ů ů
Ű ű
Ų ų
Ŵ ŵ
Ŷ ŷ
Ÿ ÿ
Ź ź
Ż ż
Ž ž
ſ s
Ɓ ɓ
Ƃ ƃ
Ƅ ƅ
Ɔ ɔ
Ƈ ƈ
Ɖ ɖ
Ɗ ɗ
Ƌ ƌ
Ǝ ǝ
Ə ə
Ɛ ɛ
Ƒ ƒ
Ɠ ɠ
Ɣ ɣ
Ɩ ɩ
Ɨ ɨ
Ƙ ƙ
Ɯ ɯ
Ɲ ɲ
Ɵ ɵ
Ơ ơ
Ƣ ƣ
Ƥ ƥ
Ʀ ʀ
Ƨ ƨ
Ʃ ʃ
Ƭ ƭ
Ʈ ʈ
Ư ư
Ʊ ʊ
Ʋ ʋ
Ƴ ƴ
Ƶ ƶ
Ʒ ʒ
Ƹ ƹ
Ƽ ƽ
DŽ dž
Dž dž
LJ lj
Lj lj
NJ nj
Nj nj
Ǎ ǎ
Ǐ ǐ
Ǒ ǒ
Ǔ ǔ
Ǖ ǖ
Ǘ ǘ
Ǚ ǚ
Ǜ ǜ
Ǟ ǟ
Ǡ ǡ
Ǣ ǣ
Ǥ ǥ
Ǧ ǧ
Ǩ ǩ
Ǫ ǫ
Ǭ ǭ
Ǯ ǯ
ǰ ǰ
DZ dz
Dz dz
Ǵ ǵ
Ƕ ƕ
Ƿ ƿ
Ǹ ǹ
Ǻ ǻ
Ǽ ǽ
Ǿ ǿ
Ȁ ȁ
Ȃ ȃ
Ȅ ȅ
Ȇ ȇ
Ȉ ȉ
Ȋ ȋ
Ȍ ȍ
Ȏ ȏ
Ȑ ȑ
Ȓ ȓ
Ȕ ȕ
Ȗ ȗ
Ș ș
Ț ț
Ȝ ȝ
Ȟ ȟ
Ȣ ȣ
Ȥ ȥ
Ȧ ȧ
Ȩ ȩ
Ȫ ȫ
Ȭ ȭ
Ȯ ȯ
Ȱ ȱ
Ȳ ȳ
ͅ ι
Ά ά
Έ έ
Ή ή
Ί ί
Ό ό
Ύ ύ
Ώ ώ
ΐ ΐ
Α α
Β β
Γ γ
Δ δ
Ε ε
Ζ ζ
Η η
Θ θ
Ι ι
Κ κ
Λ λ
Μ μ
Ν ν
Ξ ξ
Ο ο
Π π
Ρ ρ
Σ σ
Τ τ
Υ υ
Φ φ
Χ χ
Ψ ψ
Ω ω
Ϊ ϊ
Ϋ ϋ
ΰ ΰ
ς σ
ϐ β
ϑ θ
ϕ φ
ϖ π
Ϛ ϛ
Ϝ ϝ
Ϟ ϟ
Ϡ ϡ
Ϣ ϣ
Ϥ ϥ
Ϧ ϧ
Ϩ ϩ
Ϫ ϫ
Ϭ ϭ
Ϯ ϯ
ϰ κ
ϱ ρ
ϲ σ
ϴ θ
ϵ ε
Ѐ ѐ
Ё ё
Ђ ђ
Ѓ ѓ
Є є
Ѕ ѕ
І і
Ї ї
Ј ј
Љ љ
Њ њ
Ћ ћ
Ќ ќ
Ѝ ѝ
Ў ў
Џ џ
А а
Б б
В в
Г г
Д д
Е е
Ж ж
З з
И и
Й й
К к
Л л
М м
Н н
О о
П п
Р р
С с
Т т
У у
Ф ф
Х х
Ц ц
Ч ч
Ш ш
Щ щ
Ъ ъ
Ы ы
Ь ь
Э э
Ю ю
Я я
Ѡ ѡ
Ѣ ѣ
Ѥ ѥ
Ѧ ѧ
Ѩ ѩ
Ѫ ѫ
Ѭ ѭ
Ѯ ѯ
Ѱ ѱ
Ѳ ѳ
Ѵ ѵ
Ѷ ѷ
Ѹ ѹ
Ѻ ѻ
Ѽ ѽ
Ѿ ѿ
Ҁ ҁ
Ҍ ҍ
Ҏ ҏ
Ґ ґ
Ғ ғ
Ҕ ҕ
Җ җ
Ҙ ҙ
Қ қ
Ҝ ҝ
Ҟ ҟ
Ҡ ҡ
Ң ң
Ҥ ҥ
Ҧ ҧ
Ҩ ҩ
Ҫ ҫ
Ҭ ҭ
Ү ү
Ұ ұ
Ҳ ҳ
Ҵ ҵ
Ҷ ҷ
Ҹ ҹ
Һ һ
Ҽ ҽ
Ҿ ҿ
Ӂ ӂ
Ӄ ӄ
Ӈ ӈ
Ӌ ӌ
Ӑ ӑ
Ӓ ӓ
Ӕ ӕ
Ӗ ӗ
Ә ә
Ӛ ӛ
Ӝ ӝ
Ӟ ӟ
Ӡ ӡ
Ӣ ӣ
Ӥ ӥ
Ӧ ӧ
Ө ө
Ӫ ӫ
Ӭ ӭ
Ӯ ӯ
Ӱ ӱ
Ӳ ӳ
Ӵ ӵ
Ӹ ӹ
Ա ա
Բ բ
Գ գ
Դ դ
Ե ե
Զ զ
Է է
Ը ը
Թ թ
Ժ ժ
Ի ի
Լ լ
Խ խ
Ծ ծ
Կ կ
Հ հ
Ձ ձ
Ղ ղ
Ճ ճ
Մ մ
Յ յ
Ն ն
Շ շ
Ո ո
Չ չ
Պ պ
Ջ ջ
Ռ ռ
Ս ս
Վ վ
Տ տ
Ր ր
Ց ց
Ւ ւ
Փ փ
Ք ք
Օ օ
Ֆ ֆ
և եւ
Ḁ ḁ
Ḃ ḃ
Ḅ ḅ
Ḇ ḇ
Ḉ ḉ
Ḋ ḋ
Ḍ ḍ
Ḏ ḏ
Ḑ ḑ
Ḓ ḓ
Ḕ ḕ
Ḗ ḗ
Ḙ ḙ
Ḛ ḛ
Ḝ ḝ
Ḟ ḟ
Ḡ ḡ
Ḣ ḣ
Ḥ ḥ
Ḧ ḧ
Ḩ ḩ
Ḫ ḫ
Ḭ ḭ
Ḯ ḯ
Ḱ ḱ
Ḳ ḳ
Ḵ ḵ
Ḷ ḷ
Ḹ ḹ
Ḻ ḻ
Ḽ ḽ
Ḿ ḿ
Ṁ ṁ
Ṃ ṃ
Ṅ ṅ
Ṇ ṇ
Ṉ ṉ
Ṋ ṋ
Ṍ ṍ
Ṏ ṏ
Ṑ ṑ
Ṓ ṓ
Ṕ ṕ
Ṗ ṗ
Ṙ ṙ
Ṛ ṛ
Ṝ ṝ
Ṟ ṟ
Ṡ ṡ
Ṣ ṣ
Ṥ ṥ
Ṧ ṧ
Ṩ ṩ
Ṫ ṫ
Ṭ ṭ
Ṯ ṯ
Ṱ ṱ
Ṳ ṳ
Ṵ ṵ
Ṷ ṷ
Ṹ ṹ
Ṻ ṻ
Ṽ ṽ
Ṿ ṿ
Ẁ ẁ
Ẃ ẃ
Ẅ ẅ
Ẇ ẇ
Ẉ ẉ
Ẋ ẋ
Ẍ ẍ
Ẏ ẏ
Ẑ ẑ
Ẓ ẓ
Ẕ ẕ
ẖ ẖ
ẗ ẗ
ẘ ẘ
ẙ ẙ
ẚ aʾ
ẛ ṡ
Ạ ạ
Ả ả
Ấ ấ
Ầ ầ
Ẩ ẩ
Ẫ ẫ
Ậ ậ
Ắ ắ
Ằ ằ
Ẳ ẳ
Ẵ ẵ
Ặ ặ
Ẹ ẹ
Ẻ ẻ
Ẽ ẽ
Ế ế
Ề ề
Ể ể
Ễ ễ
Ệ ệ
Ỉ ỉ
Ị ị
Ọ ọ
Ỏ ỏ
Ố ố
Ồ ồ
Ổ ổ
Ỗ ỗ
Ộ ộ
Ớ ớ
Ờ ờ
Ở ở
Ỡ ỡ
Ợ ợ
Ụ ụ
Ủ ủ
Ứ ứ
Ừ ừ
Ử ử
Ữ ữ
Ự ự
Ỳ ỳ
Ỵ ỵ
Ỷ ỷ
Ỹ ỹ
Ἀ ἀ
Ἁ ἁ
Ἂ ἂ
Ἃ ἃ
Ἄ ἄ
Ἅ ἅ
Ἆ ἆ
Ἇ ἇ
Ἐ ἐ
Ἑ ἑ
Ἒ ἒ
Ἓ ἓ
Ἔ ἔ
Ἕ ἕ
Ἠ ἠ
Ἡ ἡ
Ἢ ἢ
Ἣ ἣ
Ἤ ἤ
Ἥ ἥ
Ἦ ἦ
Ἧ ἧ
Ἰ ἰ
Ἱ ἱ
Ἲ ἲ
Ἳ ἳ
Ἴ ἴ
Ἵ ἵ
Ἶ ἶ
Ἷ ἷ
Ὀ ὀ
Ὁ ὁ
Ὂ ὂ
Ὃ ὃ
Ὄ ὄ
Ὅ ὅ
ὐ ὐ
ὒ ὒ
ὔ ὔ
ὖ ὖ
Ὑ ὑ
Ὓ ὓ
Ὕ ὕ
Ὗ ὗ
Ὠ ὠ
Ὡ ὡ
Ὢ ὢ
Ὣ ὣ
Ὤ ὤ
Ὥ ὥ
Ὦ ὦ
Ὧ ὧ
ᾀ ἀι
ᾁ ἁι
ᾂ ἂι
ᾃ ἃι
ᾄ ἄι
ᾅ ἅι
ᾆ ἆι
ᾇ ἇι
ᾈ ἀι
ᾉ ἁι
ᾊ ἂι
ᾋ ἃι
ᾌ ἄι
ᾍ ἅι
ᾎ ἆι
ᾏ ἇι
ᾐ ἠι
ᾑ ἡι
ᾒ ἢι
ᾓ ἣι
ᾔ ἤι
ᾕ ἥι
ᾖ ἦι
ᾗ ἧι
ᾘ ἠι
ᾙ ἡι
ᾚ ἢι
ᾛ ἣι
ᾜ ἤι
ᾝ ἥι
ᾞ ἦι
ᾟ ἧι
ᾠ ὠι
ᾡ ὡι
ᾢ ὢι
ᾣ ὣι
ᾤ ὤι
ᾥ ὥι
ᾦ ὦι
ᾧ ὧι
ᾨ ὠι
ᾩ ὡι
ᾪ ὢι
ᾫ ὣι
ᾬ ὤι
ᾭ ὥι
ᾮ ὦι
ᾯ ὧι
ᾲ ὰι
αι
ᾴ άι
ᾶ ᾶ
ᾷ ᾶι
Ᾰ ᾰ
Ᾱ ᾱ
Ὰ ὰ
Ά ά
αι
ι
ῂ ὴι
ῃ ηι
ῄ ήι
ῆ ῆ
ῇ ῆι
Ὲ ὲ
Έ έ
Ὴ ὴ
Ή ή
ῌ ηι
ῒ ῒ
ΐ ΐ
ῖ ῖ
ῗ ῗ
Ῐ ῐ
Ῑ ῑ
Ὶ ὶ
Ί ί
ῢ ῢ
ΰ ΰ
ῤ ῤ
ῦ ῦ
ῧ ῧ
Ῠ ῠ
Ῡ ῡ
Ὺ ὺ
Ύ ύ
Ῥ ῥ
ῲ ὼι
ῳ ωι
ῴ ώι
ῶ ῶ
ῷ ῶι
Ὸ ὸ
Ό ό
Ὼ ὼ
Ώ ώ
ῼ ωι
Ω ω
k
Å å
Ⅱ ⅱ
Ⅲ ⅲ
Ⅳ ⅳ
Ⅵ ⅵ
Ⅶ ⅶ
Ⅷ ⅷ
Ⅸ ⅸ
Ⅺ ⅺ
Ⅻ ⅻ
Ⓐ ⓐ
Ⓑ ⓑ
Ⓒ ⓒ
Ⓓ ⓓ
Ⓔ ⓔ
Ⓕ ⓕ
Ⓖ ⓖ
Ⓗ ⓗ
Ⓘ ⓘ
Ⓙ ⓙ
Ⓚ ⓚ
Ⓛ ⓛ
Ⓜ ⓜ
Ⓝ ⓝ
Ⓞ ⓞ
Ⓟ ⓟ
Ⓠ ⓠ
Ⓡ ⓡ
Ⓢ ⓢ
Ⓣ ⓣ
Ⓤ ⓤ
Ⓥ ⓥ
Ⓦ ⓦ
Ⓧ ⓧ
Ⓨ ⓨ
Ⓩ ⓩ
ff ff
fi fi
fl fl
ffi ffi
ffl ffl
ſt st
st st
ﬓ մն
ﬔ մե
ﬕ մի
ﬖ վն
ﬗ մխ

1543
tests/casemap.txt Normal file

File diff suppressed because it is too large Load Diff

82
tests/gen-casefold-txt.pl Executable file
View File

@ -0,0 +1,82 @@
#! /usr/bin/perl -w
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# gen-casefold-txt.pl - Generate test cases for casefolding from Unicode data.
# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
# Usage: gen-casefold-txt.pl UNICODE-VERSION CaseFolding.txt
# I consider the output of this program to be unrestricted. Use it as
# you will.
# Names of fields in the CaseFolding table
$FOLDING_CODE = 0;
$FOLDING_STATUS = 1;
$FOLDING_MAPPING = 2;

if (@ARGV != 2) {
    $0 =~ s@.*/@@;
    die "Usage: $0 UNICODE-VERSION CaseFolding.txt\n";
}

# Fixed header and hand-written cases; each test line is
# INPUT <tab> EXPECTED-CASEFOLD.
print <<EOT;
# Test cases generated from Unicode $ARGV[0] data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#
EOT

# Report why the data file could not be read instead of exiting silently.
open (INPUT, "< $ARGV[1]") || die "Cannot open $ARGV[1]: $!\n";

while (<INPUT>)
{
    # chomp removes only a trailing newline; chop would eat a real
    # character from a final line that has none.
    chomp;

    # Skip comment-only and blank lines, then drop any trailing comment.
    next if /^#/;
    next if /^\s*$/;
    s/\s*#.*//;

    my @fields = split ('\s*;\s*', $_, 30);

    my $raw_code = $fields[$FOLDING_CODE];
    my $code = hex ($raw_code);

    next if $code > 0xffff;	# FIXME!

    if ($#fields != 3)
    {
	printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
	next;
    }

    # Entries with status 'S' are not turned into test cases.
    next if ($fields[$FOLDING_STATUS] eq 'S');

    @values = map { hex ($_) } split /\s+/, $fields[$FOLDING_MAPPING];
    printf ("%s\t%s\n", pack ("U", $code), pack ("U*", @values));
}

close INPUT;

232
tests/gen-casemap-txt.pl Executable file
View File

@ -0,0 +1,232 @@
#! /usr/bin/perl -w
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# gen-casemap-txt.pl - Generate test cases for case mapping from Unicode data.
# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
# I consider the output of this program to be unrestricted. Use it as
# you will.
use utf8;
if (@ARGV != 3) {
$0 =~ s@.*/@@;
die "Usage: $0 UNICODE-VERSION UnicodeData.txt SpecialCasing.txt\n";
}
use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION);
# Names of fields in Unicode data table.
$CODE = 0;
$NAME = 1;
$CATEGORY = 2;
$COMBINING_CLASSES = 3;
$BIDI_CATEGORY = 4;
$DECOMPOSITION = 5;
$DECIMAL_VALUE = 6;
$DIGIT_VALUE = 7;
$NUMERIC_VALUE = 8;
$MIRRORED = 9;
$OLD_NAME = 10;
$COMMENT = 11;
$UPPER = 12;
$LOWER = 13;
$TITLE = 14;
# Names of fields in the SpecialCasing table
$CASE_CODE = 0;
$CASE_LOWER = 1;
$CASE_TITLE = 2;
$CASE_UPPER = 3;
$CASE_CONDITION = 4;
my @upper;
my @title;
my @lower;
# Read UnicodeData.txt (basic plane only) and record the case mappings of
# every character via process_one, synthesizing entries for code points
# the file does not list individually.
open (INPUT, "< $ARGV[1]") || exit 1;

$last_code = -1;
while (<INPUT>)
{
    chop;
    @fields = split (';', $_, 30);
    if ($#fields != 14)
    {
	printf STDERR ("Entry for $fields[$CODE] has wrong number of fields (%d)\n", $#fields);
    }

    $code = hex ($fields[$CODE]);

    last if ($code > 0xFFFF); # ignore characters out of the basic plane

    if ($code > $last_code + 1)
    {
	# Found a gap.
	if ($fields[$NAME] =~ /Last>/)
	{
	    # Fill the gap with the last character read,
	    # since this was a range specified in the char database
	    @gfields = @fields;
	}
	else
	{
	    # The gap represents undefined characters.  Only the type
	    # matters.
	    @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
			'', '', '', '');
	}
	for (++$last_code; $last_code < $code; ++$last_code)
	{
	    # Store the code in the field *array*; the original wrote to
	    # an unrelated hash (%gfields) by using {} instead of [].
	    $gfields[$CODE] = sprintf ("%04x", $last_code);
	    &process_one ($last_code, @gfields);
	}
    }
    &process_one ($code, @fields);
    $last_code = $code;
}

close INPUT;
# Read SpecialCasing.txt and override the mappings recorded so far with
# the unconditional multi-character mappings found there.
open (INPUT, "< $ARGV[2]") || exit 1;
while (<INPUT>)
{
my $code;
chop;
# Skip comment-only and blank lines, then drop any trailing comment.
next if /^#/;
next if /^\s*$/;
s/\s*#.*//;
@fields = split ('\s*;\s*', $_, 30);
$raw_code = $fields[$CASE_CODE];
$code = hex ($raw_code);
# Only lines that split into 5 or 6 fields are accepted.
if ($#fields != 4 && $#fields != 5)
{
printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
next;
}
# A sixth field means the line carries a condition.
if (defined $fields[5]) {
# Ignore conditional special cases - we'll handle them manually
next;
}
$upper[$code] = &make_hex ($fields[$CASE_UPPER]);
$lower[$code] = &make_hex ($fields[$CASE_LOWER]);
$title[$code] = &make_hex ($fields[$CASE_TITLE]);
}
close INPUT;
print <<EOT;
# Test cases generated from Unicode $ARGV[0] data
# by gen-case-tests.pl. Do not edit.
#
# Some special hand crafted tests
#
tr_TR\ti\ti\t\x{0130}\t\x{0130}\t# i => LATIN CAPITAL LETTER I WITH DOT ABOVE
tr_TR\tI\t\x{0131}\tI\tI\t# I => LATIN SMALL LETTER DOTLESS I
# Test reordering of YPOGEGRAMMENI across other accents
\t\x{03b1}\x{0345}\x{0314}\t\x{03b1}\x{0345}\x{314}\t\x{0391}\x{0345}\x{0314}\t\x{0391}\x{0314}\x{0399}\t
\t\x{03b1}\x{0314}\x{0345}\t\x{03b1}\x{314}\x{0345}\t\x{0391}\x{0314}\x{0345}\t\x{0391}\x{0314}\x{0399}\t
# Handling of final and nonfinal sigma
ΜΆΙΟΣ μάιος Μάιος ΜΆΙΟΣ
ΜΆΙΟΣ μάιος Μάιος ΜΆΙΟΣ
ΣΙΓΜΑ σιγμα Σιγμα ΣΙΓΜΑ
# Lithuanian rule of i followed by letter with dot. Not at all sure
# about the titlecase part here
lt_LT\ti\x{117}\ti\x{117}\tIe\tIE\t
lt_LT\tie\x{307}\tie\x{307}\tIe\tIE\t
#
# Now the automatic tests
#
EOT
&print_tests;
exit 0;
# Process a single character.
# Takes the numeric code point followed by the UnicodeData fields and
# fills @upper/@lower/@title for characters of category Ll, Lu or Lt;
# all other categories are left undefined.
sub process_one
{
my ($code, @fields) = @_;
my $type = $fields[$CATEGORY];
if ($type eq 'Ll')
{
# A lowercase letter is its own lowercase form.
$upper[$code] = make_hex ($fields[$UPPER]);
$lower[$code] = pack ("U", $code);
$title[$code] = make_hex ($fields[$TITLE]);
}
elsif ($type eq 'Lu')
{
# An uppercase letter is its own uppercase form.
$lower[$code] = make_hex ($fields[$LOWER]);
$upper[$code] = pack ("U", $code);
$title[$code] = make_hex ($fields[$TITLE]);
}
# NOTE(review): the title entry below is built from the LOWER field, not
# from the character itself or the TITLE field - confirm this is intended.
if ($type eq 'Lt')
{
$upper[$code] = make_hex ($fields[$UPPER]);
$lower[$code] = pack ("U", hex ($fields[$LOWER]));
$title[$code] = make_hex ($fields[$LOWER]);
}
}
# Emit one test line per code point below 0xffff that has at least one
# recorded case mapping: <empty locale>, char, lower, title, upper, comment.
sub print_tests
{
    foreach my $code (0 .. 0xfffe) {
	# Greek sigma needs special tests
	next if ($code == 0x3A3);

	my ($lower, $title, $upper) = ($lower[$code], $title[$code], $upper[$code]);

	# Nothing recorded for this code point: no test line.
	next unless (defined $upper || defined $lower || defined $title);

	printf "\t%s\t%s\t%s\t%s\t# %4X\n",
	    pack ("U", $code),
	    (defined $lower ? $lower : ""),
	    (defined $title ? $title : ""),
	    (defined $upper ? $upper : ""),
	    $code;
    }
}
# Convert a whitespace-separated list of hexadecimal code points into the
# corresponding character string.  An empty field or a literal "0" yields
# the empty string.
sub make_hex
{
    my ($codes) = @_;

    # Trim surrounding whitespace before looking at the content.
    $codes =~ s/^\s+//;
    $codes =~ s/\s+$//;

    return "" if ($codes eq "" || $codes eq "0");

    my @values = map { hex ($_) } split (/\s+/, $codes);
    return pack ("U*", @values);
}

116
tests/unicode-caseconv.c Normal file
View File

@ -0,0 +1,116 @@
#include <locale.h>
#include <stdlib.h>
#include <stdio.h>
#include <glib.h>
#include <string.h>
/* Test driver for g_utf8_strup(), g_utf8_strdown() and g_utf8_casefold().
 *
 * Reads $srcdir/casemap.txt (tab-separated: locale, test string, lower,
 * title, upper) and $srcdir/casefold.txt (tab-separated: test string,
 * expected casefold) and compares the library results with the expected
 * columns.  Lines starting with '#' are skipped.  Returns 0 if every
 * comparison succeeded, 1 otherwise; exits with 1 if a data file cannot
 * be opened.
 */
int main (int argc, char **argv)
{
  FILE *infile;
  char buffer[1024];
  char **strings;
  char *srcdir = getenv ("srcdir");
  char *filename;
  const char *locale;
  const char *test;
  char *convert;
  char *current_locale = setlocale (LC_CTYPE, NULL);
  gint result = 0;
  size_t line_len;

  if (!srcdir)
    srcdir = ".";
  filename = g_strconcat (srcdir, G_DIR_SEPARATOR_S, "casemap.txt", NULL);

  infile = fopen (filename, "r");
  if (!infile)
    {
      fprintf (stderr, "Failed to open %s\n", filename );
      exit (1);
    }

  while (fgets (buffer, sizeof(buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      strings = g_strsplit (buffer, "\t", -1);

      /* An empty first column means the test runs in the "C" locale. */
      locale = strings[0];
      if (!locale[0])
        locale = "C";

      if (strcmp (locale, current_locale) != 0)
        {
          setlocale (LC_CTYPE, locale);
          current_locale = setlocale (LC_CTYPE, NULL);

          /* Only the first two characters are compared, so the system
           * may report a longer name than the one requested.
           */
          if (strncmp (current_locale, locale, 2) != 0)
            {
              fprintf (stderr, "Cannot set locale to %s, skipping\n", locale);
              goto next;
            }
        }

      test = strings[1];

      convert = g_utf8_strup (test);
      if (strcmp (convert, strings[4]) != 0)
        {
          fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n",
                   test, convert, strings[4]);
          result = 1;
        }
      g_free (convert);

      convert = g_utf8_strdown (test);
      if (strcmp (convert, strings[2]) != 0)
        {
          fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n",
                   test, convert, strings[2]);
          result = 1;
        }
      g_free (convert);

    next:
      g_strfreev (strings);
    }

  fclose (infile);

  g_free (filename);
  filename = g_strconcat (srcdir, G_DIR_SEPARATOR_S, "casefold.txt", NULL);

  infile = fopen (filename, "r");
  if (!infile)
    {
      fprintf (stderr, "Failed to open %s\n", filename );
      exit (1);
    }

  while (fgets (buffer, sizeof(buffer), infile))
    {
      if (buffer[0] == '#')
        continue;

      /* Strip the line terminator, but only if there is one: a final
       * line without a newline must not lose its last character.
       */
      line_len = strlen (buffer);
      if (line_len > 0 && buffer[line_len - 1] == '\n')
        buffer[line_len - 1] = '\0';

      strings = g_strsplit (buffer, "\t", -1);

      test = strings[0];

      convert = g_utf8_casefold (test);
      if (strcmp (convert, strings[1]) != 0)
        {
          fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n",
                   test, convert, strings[1]);
          result = 1;
        }
      g_free (convert);

      g_strfreev (strings);
    }

  fclose (infile);
  g_free (filename);

  return result;
}

94
tests/unicode-collate.c Normal file
View File

@ -0,0 +1,94 @@
#include <glib.h>
#include <stdio.h>
#include <stdlib.h>
/* One input line together with its precomputed collation key. */
typedef struct {
/* Result of g_utf8_collate_key() for str; compared with strcmp() in compare_key(). */
const char *key;
/* The line text as read from the input, with its terminator cut off. */
const char *str;
} Line;
/* qsort() comparator: orders two Line records by passing their original
 * strings directly to g_utf8_collate(). */
int
compare_collate (const void *a, const void *b)
{
  return g_utf8_collate (((const Line *) a)->str, ((const Line *) b)->str);
}
/* qsort() comparator: orders two Line records by a plain byte-wise
 * strcmp() of their precomputed collation keys. */
int
compare_key (const void *a, const void *b)
{
  return strcmp (((const Line *) a)->key, ((const Line *) b)->key);
}
int main (int argc, char **argv)
{
GIOChannel *in;
GError *error = NULL;
GArray *line_array = g_array_new (FALSE, FALSE, sizeof(Line));
guint i;
if (argc != 1 && argc != 2)
{
fprintf (stderr, "Usage: unicode-collate [FILE]\n");
return 1;
}
if (argc == 2)
{
in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
if (!in)
{
fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
return 1;
}
}
else
{
in = g_io_channel_unix_new (fileno (stdin));
}
while (TRUE)
{
gsize term_pos;
gchar *str;
Line line;
if (g_io_channel_read_line (in, &str, NULL, &term_pos, &error) != G_IO_STATUS_NORMAL)
break;
str[term_pos] = '\0';
line.key = g_utf8_collate_key (str);
line.str = str;
g_array_append_val (line_array, line);
}
if (error)
{
fprintf (stderr, "Error reading test file, %s\n", error->message);
return 1;
}
printf ("== g_utf8_collate ==\n");
qsort (line_array->data, line_array->len, sizeof (Line), compare_collate);
for (i = 0; i < line_array->len; i++)
printf ("%s\n", g_array_index (line_array, Line, i).str);
printf ("== g_utf8_collate_key ==\n");
qsort (line_array->data, line_array->len, sizeof (Line), compare_key);
for (i = 0; i < line_array->len; i++)
printf ("%s\n", g_array_index (line_array, Line, i).str);
g_io_channel_close (in);
return 0;
}

194
tests/unicode-normalize.c Normal file
View File

@ -0,0 +1,194 @@
#include <glib.h>
#include <stdio.h>
#include <stdlib.h>
/* Overall verdict of the run: set to FALSE by test_form() on any mismatch
 * and turned into the process exit status at the end of main(). */
gboolean success = TRUE;
/*
 * decode:
 * @input: one or more hexadecimal code points separated by spaces.
 *
 * Converts the listed code points to UTF-8.
 *
 * Returns a newly allocated string (release it with g_free()), or NULL when
 * any code point falls in a range this test does not handle. A field that
 * cannot be parsed as hexadecimal terminates the program with status 1.
 */
static char *
decode (const gchar *input)
{
  GString *out = g_string_new (NULL);
  const gchar *p = input;

  for (;;)
    {
      unsigned code;
      char utf8[6];
      int n_bytes;

      if (sscanf (p, "%x", &code) != 1)
        {
          fprintf (stderr, "Error parsing character string %s\n", input);
          exit (1);
        }

      /* FIXME: We don't handle the > BMP or Hangul syllables */
      if (code > 0xffff ||                        /* > BMP */
          (code >= 0xac00 && code <= 0xd7ff))     /* Hangul syllables */
        {
          g_string_free (out, TRUE);
          return NULL;
        }

      n_bytes = g_unichar_to_utf8 (code, utf8);
      g_string_append_len (out, utf8, n_bytes);

      /* Step over the token just consumed, then over the separating spaces. */
      while (*p && *p != ' ')
        p++;
      while (*p == ' ')
        p++;

      if (!*p)
        break;
    }

  return g_string_free (out, FALSE);
}
/* Labels for the normalization forms, printed in failure messages.
 * test_form() indexes this table directly with its GNormalizeMode argument,
 * so the order presumably mirrors the numeric values of that enum
 * (TODO confirm against the GNormalizeMode declaration). */
const char *names[4] = {
"NFD",
"NFC",
"NFKD",
"NFKC"
};
/*
 * check_columns:
 * Normalizes each decoded column c[first] .. c[last - 1] with @mode and
 * compares the result with c[expected]. Every mismatch is reported on
 * stderr and clears the global success flag.
 *
 * The column is always reported 1-based, so each of the five columns gets
 * a distinct number in the "line/column" prefix.
 */
static void
check_columns (int             line,
               GNormalizeMode  mode,
               int             expected,
               char          **c,
               char          **raw,
               int             first,
               int             last)
{
  int i;

  for (i = first; i < last; i++)
    {
      char *result = g_utf8_normalize (c[i], mode);

      if (strcmp (result, c[expected]) != 0)
        {
          fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
          fprintf (stderr, " g_utf8_normalize (%s, %s) != %s\n",
                   raw[i], names[mode], raw[expected]);
          success = FALSE;
        }

      g_free (result);
    }
}

/*
 * test_form:
 * @line:      line number of the test record, used only in diagnostics.
 * @mode:      normalization form to apply.
 * @do_compat: selects which group of columns is checked when @mode is not
 *             a compatibility form (see below).
 * @expected:  index into @c of the column holding the expected result.
 * @c:         the five decoded (UTF-8) columns of the record.
 * @raw:       the undecoded text fields of the record; raw[5] is printed
 *             as the record's description in failure messages.
 *
 * Columns 1-3 (c[0..2]) are checked when @mode is NFKC/NFKD or @do_compat
 * is FALSE; columns 4-5 (c[3..4]) are checked when @mode is NFKC/NFKD or
 * @do_compat is TRUE. Failures are reported by check_columns().
 */
static void
test_form (int line,
           GNormalizeMode mode,
           gboolean do_compat,
           int expected,
           char **c,
           char **raw)
{
  gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
                             mode == G_NORMALIZE_NFKD);

  if (mode_is_compat || !do_compat)
    check_columns (line, mode, expected, c, raw, 0, 3);

  if (mode_is_compat || do_compat)
    check_columns (line, mode, expected, c, raw, 3, 5);
}
/*
 * process_one:
 * @line:    line number of the record, used only in diagnostics.
 * @columns: NULL-terminated vector of the record's ';'-separated fields.
 *
 * Decodes the first five fields and runs the normalization checks on them.
 * Records containing a code point that decode() declines to handle are
 * skipped without being counted as failures.
 *
 * Returns FALSE when the record has too few fields to be processed, TRUE
 * otherwise. Normalization mismatches do not affect the return value; they
 * are recorded by test_form().
 */
static gboolean
process_one (int line, gchar **columns)
{
  /* Fields 0..4 are decoded below; field 5 is read by test_form() for its
   * failure messages, so six fields must be present. */
  enum { N_DATA_COLUMNS = 5, N_REQUIRED_COLUMNS = 6 };

  char *c[N_DATA_COLUMNS];
  int n_columns = 0;
  int i;
  gboolean skip = FALSE;

  /* Make sure the vector is long enough before indexing into it. */
  while (n_columns < N_REQUIRED_COLUMNS && columns[n_columns])
    n_columns++;

  if (n_columns < N_REQUIRED_COLUMNS)
    {
      fprintf (stderr, "Line %d: expected at least %d ';'-separated fields, found %d\n",
               line, N_REQUIRED_COLUMNS, n_columns);
      return FALSE;
    }

  for (i = 0; i < N_DATA_COLUMNS; i++)
    {
      c[i] = decode (columns[i]);
      if (!c[i])
        skip = TRUE;
    }

  if (!skip)
    {
      test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
      test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
      test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
      test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
      test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
      test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
    }

  /* Entries left NULL by decode() are fine to pass to g_free(). */
  for (i = 0; i < N_DATA_COLUMNS; i++)
    g_free (c[i]);

  return TRUE;
}
int main (int argc, char **argv)
{
GIOChannel *in;
GError *error = NULL;
GString *buffer = g_string_new (NULL);
int line_to_do = 0;
int line = 1;
if (argc != 2 && argc != 3)
{
fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n");
return 1;
}
if (argc == 3)
line_to_do = atoi(argv[2]);
in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error);
if (!in)
{
fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
return 1;
}
while (TRUE)
{
gsize term_pos;
gchar **columns;
if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL)
break;
if (line_to_do && line != line_to_do)
goto next;
buffer->str[term_pos] = '\0';
if (buffer->str[0] == '#') /* Comment */
goto next;
if (buffer->str[0] == '@') /* Part */
{
fprintf (stderr, "\nProcessing %s\n", buffer->str + 1);
goto next;
}
columns = g_strsplit (buffer->str, ";", -1);
if (!process_one (line, columns))
return 1;
g_strfreev (columns);
next:
g_string_truncate (buffer, 0);
line++;
}
if (error)
{
fprintf (stderr, "Error reading test file, %s\n", error->message);
return 1;
}
g_io_channel_close (in);
g_string_free (buffer, TRUE);
return !success;
}