Add g_unicode_script_from_iso15924()

And adjust g_unicode_script_to_iso15924().
This commit is contained in:
Behdad Esfahbod 2011-07-20 22:11:08 -04:00
parent a5e94cbd36
commit 9bcb3d7457
5 changed files with 51 additions and 4 deletions

View File

@ -2720,6 +2720,7 @@ g_unicode_canonical_decomposition
g_unichar_get_mirror_char
GUnicodeScript
g_unichar_get_script
g_unicode_script_from_iso15924
g_unicode_script_to_iso15924
<SUBSECTION>

View File

@ -1229,6 +1229,7 @@ g_unichar_xdigit_value
g_unichar_type
g_unicode_canonical_decomposition
g_unicode_canonical_ordering
g_unicode_script_from_iso15924
g_unicode_script_to_iso15924
g_utf8_casefold
g_utf8_collate

View File

@ -476,6 +476,7 @@ typedef enum
} GUnicodeScript;
guint32 g_unicode_script_to_iso15924 (GUnicodeScript script);
GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924);
/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
* not null, sets *CHARSET to the name of the current locale's

View File

@ -1448,15 +1448,54 @@ static const guint32 iso15924_tags[] =
* See <ulink url="http://unicode.org/iso15924/codelists.html">Codes for the
* representation of names of scripts</ulink> for details.
*
* Return value: the ISO 15924 code for @script, encoded as an integer.
* Return value: the ISO 15924 code for @script, encoded as an integer,
* or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE, or
* ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
*
* Since: 2.30
*/
guint32
g_unicode_script_to_iso15924 (GUnicodeScript script)
{
/* NOTE(review): this listing looks like a rendered diff whose +/- markers
 * were stripped; the next line appears to be the removed, pre-change copy of
 * the range check that is repeated further down -- confirm against the
 * real file before relying on it. */
if (G_UNLIKELY (script < 0 || script >= (int) G_N_ELEMENTS (iso15924_tags)))
/* The invalid-code sentinel is answered with 0 instead of a four-letter tag. */
if (G_UNLIKELY (script == G_UNICODE_SCRIPT_INVALID_CODE))
return 0;
/* Values that do not index the tag table are answered with 0x5A7A7A7A,
 * i.e. the ASCII bytes 'Z' 'z' 'z' 'z' -- the ISO 15924 code for an
 * unknown script. */
if (G_UNLIKELY (script < 0 || script >= (int) G_N_ELEMENTS (iso15924_tags)))
return 0x5A7A7A7A;
/* In-range values index the tag table directly. */
return iso15924_tags[script];
}
/**
 * g_unicode_script_from_iso15924:
 * @iso15924: an ISO 15924 script code, encoded as a #guint32
 *
 * Finds the #GUnicodeScript that corresponds to @iso15924. ISO 15924
 * identifies every script by a four-letter code; Arabic, for instance,
 * is 'Arab'. The four letters are expected packed into a #guint32 in
 * big-endian order, so Arabic is passed as 0x41726162 (0x41 is ASCII 'A',
 * 0x72 is ASCII 'r', and so on).
 *
 * See <ulink url="http://unicode.org/iso15924/codelists.html">Codes for the
 * representation of names of scripts</ulink> for details.
 *
 * Return value: the Unicode script for @iso15924,
 *   %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero, or
 *   %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 matches no known script code.
 *
 * Since: 2.30
 */
GUnicodeScript
g_unicode_script_from_iso15924 (guint32 iso15924)
{
  unsigned int idx = 0;

  /* Zero means "no code supplied", which is distinct from an
   * unrecognised code. */
  if (iso15924 == 0)
    return G_UNICODE_SCRIPT_INVALID_CODE;

  /* A tag's position in the table is the numeric value of its script,
   * so the index of the first match is the answer. */
  while (idx < G_N_ELEMENTS (iso15924_tags))
    {
      if (iso15924 == iso15924_tags[idx])
        return (GUnicodeScript) idx;
      idx++;
    }

  return G_UNICODE_SCRIPT_UNKNOWN;
}

View File

@ -603,10 +603,15 @@ test_fully_decompose_len (void)
}
/* Exercises both directions of the GUnicodeScript <-> ISO 15924 mapping.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers
 * were stripped; "test_script_to_iso15924 (void)" appears to be the removed
 * old name and "test_iso15924 (void)" its replacement -- confirm against
 * the real file. */
static void
test_script_to_iso15924 (void)
test_iso15924 (void)
{
/* Script -> code: the invalid sentinel yields 0, the value 1000 is expected
 * to yield 0x5A7A7A7A ('Zzzz'), and Arabic yields 0x41726162 ('Arab'). */
g_assert_cmphex (0, ==, g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
g_assert_cmphex (0x41726162, ==, g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));
/* Code -> script: 0 yields the invalid sentinel, the value 0x12345678 is
 * expected to yield UNKNOWN, and 'Arab' yields Arabic. */
g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==, g_unicode_script_from_iso15924 (0));
g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==, g_unicode_script_from_iso15924 (0x12345678));
g_assert_cmphex (G_UNICODE_SCRIPT_ARABIC, ==, g_unicode_script_from_iso15924 (0x41726162));
}
int
@ -630,7 +635,7 @@ main (int argc,
g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
g_test_add_func ("/unicode/decompose-tail", test_decompose_tail);
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
g_test_add_func ("/unicode/script-to-iso15924", test_script_to_iso15924);
g_test_add_func ("/unicode/iso15924", test_iso15924);
return g_test_run();
}