Add g_unicode_script_from_iso15924()

And adjust g_unicode_script_to_iso15924().
2025-08-01 15:03:39 +02:00 · 2011-07-20 22:11:08 -04:00
parent a5e94cbd36
commit 9bcb3d7457
5 changed files with 51 additions and 4 deletions
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -2720,6 +2720,7 @@ g_unicode_canonical_decomposition
 g_unichar_get_mirror_char
 GUnicodeScript
 g_unichar_get_script
+g_unicode_script_from_iso15924
 g_unicode_script_to_iso15924

 <SUBSECTION>
--- a/glib/glib.symbols
+++ b/glib/glib.symbols
@@ -1229,6 +1229,7 @@ g_unichar_xdigit_value
 g_unichar_type
 g_unicode_canonical_decomposition
 g_unicode_canonical_ordering
+g_unicode_script_from_iso15924
 g_unicode_script_to_iso15924
 g_utf8_casefold
 g_utf8_collate
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -476,6 +476,7 @@ typedef enum
 } GUnicodeScript;

 guint32 g_unicode_script_to_iso15924 (GUnicodeScript script);
+GUnicodeScript g_unicode_script_from_iso15924 (guint32 iso15924);

 /* Returns TRUE if current locale uses UTF-8 charset.  If CHARSET is
 * not null, sets *CHARSET to the name of the current locale's
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -1448,15 +1448,54 @@ static const guint32 iso15924_tags[] =
 * See <ulink url="http://unicode.org/iso15924/codelists.html">Codes for the
 * representation of names of scripts</ulink> for details.
 *
- * Return value: the ISO 15924 code for @script, encoded as an integer.
+ * Return value: the ISO 15924 code for @script, encoded as an integer,
+ *   or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE, or the
+ *   ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
 *
 * Since: 2.30
 */
 guint32
 g_unicode_script_to_iso15924 (GUnicodeScript script)
 {
-  if (G_UNLIKELY (script < 0 || script >= (int) G_N_ELEMENTS (iso15924_tags)))
+  if (G_UNLIKELY (script == G_UNICODE_SCRIPT_INVALID_CODE))
    return 0;

+  if (G_UNLIKELY (script < 0 || script >= (int) G_N_ELEMENTS (iso15924_tags)))
+    return 0x5A7A7A7A;
+
  return iso15924_tags[script];
 }
+
+/**
+ * g_unicode_script_from_iso15924:
+ * @iso15924: an ISO 15924 script code, encoded as an integer
+ *
+ * Looks up the Unicode script for @iso15924.  ISO 15924 assigns four-letter
+ * codes to scripts.  For example, the code for Arabic is 'Arab'.
+ * This function accepts four-letter codes encoded as a #guint32 in a
+ * big-endian fashion.  That is, the code expected for Arabic is
+ * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
+ *
+ * See <ulink url="http://unicode.org/iso15924/codelists.html">Codes for the
+ * representation of names of scripts</ulink> for details.
+ *
+ * Return value: the Unicode script for @iso15924,
+ *   or %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero, or
+ *   %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 is unknown.
+ *
+ * Since: 2.30
+ */
+GUnicodeScript
+g_unicode_script_from_iso15924 (guint32 iso15924)
+{
+  unsigned int i;
+
+   if (!iso15924)
+     return G_UNICODE_SCRIPT_INVALID_CODE;
+
+  for (i = 0; i < G_N_ELEMENTS (iso15924_tags); i++)
+    if (iso15924_tags[i] == iso15924)
+      return (GUnicodeScript) i;
+
+  return G_UNICODE_SCRIPT_UNKNOWN;
+}
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -603,10 +603,15 @@ test_fully_decompose_len (void)
 }

 static void
-test_script_to_iso15924 (void)
+test_iso15924 (void)
 {
  g_assert_cmphex (0, ==, g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
+  g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
  g_assert_cmphex (0x41726162, ==, g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));
+
+  g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==, g_unicode_script_from_iso15924 (0));
+  g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==, g_unicode_script_from_iso15924 (0x12345678));
+  g_assert_cmphex (G_UNICODE_SCRIPT_ARABIC, ==, g_unicode_script_from_iso15924 (0x41726162));
 }

 int
@@ -630,7 +635,7 @@ main (int   argc,
  g_test_add_func ("/unicode/canonical-decomposition", test_canonical_decomposition);
  g_test_add_func ("/unicode/decompose-tail", test_decompose_tail);
  g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
-  g_test_add_func ("/unicode/script-to-iso15924", test_script_to_iso15924);
+  g_test_add_func ("/unicode/iso15924", test_iso15924);

  return g_test_run();
 }