From 6924a4bd368028fed193ed8c4962d5053aa0122c Mon Sep 17 00:00:00 2001
From: Matthias Clasen
Date: Wed, 11 Dec 2024 22:47:16 -0500
Subject: [PATCH] Add g_unichar_isnumber and g_unichar_number_value

All characters that have one of the categories Nd, Nl, No
get assigned a numeric value (not necessarily an integer).

The existing APIs g_unichar_isdigit and g_unichar_digit_value
don't provide access to this data, so add two new functions.
---
 glib/glib-unicode-values/gen-unicode-values.c | 211 +++++++
 glib/glib-unicode-values/meson.build          |   5 +
 glib/gunicode.h                               |   6 +
 glib/gunicodevalues.h                         | 513 ++++++++++++++++++
 glib/guniprop.c                               |  63 ++-
 glib/tests/unicode.c                          |  71 +++
 6 files changed, 867 insertions(+), 2 deletions(-)
 create mode 100644 glib/glib-unicode-values/gen-unicode-values.c
 create mode 100644 glib/glib-unicode-values/meson.build
 create mode 100644 glib/gunicodevalues.h

diff --git a/glib/glib-unicode-values/gen-unicode-values.c b/glib/glib-unicode-values/gen-unicode-values.c
new file mode 100644
index 000000000..2cf5e432e
--- /dev/null
+++ b/glib/glib-unicode-values/gen-unicode-values.c
@@ -0,0 +1,211 @@
+/* gen-unicode-values.c - generate gunicodevalues.h for glib
+ *
+ * Author:
+ *   Matthias Clasen
+ *
+ * Copyright (C) 2024 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* One code point and the numeric value read for it from the data file. */
+typedef struct {
+  gunichar ch;
+  double value;
+} NumericValueEntry;
+
+/* Every entry parsed so far; read_file() sorts them by code point at the end. */
+static NumericValueEntry entries[4000];
+static gsize num_entries;
+
+/* qsort() comparator: orders entries by ascending code point. */
+static int
+compare_entries (const void *p1, const void *p2)
+{
+  const NumericValueEntry *e1 = p1;
+  const NumericValueEntry *e2 = p2;
+
+  if (e1->ch < e2->ch)
+    return -1;
+  else if (e1->ch > e2->ch)
+    return 1;
+  else
+    return 0;
+}
+
+/* Parses lines of the form "XXXX ; value" or "XXXX..YYYY ; value" from @f
+ * into entries[], skipping blank lines and '#' comments, then sorts the
+ * table. Exits with status 1 on a malformed line or when entries[] is full.
+ */
+static void
+read_file (FILE *f)
+{
+  gsize l;
+  static char buf[4000];
+
+  l = 0;
+  while (fgets (buf, sizeof buf, f))
+    {
+      gunichar ch, ch1, ch2;
+      double value;
+      const char *s = buf;
+      int k;
+
+      l++;
+
+      while (*s == ' ')
+        s++;
+
+      if (s[0] == '#' || s[0] == '\0' || s[0] == '\n')
+        continue;
+
+      k = sscanf (s, "%x ; %lf", &ch, &value);
+      if (k == 2)
+        {
+          if (num_entries + 1 > G_N_ELEMENTS (entries))
+            {
+              fprintf (stderr, "Table overflow\n");
+              exit (1);
+            }
+
+          entries[num_entries].ch = ch;
+          entries[num_entries].value = value;
+          num_entries++;
+          continue;
+        }
+
+      k = sscanf (s, "%x..%x ; %lf", &ch1, &ch2, &value);
+      if (k == 3)
+        {
+          if (num_entries + (ch2 + 1 - ch1) > G_N_ELEMENTS (entries))
+            {
+              fprintf (stderr, "Table overflow\n");
+              exit (1);
+            }
+
+          for (ch = ch1; ch <= ch2; ch++)
+            {
+              entries[num_entries].ch = ch;
+              entries[num_entries].value = value;
+              num_entries++;
+            }
+          continue;
+        }
+
+      fprintf (stderr, "Malformed line %" G_GSIZE_FORMAT ": %s\n", l, s);
+      exit (1);
+    }
+
+  qsort (entries, num_entries, sizeof (NumericValueEntry), compare_entries);
+}
+
+/* Opens @filename, feeds it to read_file() and closes it; exits if it cannot be opened. */
+static void
+read_data (const char *filename)
+{
+  FILE *f;
+
+  fprintf (stderr, "Reading '%s'\n", filename);
+
+  if (!(f = fopen (filename, "rt")))
+    {
+      fprintf (stderr, "error: cannot open '%s' for reading\n", filename);
+      exit (1);
+    }
+
+  read_file (f);
+
+  fclose (f);
+}
+
+/* Prints gunicodevalues.h to stdout. Runs of consecutive code points whose
+ * values grow by one constant, non-negative integer step share one row.
+ */
+static void
+gen_unicode_values (void)
+{
+  fprintf (stderr, "Generating 'gunicodevalues.h'\n");
+  printf ("/* gunicodevalues.h\n"
+          " * generated by gen-unicode-values\n"
+          " * from the file DerivedNumericValues.txt\n"
+          " */\n\n");
+
+  printf ("#pragma once\n\n");
+
+  /* We collect ranges of consecutive characters whose
+   * numeric values have a fixed distance.
+   */
+  printf ("typedef struct\n"
+          "{\n"
+          " gunichar ch;\n"
+          " guint16 length;\n"
+          " guint16 increment;\n"
+          " double value;\n"
+          "} GUnicodeValue;\n\n");
+
+  printf ("static GUnicodeValue unicode_values[] = {\n");
+
+  guint16 increment = 0xffff;
+  for (gsize i = 0; i < num_entries; i++)
+    {
+      /* Peek at entries[i + 1] only while it is a parsed entry: the slot after
+       * the last one holds no data, and does not exist when the table is full. */
+      if (increment == 0xffff && i + 1 < num_entries &&
+          entries[i + 1].ch == entries[i].ch + 1)
+        {
+          double delta = entries[i + 1].value - entries[i].value;
+          if (delta == ceil (delta) && delta >= 0 && delta < 0xffff)
+            increment = (guint16) delta;
+        }
+
+      gsize k = i;
+      if (increment != 0xffff)
+        {
+          while (k + 1 < num_entries && entries[k + 1].ch == entries[k].ch + 1 &&
+                 entries[k + 1].value == entries[k].value + increment)
+            k++;
+        }
+
+      printf (" { %#x, %u, %u, %f },\n",
+              entries[i].ch,
+              (guint) (k - i + 1),
+              increment != 0xffff ? increment : 0,
+              entries[i].value);
+      i = k;
+      increment = 0xffff;
+    }
+
+  printf ("};\n\n");
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (argc < 2)
+    {
+      fprintf (stderr, "usage:\n gen-unicode-values /path/to/DerivedNumericValues.txt\n");
+      exit (1);
+    }
+
+  read_data (argv[1]);
+  gen_unicode_values ();
+
+  return 0;
+}
diff --git a/glib/glib-unicode-values/meson.build b/glib/glib-unicode-values/meson.build
new file mode 100644
index 000000000..ec8ded75b
--- /dev/null
+++ b/glib/glib-unicode-values/meson.build
@@ -0,0 +1,5 @@
+gen_unicode_values = executable('gen-unicode-values',
+  'gen-unicode-values.c',
+  dependencies : [libglib_dep],
+  install: false,
+)
diff --git a/glib/gunicode.h b/glib/gunicode.h
index cb8e30b41..9750a5dad 100644
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -739,6 +739,12 @@ gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
 GLIB_AVAILABLE_IN_ALL
 gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
 
+GLIB_AVAILABLE_IN_2_84
+gboolean g_unichar_isnumber (gunichar c)
G_GNUC_CONST; + +GLIB_AVAILABLE_IN_2_84 +double g_unichar_number_value (gunichar c) G_GNUC_CONST; + /* Return the Unicode character type of a given character. */ GLIB_AVAILABLE_IN_ALL GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST; diff --git a/glib/gunicodevalues.h b/glib/gunicodevalues.h new file mode 100644 index 000000000..f3e13ce47 --- /dev/null +++ b/glib/gunicodevalues.h @@ -0,0 +1,513 @@ +/* gunicodevalues.h + * generated by gen-unicode-values + * from the file DerivedNumericValues.txt + */ + +#pragma once + +typedef struct +{ + gunichar ch; + guint16 length; + guint16 increment; + double value; +} GUnicodeValue; + +static GUnicodeValue unicode_values[] = { + { 0x30, 10, 1, 0.000000 }, + { 0xb2, 2, 1, 2.000000 }, + { 0xb9, 1, 0, 1.000000 }, + { 0xbc, 1, 0, 0.250000 }, + { 0xbd, 1, 0, 0.500000 }, + { 0xbe, 1, 0, 0.750000 }, + { 0x660, 10, 1, 0.000000 }, + { 0x6f0, 10, 1, 0.000000 }, + { 0x7c0, 10, 1, 0.000000 }, + { 0x966, 10, 1, 0.000000 }, + { 0x9e6, 10, 1, 0.000000 }, + { 0x9f4, 1, 0, 0.062500 }, + { 0x9f5, 1, 0, 0.125000 }, + { 0x9f6, 1, 0, 0.187500 }, + { 0x9f7, 1, 0, 0.250000 }, + { 0x9f8, 1, 0, 0.750000 }, + { 0x9f9, 1, 0, 16.000000 }, + { 0xa66, 10, 1, 0.000000 }, + { 0xae6, 10, 1, 0.000000 }, + { 0xb66, 10, 1, 0.000000 }, + { 0xb72, 1, 0, 0.250000 }, + { 0xb73, 1, 0, 0.500000 }, + { 0xb74, 1, 0, 0.750000 }, + { 0xb75, 1, 0, 0.062500 }, + { 0xb76, 1, 0, 0.125000 }, + { 0xb77, 1, 0, 0.187500 }, + { 0xbe6, 11, 1, 0.000000 }, + { 0xbf1, 2, 900, 100.000000 }, + { 0xc66, 10, 1, 0.000000 }, + { 0xc78, 4, 1, 0.000000 }, + { 0xc7c, 3, 1, 1.000000 }, + { 0xce6, 10, 1, 0.000000 }, + { 0xd58, 1, 0, 0.006250 }, + { 0xd59, 1, 0, 0.025000 }, + { 0xd5a, 1, 0, 0.037500 }, + { 0xd5b, 1, 0, 0.050000 }, + { 0xd5c, 1, 0, 0.100000 }, + { 0xd5d, 1, 0, 0.150000 }, + { 0xd5e, 1, 0, 0.200000 }, + { 0xd66, 11, 1, 0.000000 }, + { 0xd71, 2, 900, 100.000000 }, + { 0xd73, 1, 0, 0.250000 }, + { 0xd74, 1, 0, 0.500000 }, + { 0xd75, 1, 0, 0.750000 }, + { 0xd76, 1, 0, 0.062500 
}, + { 0xd77, 1, 0, 0.125000 }, + { 0xd78, 1, 0, 0.187500 }, + { 0xde6, 10, 1, 0.000000 }, + { 0xe50, 10, 1, 0.000000 }, + { 0xed0, 10, 1, 0.000000 }, + { 0xf20, 10, 1, 0.000000 }, + { 0xf2a, 9, 1, 0.500000 }, + { 0xf33, 1, 0, -0.500000 }, + { 0x1040, 10, 1, 0.000000 }, + { 0x1090, 10, 1, 0.000000 }, + { 0x1369, 10, 1, 1.000000 }, + { 0x1373, 9, 10, 20.000000 }, + { 0x137c, 1, 0, 10000.000000 }, + { 0x16ee, 3, 1, 17.000000 }, + { 0x17e0, 10, 1, 0.000000 }, + { 0x17f0, 10, 1, 0.000000 }, + { 0x1810, 10, 1, 0.000000 }, + { 0x1946, 10, 1, 0.000000 }, + { 0x19d0, 10, 1, 0.000000 }, + { 0x19da, 1, 0, 1.000000 }, + { 0x1a80, 10, 1, 0.000000 }, + { 0x1a90, 10, 1, 0.000000 }, + { 0x1b50, 10, 1, 0.000000 }, + { 0x1bb0, 10, 1, 0.000000 }, + { 0x1c40, 10, 1, 0.000000 }, + { 0x1c50, 10, 1, 0.000000 }, + { 0x2070, 1, 0, 0.000000 }, + { 0x2074, 6, 1, 4.000000 }, + { 0x2080, 10, 1, 0.000000 }, + { 0x2150, 1, 0, 0.142857 }, + { 0x2151, 1, 0, 0.111111 }, + { 0x2152, 1, 0, 0.100000 }, + { 0x2153, 1, 0, 0.333333 }, + { 0x2154, 1, 0, 0.666667 }, + { 0x2155, 1, 0, 0.200000 }, + { 0x2156, 1, 0, 0.400000 }, + { 0x2157, 1, 0, 0.600000 }, + { 0x2158, 1, 0, 0.800000 }, + { 0x2159, 1, 0, 0.166667 }, + { 0x215a, 1, 0, 0.833333 }, + { 0x215b, 1, 0, 0.125000 }, + { 0x215c, 1, 0, 0.375000 }, + { 0x215d, 1, 0, 0.625000 }, + { 0x215e, 1, 0, 0.875000 }, + { 0x215f, 2, 0, 1.000000 }, + { 0x2161, 11, 1, 2.000000 }, + { 0x216c, 2, 50, 50.000000 }, + { 0x216e, 2, 500, 500.000000 }, + { 0x2170, 12, 1, 1.000000 }, + { 0x217c, 2, 50, 50.000000 }, + { 0x217e, 2, 500, 500.000000 }, + { 0x2180, 2, 4000, 1000.000000 }, + { 0x2182, 1, 0, 10000.000000 }, + { 0x2185, 2, 44, 6.000000 }, + { 0x2187, 2, 50000, 50000.000000 }, + { 0x2189, 1, 0, 0.000000 }, + { 0x2460, 20, 1, 1.000000 }, + { 0x2474, 20, 1, 1.000000 }, + { 0x2488, 20, 1, 1.000000 }, + { 0x24ea, 2, 11, 0.000000 }, + { 0x24ec, 9, 1, 12.000000 }, + { 0x24f5, 10, 1, 1.000000 }, + { 0x24ff, 1, 0, 0.000000 }, + { 0x2776, 10, 1, 1.000000 }, + { 0x2780, 10, 
1, 1.000000 }, + { 0x278a, 10, 1, 1.000000 }, + { 0x2cfd, 1, 0, 0.500000 }, + { 0x3007, 1, 0, 0.000000 }, + { 0x3021, 9, 1, 1.000000 }, + { 0x3038, 3, 10, 10.000000 }, + { 0x3192, 4, 1, 1.000000 }, + { 0x3220, 10, 1, 1.000000 }, + { 0x3248, 8, 10, 10.000000 }, + { 0x3251, 15, 1, 21.000000 }, + { 0x3280, 10, 1, 1.000000 }, + { 0x32b1, 15, 1, 36.000000 }, + { 0x3405, 1, 0, 5.000000 }, + { 0x3483, 1, 0, 2.000000 }, + { 0x382a, 1, 0, 5.000000 }, + { 0x3b4d, 1, 0, 7.000000 }, + { 0x4e00, 1, 0, 1.000000 }, + { 0x4e03, 1, 0, 7.000000 }, + { 0x4e07, 1, 0, 10000.000000 }, + { 0x4e09, 1, 0, 3.000000 }, + { 0x4e24, 1, 0, 2.000000 }, + { 0x4e5d, 1, 0, 9.000000 }, + { 0x4e8c, 1, 0, 2.000000 }, + { 0x4e94, 1, 0, 5.000000 }, + { 0x4e96, 1, 0, 4.000000 }, + { 0x4eac, 1, 0, 10000000000000000.000000 }, + { 0x4ebf, 1, 0, 100000000.000000 }, + { 0x4ec0, 1, 0, 10.000000 }, + { 0x4edf, 1, 0, 1000.000000 }, + { 0x4ee8, 1, 0, 3.000000 }, + { 0x4f0d, 1, 0, 5.000000 }, + { 0x4f70, 1, 0, 100.000000 }, + { 0x4fe9, 1, 0, 2.000000 }, + { 0x5006, 1, 0, 2.000000 }, + { 0x5104, 1, 0, 100000000.000000 }, + { 0x5146, 1, 0, 1000000.000000 }, + { 0x5169, 1, 0, 2.000000 }, + { 0x516b, 1, 0, 8.000000 }, + { 0x516d, 1, 0, 6.000000 }, + { 0x5341, 1, 0, 10.000000 }, + { 0x5343, 1, 0, 1000.000000 }, + { 0x5344, 2, 10, 20.000000 }, + { 0x534c, 1, 0, 40.000000 }, + { 0x53c1, 4, 0, 3.000000 }, + { 0x56db, 1, 0, 4.000000 }, + { 0x58f1, 1, 0, 1.000000 }, + { 0x58f9, 1, 0, 1.000000 }, + { 0x5e7a, 1, 0, 1.000000 }, + { 0x5efe, 2, 11, 9.000000 }, + { 0x5f0c, 3, 1, 1.000000 }, + { 0x5f10, 1, 0, 2.000000 }, + { 0x62d0, 1, 0, 7.000000 }, + { 0x62fe, 1, 0, 10.000000 }, + { 0x634c, 1, 0, 8.000000 }, + { 0x67d2, 1, 0, 7.000000 }, + { 0x6d1e, 1, 0, 0.000000 }, + { 0x6f06, 1, 0, 7.000000 }, + { 0x7396, 1, 0, 9.000000 }, + { 0x767e, 1, 0, 100.000000 }, + { 0x7695, 1, 0, 200.000000 }, + { 0x79ed, 1, 0, 1000000000.000000 }, + { 0x8086, 1, 0, 4.000000 }, + { 0x842c, 1, 0, 10000.000000 }, + { 0x8cae, 1, 0, 2.000000 }, + { 
0x8cb3, 1, 0, 2.000000 }, + { 0x8d30, 1, 0, 2.000000 }, + { 0x920e, 1, 0, 9.000000 }, + { 0x94a9, 1, 0, 9.000000 }, + { 0x9621, 1, 0, 1000.000000 }, + { 0x9646, 1, 0, 6.000000 }, + { 0x964c, 1, 0, 100.000000 }, + { 0x9678, 1, 0, 6.000000 }, + { 0x96f6, 1, 0, 0.000000 }, + { 0xa620, 10, 1, 0.000000 }, + { 0xa6e6, 9, 1, 1.000000 }, + { 0xa6ef, 1, 0, 0.000000 }, + { 0xa830, 1, 0, 0.250000 }, + { 0xa831, 1, 0, 0.500000 }, + { 0xa832, 1, 0, 0.750000 }, + { 0xa833, 1, 0, 0.062500 }, + { 0xa834, 1, 0, 0.125000 }, + { 0xa835, 1, 0, 0.187500 }, + { 0xa8d0, 10, 1, 0.000000 }, + { 0xa900, 10, 1, 0.000000 }, + { 0xa9d0, 10, 1, 0.000000 }, + { 0xa9f0, 10, 1, 0.000000 }, + { 0xaa50, 10, 1, 0.000000 }, + { 0xabf0, 10, 1, 0.000000 }, + { 0xf96b, 1, 0, 3.000000 }, + { 0xf973, 1, 0, 10.000000 }, + { 0xf978, 1, 0, 2.000000 }, + { 0xf9b2, 1, 0, 0.000000 }, + { 0xf9d1, 1, 0, 6.000000 }, + { 0xf9d3, 1, 0, 6.000000 }, + { 0xf9fd, 1, 0, 10.000000 }, + { 0xff10, 10, 1, 0.000000 }, + { 0x10107, 10, 1, 1.000000 }, + { 0x10111, 9, 10, 20.000000 }, + { 0x1011a, 9, 100, 200.000000 }, + { 0x10123, 9, 1000, 2000.000000 }, + { 0x1012c, 8, 10000, 20000.000000 }, + { 0x10140, 1, 0, 0.250000 }, + { 0x10141, 1, 0, 0.500000 }, + { 0x10142, 2, 4, 1.000000 }, + { 0x10144, 2, 450, 50.000000 }, + { 0x10146, 2, 45000, 5000.000000 }, + { 0x10148, 2, 5, 5.000000 }, + { 0x1014a, 2, 50, 50.000000 }, + { 0x1014c, 2, 500, 500.000000 }, + { 0x1014e, 1, 0, 5000.000000 }, + { 0x1014f, 2, 5, 5.000000 }, + { 0x10151, 2, 50, 50.000000 }, + { 0x10153, 2, 500, 500.000000 }, + { 0x10155, 2, 40000, 10000.000000 }, + { 0x10157, 1, 0, 10.000000 }, + { 0x10158, 3, 0, 1.000000 }, + { 0x1015b, 4, 0, 2.000000 }, + { 0x1015f, 2, 5, 5.000000 }, + { 0x10161, 4, 0, 10.000000 }, + { 0x10165, 2, 20, 30.000000 }, + { 0x10167, 3, 0, 50.000000 }, + { 0x1016a, 3, 200, 100.000000 }, + { 0x1016d, 4, 0, 500.000000 }, + { 0x10171, 2, 4000, 1000.000000 }, + { 0x10173, 2, 45, 5.000000 }, + { 0x10175, 2, 0, 0.500000 }, + { 0x10177, 1, 0, 
0.666667 }, + { 0x10178, 1, 0, 0.750000 }, + { 0x1018a, 1, 0, 0.000000 }, + { 0x1018b, 1, 0, 0.250000 }, + { 0x102e1, 10, 1, 1.000000 }, + { 0x102eb, 9, 10, 20.000000 }, + { 0x102f4, 8, 100, 200.000000 }, + { 0x10320, 2, 4, 1.000000 }, + { 0x10322, 2, 40, 10.000000 }, + { 0x10341, 1, 0, 90.000000 }, + { 0x1034a, 1, 0, 900.000000 }, + { 0x103d1, 2, 1, 1.000000 }, + { 0x103d3, 2, 10, 10.000000 }, + { 0x103d5, 1, 0, 100.000000 }, + { 0x104a0, 10, 1, 0.000000 }, + { 0x10858, 3, 1, 1.000000 }, + { 0x1085b, 2, 10, 10.000000 }, + { 0x1085d, 2, 900, 100.000000 }, + { 0x1085f, 1, 0, 10000.000000 }, + { 0x10879, 5, 1, 1.000000 }, + { 0x1087e, 2, 10, 10.000000 }, + { 0x108a7, 4, 1, 1.000000 }, + { 0x108ab, 2, 1, 4.000000 }, + { 0x108ad, 2, 10, 10.000000 }, + { 0x108af, 1, 0, 100.000000 }, + { 0x108fb, 2, 4, 1.000000 }, + { 0x108fd, 2, 10, 10.000000 }, + { 0x108ff, 1, 0, 100.000000 }, + { 0x10916, 2, 9, 1.000000 }, + { 0x10918, 2, 80, 20.000000 }, + { 0x1091a, 2, 1, 2.000000 }, + { 0x109bc, 1, 0, 0.916667 }, + { 0x109bd, 1, 0, 0.500000 }, + { 0x109c0, 10, 1, 1.000000 }, + { 0x109ca, 6, 10, 20.000000 }, + { 0x109d2, 10, 100, 100.000000 }, + { 0x109dc, 9, 1000, 2000.000000 }, + { 0x109e5, 9, 10000, 20000.000000 }, + { 0x109ee, 1, 0, 200000.000000 }, + { 0x109ef, 1, 0, 300000.000000 }, + { 0x109f0, 1, 0, 400000.000000 }, + { 0x109f1, 1, 0, 500000.000000 }, + { 0x109f2, 1, 0, 600000.000000 }, + { 0x109f3, 1, 0, 700000.000000 }, + { 0x109f4, 1, 0, 800000.000000 }, + { 0x109f5, 1, 0, 900000.000000 }, + { 0x109f6, 1, 0, 0.083333 }, + { 0x109f7, 1, 0, 0.166667 }, + { 0x109f8, 1, 0, 0.250000 }, + { 0x109f9, 1, 0, 0.333333 }, + { 0x109fa, 1, 0, 0.416667 }, + { 0x109fb, 1, 0, 0.500000 }, + { 0x109fc, 1, 0, 0.583333 }, + { 0x109fd, 1, 0, 0.666667 }, + { 0x109fe, 1, 0, 0.750000 }, + { 0x109ff, 1, 0, 0.833333 }, + { 0x10a40, 4, 1, 1.000000 }, + { 0x10a44, 2, 10, 10.000000 }, + { 0x10a46, 2, 900, 100.000000 }, + { 0x10a48, 1, 0, 0.500000 }, + { 0x10a7d, 2, 49, 1.000000 }, + { 0x10a9d, 2, 9, 
1.000000 }, + { 0x10a9f, 1, 0, 20.000000 }, + { 0x10aeb, 2, 4, 1.000000 }, + { 0x10aed, 2, 10, 10.000000 }, + { 0x10aef, 1, 0, 100.000000 }, + { 0x10b58, 4, 1, 1.000000 }, + { 0x10b5c, 2, 10, 10.000000 }, + { 0x10b5e, 2, 900, 100.000000 }, + { 0x10b78, 4, 1, 1.000000 }, + { 0x10b7c, 2, 10, 10.000000 }, + { 0x10b7e, 2, 900, 100.000000 }, + { 0x10ba9, 4, 1, 1.000000 }, + { 0x10bad, 2, 10, 10.000000 }, + { 0x10baf, 1, 0, 100.000000 }, + { 0x10cfa, 2, 4, 1.000000 }, + { 0x10cfc, 2, 40, 10.000000 }, + { 0x10cfe, 2, 900, 100.000000 }, + { 0x10d30, 10, 1, 0.000000 }, + { 0x10d40, 10, 1, 0.000000 }, + { 0x10e60, 10, 1, 1.000000 }, + { 0x10e6a, 9, 10, 20.000000 }, + { 0x10e73, 8, 100, 200.000000 }, + { 0x10e7b, 1, 0, 0.500000 }, + { 0x10e7c, 1, 0, 0.250000 }, + { 0x10e7d, 1, 0, 0.333333 }, + { 0x10e7e, 1, 0, 0.666667 }, + { 0x10f1d, 5, 1, 1.000000 }, + { 0x10f22, 3, 10, 10.000000 }, + { 0x10f25, 1, 0, 100.000000 }, + { 0x10f26, 1, 0, 0.500000 }, + { 0x10f51, 2, 9, 1.000000 }, + { 0x10f53, 2, 80, 20.000000 }, + { 0x10fc5, 4, 1, 1.000000 }, + { 0x10fc9, 2, 10, 10.000000 }, + { 0x10fcb, 1, 0, 100.000000 }, + { 0x11052, 10, 1, 1.000000 }, + { 0x1105c, 9, 10, 20.000000 }, + { 0x11065, 1, 0, 1000.000000 }, + { 0x11066, 10, 1, 0.000000 }, + { 0x110f0, 10, 1, 0.000000 }, + { 0x11136, 10, 1, 0.000000 }, + { 0x111d0, 10, 1, 0.000000 }, + { 0x111e1, 10, 1, 1.000000 }, + { 0x111eb, 9, 10, 20.000000 }, + { 0x111f4, 1, 0, 1000.000000 }, + { 0x112f0, 10, 1, 0.000000 }, + { 0x11450, 10, 1, 0.000000 }, + { 0x114d0, 10, 1, 0.000000 }, + { 0x11650, 10, 1, 0.000000 }, + { 0x116c0, 10, 1, 0.000000 }, + { 0x116d0, 10, 1, 0.000000 }, + { 0x116da, 10, 1, 0.000000 }, + { 0x11730, 11, 1, 0.000000 }, + { 0x1173b, 1, 0, 20.000000 }, + { 0x118e0, 11, 1, 0.000000 }, + { 0x118eb, 8, 10, 20.000000 }, + { 0x11950, 10, 1, 0.000000 }, + { 0x11bf0, 10, 1, 0.000000 }, + { 0x11c50, 10, 1, 0.000000 }, + { 0x11c5a, 10, 1, 1.000000 }, + { 0x11c64, 9, 10, 20.000000 }, + { 0x11d50, 10, 1, 0.000000 }, + { 0x11da0, 
10, 1, 0.000000 }, + { 0x11f50, 10, 1, 0.000000 }, + { 0x11fc0, 1, 0, 0.003125 }, + { 0x11fc1, 1, 0, 0.006250 }, + { 0x11fc2, 1, 0, 0.012500 }, + { 0x11fc3, 1, 0, 0.015625 }, + { 0x11fc4, 1, 0, 0.025000 }, + { 0x11fc5, 1, 0, 0.031250 }, + { 0x11fc6, 1, 0, 0.037500 }, + { 0x11fc7, 1, 0, 0.046875 }, + { 0x11fc8, 1, 0, 0.050000 }, + { 0x11fc9, 2, 0, 0.062500 }, + { 0x11fcb, 1, 0, 0.100000 }, + { 0x11fcc, 1, 0, 0.125000 }, + { 0x11fcd, 1, 0, 0.150000 }, + { 0x11fce, 1, 0, 0.187500 }, + { 0x11fcf, 1, 0, 0.200000 }, + { 0x11fd0, 1, 0, 0.250000 }, + { 0x11fd1, 2, 0, 0.500000 }, + { 0x11fd3, 1, 0, 0.750000 }, + { 0x11fd4, 1, 0, 0.003125 }, + { 0x12400, 8, 1, 2.000000 }, + { 0x12408, 7, 1, 3.000000 }, + { 0x1240f, 6, 1, 4.000000 }, + { 0x12415, 9, 1, 1.000000 }, + { 0x1241e, 5, 1, 1.000000 }, + { 0x12423, 2, 1, 2.000000 }, + { 0x12425, 7, 1, 3.000000 }, + { 0x1242c, 3, 1, 1.000000 }, + { 0x1242f, 3, 1, 3.000000 }, + { 0x12432, 1, 0, 216000.000000 }, + { 0x12433, 1, 0, 432000.000000 }, + { 0x12434, 3, 1, 1.000000 }, + { 0x12437, 3, 1, 3.000000 }, + { 0x1243a, 2, 0, 3.000000 }, + { 0x1243c, 4, 0, 4.000000 }, + { 0x12440, 2, 1, 6.000000 }, + { 0x12442, 2, 0, 7.000000 }, + { 0x12444, 2, 0, 8.000000 }, + { 0x12446, 4, 0, 9.000000 }, + { 0x1244a, 5, 1, 2.000000 }, + { 0x1244f, 4, 1, 1.000000 }, + { 0x12453, 2, 1, 4.000000 }, + { 0x12455, 1, 0, 5.000000 }, + { 0x12456, 2, 1, 2.000000 }, + { 0x12458, 2, 1, 1.000000 }, + { 0x1245a, 1, 0, 0.333333 }, + { 0x1245b, 1, 0, 0.666667 }, + { 0x1245c, 1, 0, 0.833333 }, + { 0x1245d, 1, 0, 0.333333 }, + { 0x1245e, 1, 0, 0.666667 }, + { 0x1245f, 1, 0, 0.125000 }, + { 0x12460, 1, 0, 0.250000 }, + { 0x12461, 1, 0, 0.166667 }, + { 0x12462, 2, 0, 0.250000 }, + { 0x12464, 1, 0, 0.500000 }, + { 0x12465, 1, 0, 0.333333 }, + { 0x12466, 1, 0, 0.666667 }, + { 0x12467, 2, 10, 40.000000 }, + { 0x12469, 6, 1, 4.000000 }, + { 0x16130, 10, 1, 0.000000 }, + { 0x16a60, 10, 1, 0.000000 }, + { 0x16ac0, 10, 1, 0.000000 }, + { 0x16b50, 10, 1, 0.000000 }, + { 
0x16b5b, 2, 90, 10.000000 }, + { 0x16b5d, 1, 0, 10000.000000 }, + { 0x16b5e, 1, 0, 1000000.000000 }, + { 0x16b5f, 1, 0, 100000000.000000 }, + { 0x16b60, 1, 0, 10000000000.000000 }, + { 0x16b61, 1, 0, 1000000000000.000000 }, + { 0x16d70, 10, 1, 0.000000 }, + { 0x16e80, 20, 1, 0.000000 }, + { 0x16e94, 3, 1, 1.000000 }, + { 0x1ccf0, 10, 1, 0.000000 }, + { 0x1d2c0, 20, 1, 0.000000 }, + { 0x1d2e0, 20, 1, 0.000000 }, + { 0x1d360, 10, 1, 1.000000 }, + { 0x1d36a, 8, 10, 20.000000 }, + { 0x1d372, 5, 1, 1.000000 }, + { 0x1d377, 2, 4, 1.000000 }, + { 0x1d7ce, 10, 1, 0.000000 }, + { 0x1d7d8, 10, 1, 0.000000 }, + { 0x1d7e2, 10, 1, 0.000000 }, + { 0x1d7ec, 10, 1, 0.000000 }, + { 0x1d7f6, 10, 1, 0.000000 }, + { 0x1e140, 10, 1, 0.000000 }, + { 0x1e2f0, 10, 1, 0.000000 }, + { 0x1e4f0, 10, 1, 0.000000 }, + { 0x1e5f1, 10, 1, 0.000000 }, + { 0x1e8c7, 9, 1, 1.000000 }, + { 0x1e950, 10, 1, 0.000000 }, + { 0x1ec71, 10, 1, 1.000000 }, + { 0x1ec7b, 9, 10, 20.000000 }, + { 0x1ec84, 9, 100, 200.000000 }, + { 0x1ec8d, 9, 1000, 2000.000000 }, + { 0x1ec96, 9, 10000, 20000.000000 }, + { 0x1ec9f, 1, 0, 200000.000000 }, + { 0x1eca0, 1, 0, 100000.000000 }, + { 0x1eca1, 1, 0, 10000000.000000 }, + { 0x1eca2, 1, 0, 20000000.000000 }, + { 0x1eca3, 9, 1, 1.000000 }, + { 0x1ecad, 1, 0, 0.250000 }, + { 0x1ecae, 1, 0, 0.500000 }, + { 0x1ecaf, 1, 0, 0.750000 }, + { 0x1ecb1, 2, 1, 1.000000 }, + { 0x1ecb3, 1, 0, 10000.000000 }, + { 0x1ecb4, 1, 0, 100000.000000 }, + { 0x1ed01, 10, 1, 1.000000 }, + { 0x1ed0b, 9, 10, 20.000000 }, + { 0x1ed14, 9, 100, 200.000000 }, + { 0x1ed1d, 9, 1000, 2000.000000 }, + { 0x1ed26, 8, 10000, 20000.000000 }, + { 0x1ed2f, 9, 1, 2.000000 }, + { 0x1ed38, 2, 200, 400.000000 }, + { 0x1ed3a, 2, 8000, 2000.000000 }, + { 0x1ed3c, 1, 0, 0.500000 }, + { 0x1ed3d, 1, 0, 0.166667 }, + { 0x1f100, 2, 0, 0.000000 }, + { 0x1f102, 9, 1, 1.000000 }, + { 0x1f10b, 2, 0, 0.000000 }, + { 0x1fbf0, 10, 1, 0.000000 }, + { 0x20001, 1, 0, 7.000000 }, + { 0x20064, 1, 0, 4.000000 }, + { 0x200e2, 1, 0, 4.000000 
},
+ { 0x20121, 1, 0, 5.000000 },
+ { 0x2092a, 1, 0, 1.000000 },
+ { 0x20983, 1, 0, 30.000000 },
+ { 0x2098c, 1, 0, 40.000000 },
+ { 0x2099c, 1, 0, 40.000000 },
+ { 0x20aea, 1, 0, 6.000000 },
+ { 0x20afd, 1, 0, 3.000000 },
+ { 0x20b19, 1, 0, 3.000000 },
+ { 0x22390, 1, 0, 2.000000 },
+ { 0x22998, 1, 0, 3.000000 },
+ { 0x23b1b, 1, 0, 3.000000 },
+ { 0x2626d, 1, 0, 4.000000 },
+ { 0x2f890, 1, 0, 9.000000 },
+};
+
diff --git a/glib/guniprop.c b/glib/guniprop.c
index fe0033fd6..274381b8b 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -33,6 +33,7 @@
 #include "gunicode.h"
 #include "gunichartables.h"
 #include "gmirroringtable.h"
+#include "gunicodevalues.h"
 #include "gscripttable.h"
 #include "gunicodeprivate.h"
 #ifdef G_OS_WIN32
@@ -712,12 +713,70 @@ g_unichar_xdigit_value (gunichar c)
   return -1;
 }
 
+/**
+ * g_unichar_isnumber:
+ * @c: a Unicode character
+ *
+ * Determines if a character is a number.
+ *
+ * This means the character is classified as one of
+ * [GLib.UnicodeType.decimal_number],
+ * [GLib.UnicodeType.letter_number] or
+ * [GLib.UnicodeType.other_number].
+ *
+ * Returns: true if the character is a number
+ *
+ * Since: 2.84
+ */
+gboolean
+g_unichar_isnumber (gunichar c)
+{
+  return IS (TYPE(c),
+             OR (G_UNICODE_DECIMAL_NUMBER,
+             OR (G_UNICODE_LETTER_NUMBER,
+             OR (G_UNICODE_OTHER_NUMBER,
+             0))));
+}
+
+/**
+ * g_unichar_number_value:
+ * @c: a Unicode character
+ *
+ * Looks up the numeric value of a character; it can be fractional or negative.
+ *
+ * Some characters failing [func@GLib.unichar_isnumber] (e.g. U+79ED) have a value too.
+ *
+ * Returns: the numeric value of @c if it has one. Otherwise, -1.
+ *
+ * Since: 2.84
+ */
+double
+g_unichar_number_value (gunichar c)
+{
+  int lower = 0;
+  int upper = G_N_ELEMENTS (unicode_values) - 1;
+
+  while (lower <= upper) /* binary search over runs ordered by first code point */
+    {
+      int mid = (lower + upper) / 2;
+
+      if (c < unicode_values[mid].ch)
+        upper = mid - 1;
+      else if (c > unicode_values[mid].ch + (unicode_values[mid].length - 1))
+        lower = mid + 1;
+      else /* c lies in this run: start value plus one increment per step */
+        return unicode_values[mid].value + (c - unicode_values[mid].ch) * unicode_values[mid].increment;
+    }
+
+  return -1;
+}
+
 /**
  * g_unichar_type:
  * @c: a Unicode character
- * 
+ *
  * Classifies a Unicode character by type.
- * 
+ *
  * Returns: the type of the character.
  **/
 GUnicodeType
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index 00b4f5b65..b2bf9e0fd 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -999,6 +999,75 @@ test_xdigit_value (void)
   g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
 }
 
+static void
+test_number (void)
+{
+  g_assert_false (g_unichar_isnumber (' '));
+  g_assert_false (g_unichar_isnumber ('a'));
+  g_assert_true (g_unichar_isnumber ('0'));
+  g_assert_true (g_unichar_isnumber ('9'));
+  g_assert_false (g_unichar_isnumber ('A'));
+  g_assert_false (g_unichar_isnumber ('-'));
+  g_assert_false (g_unichar_isnumber ('*'));
+  g_assert_false (g_unichar_isnumber (0xFF21)); /* Unichar fullwidth 'A' */
+  g_assert_false (g_unichar_isnumber (0xFF3A)); /* Unichar fullwidth 'Z' */
+  g_assert_false (g_unichar_isnumber (0xFF41)); /* Unichar fullwidth 'a' */
+  g_assert_false (g_unichar_isnumber (0xFF5A)); /* Unichar fullwidth 'z' */
+  g_assert_true (g_unichar_isnumber (0xFF10)); /* Unichar fullwidth '0' */
+  g_assert_true (g_unichar_isnumber (0xFF19)); /* Unichar fullwidth '9' */
+  g_assert_false (g_unichar_isnumber (0xFF0A)); /* Unichar fullwidth '*' */
+  g_assert_true (g_unichar_isnumber (0x2161)); /* Roman numeral 2 'Ⅱ' */
+  g_assert_true (g_unichar_isnumber (0x216D)); /* Roman numeral 100 'Ⅽ' */
+  g_assert_true (g_unichar_isnumber (0x2180)); /* Roman numeral 1000 'ↀ' */
+  g_assert_true (g_unichar_isnumber (0x00BC)); /* ¼ one quarter */
+  g_assert_false (g_unichar_isnumber (0x79ED)); /* 秭 one billion, not a numeric category */
+
+  /*** Testing TYPE() border cases ***/
+  g_assert_false (g_unichar_isnumber (0x3FF5));
+  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
+  g_assert_false (g_unichar_isnumber (0xFFEFF));
+  /* U+E0001 Language Tag */
+  g_assert_false (g_unichar_isnumber (0xE0001));
+  g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR));
+  g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR + 1));
+  g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR_PART1));
+  g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR_PART1 + 1));
+}
+
+static void
+test_number_value (void)
+{
+  g_assert_cmpfloat (g_unichar_number_value (' '), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value ('a'), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value ('0'), ==, 0);
+  g_assert_cmpfloat (g_unichar_number_value ('9'), ==, 9);
+  g_assert_cmpfloat (g_unichar_number_value ('A'), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value ('-'), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value (0xFF21), ==, -1); /* Unichar 'A' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF3A), ==, -1); /* Unichar 'Z' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF41), ==, -1); /* Unichar 'a' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF5A), ==, -1); /* Unichar 'z' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF10), ==, 0); /* Unichar '0' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF19), ==, 9); /* Unichar '9' */
+  g_assert_cmpfloat (g_unichar_number_value (0xFF0A), ==, -1); /* Unichar '*' */
+  g_assert_cmpfloat (g_unichar_number_value (0x2161), ==, 2); /* Roman numeral 2 'Ⅱ' */
+  g_assert_cmpfloat (g_unichar_number_value (0x216D), ==, 100); /* Roman numeral 100 'Ⅽ' */
+  g_assert_cmpfloat (g_unichar_number_value (0x2180), ==, 1000); /* Roman numeral 1000 'ↀ' */
+  g_assert_cmpfloat (g_unichar_number_value (0x00BC), ==, 0.25); /* ¼ one quarter */
+  g_assert_cmpfloat (g_unichar_number_value (0x79ED), ==, 1000000000); /* 秭 one billion */
+
+  /*** Testing TYPE() border cases ***/
+  g_assert_cmpfloat (g_unichar_number_value (0x3FF5), ==, -1);
+  /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
+  g_assert_cmpfloat (g_unichar_number_value (0xFFEFF), ==, -1);
+  /* U+E0001 Language Tag */
+  g_assert_cmpfloat (g_unichar_number_value (0xE0001), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
+  g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
+}
+
 /* Test that g_unichar_ispunct() returns the correct value for various
  * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
 static void
@@ -2111,6 +2180,8 @@ main (int argc,
   g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
   g_test_add_func ("/unicode/zero-width", test_zerowidth);
   g_test_add_func ("/unicode/normalize", test_normalize);
+  g_test_add_func ("/unicode/number", test_number);
+  g_test_add_func ("/unicode/number-value", test_number_value);
 
   return g_test_run();
 }