Add g_unichar_isnumber and g_unichar_number_value

All characters that have one of the categories Nd, Nl, No get
assigned a numeric value (not necessarily an integer). The
existing APIs g_unichar_isdigit and g_unichar_digit_value
don't provide access to this data, so add two new functions.
This commit is contained in:
Matthias Clasen 2024-12-11 22:47:16 -05:00
parent c50836535a
commit 6924a4bd36
6 changed files with 854 additions and 2 deletions

View File

@ -0,0 +1,198 @@
/* gen-unicode-values.c - generate gunicodevalues.h for glib
*
* Author:
* Matthias Clasen
*
* Copyright (C) 2024 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <glib.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
/* One record read from the input file: a code point together with the
 * numeric value listed for it. */
typedef struct {
gunichar ch; /* the code point */
double value; /* its numeric value, parsed as a double */
} NumericValueEntry;
/* Fixed-capacity storage for every parsed entry; read_file() fills it
 * and sorts it by code point. */
static NumericValueEntry entries[4000];
/* Number of slots of entries[] that are currently in use. */
static gsize num_entries;
/* qsort() comparator: orders NumericValueEntry records by ascending
 * code point.
 */
static int
compare_entries (const void *p1, const void *p2)
{
  const NumericValueEntry *lhs = p1;
  const NumericValueEntry *rhs = p2;

  /* Evaluates to -1, 0 or 1; comparing instead of subtracting avoids
   * any wrap-around on the unsigned code points. */
  return (lhs->ch > rhs->ch) - (lhs->ch < rhs->ch);
}
static void
read_file (FILE *f)
{
gsize l;
static char buf[4000];
l = 0;
while (fgets (buf, sizeof buf, f))
{
gunichar ch, ch1, ch2;
double value;
const char *s = buf;
int k;
l++;
while (*s == ' ')
s++;
if (s[0] == '#' || s[0] == '\0' || s[0] == '\n')
continue;
k = sscanf (s, "%x ; %lf", &ch, &value);
if (k == 2)
{
if (num_entries + 1 > G_N_ELEMENTS (entries))
{
fprintf (stderr, "Table overflow\n");
exit (1);
}
entries[num_entries].ch = ch;
entries[num_entries].value = value;
num_entries++;
continue;
}
k = sscanf (s, "%x..%x ; %lf", &ch1, &ch2, &value);
if (k == 3)
{
if (num_entries + (ch2 + 1 - ch1) > G_N_ELEMENTS (entries))
{
fprintf (stderr, "Table overflow\n");
exit (1);
}
for (ch = ch1; ch <= ch2; ch++)
{
entries[num_entries].ch = ch;
entries[num_entries].value = value;
num_entries++;
}
continue;
}
fprintf (stderr, "Malformed line: %s\n", s);
exit (1);
}
qsort (entries, num_entries, sizeof (NumericValueEntry), compare_entries);
}
/* Open @filename, hand the stream to read_file() and close it again.
 *
 * Progress is reported on stderr. If the file cannot be opened, an
 * error is printed and the process exits with status 1.
 */
static void
read_data (const char *filename)
{
  FILE *f;

  fprintf (stderr, "Reading '%s'\n", filename);

  f = fopen (filename, "rt");
  if (f == NULL)
    {
      /* Newline-terminated like every other diagnostic of this tool, so
       * the message does not run into whatever is printed next. */
      fprintf (stderr, "error: cannot open '%s' for reading\n", filename);
      exit (1);
    }

  read_file (f);
  fclose (f);
}
/* Write the contents of gunicodevalues.h to stdout.
 *
 * The sorted entries[] table is compressed into runs. A run covers
 * consecutive code points whose values grow by one constant step, the
 * "increment", which must be a non-negative integer below 0xffff (0 is
 * allowed and means "same value"). Each run is emitted as one
 * GUnicodeValue initializer:
 *
 *   { first code point, length of the run, increment, value of first }
 *
 * An entry that cannot start such a run is emitted on its own with
 * length 1 and increment 0.
 */
static void
gen_unicode_values (void)
{
  fprintf (stderr, "Generating 'gunicodevalues.h'\n");

  printf ("/* gunicodevalues.h\n"
          " * generated by gen-unicode-values\n"
          " * from the file DerivedNumericValues.txt\n"
          " */\n\n");
  printf ("#pragma once\n\n");

  /* We collect ranges of consecutive characters whose
   * numeric values have a fixed distance.
   */
  printf ("typedef struct\n"
          "{\n"
          " gunichar ch;\n"
          " guint16 length;\n"
          " guint16 increment;\n"
          " double value;\n"
          "} GUnicodeValue;\n\n");

  /* The table is only ever read, so emit it as const data. */
  printf ("static const GUnicodeValue unicode_values[] = {\n");

  for (gsize i = 0; i < num_entries; i++)
    {
      /* 0xffff is the in-band marker for "no usable increment". */
      guint16 increment = 0xffff;
      gsize k = i;

      /* Only look at the successor if there is one: entries[num_entries]
       * lies outside the array when the table is completely full. */
      if (i + 1 < num_entries &&
          entries[i + 1].ch == entries[i].ch + 1)
        {
          double delta = entries[i + 1].value - entries[i].value;

          if (delta == ceil (delta) && delta >= 0 && delta < 0xffff)
            increment = (guint16) delta;
        }

      if (increment != 0xffff)
        {
          /* Extend the run for as long as the code points stay adjacent
           * and the values keep the same step. */
          while (k + 1 < num_entries &&
                 entries[k + 1].ch == entries[k].ch + 1 &&
                 entries[k + 1].value == entries[k].value + increment)
            k++;
        }

      /* 17 significant digits are enough to reproduce any double
       * exactly; "%f" would cut values off after six fractional digits. */
      printf (" { %#x, %u, %u, %.17g },\n",
              entries[i].ch,
              (guint) (k - i + 1),
              (guint) (increment != 0xffff ? increment : 0),
              entries[i].value);

      /* Continue after the run that was just written. */
      i = k;
    }

  printf ("};\n\n");
}
/* Entry point. The first command line argument is the path of the data
 * file to read; the generated header is written to stdout. Without an
 * argument a usage message is printed and the exit status is 1.
 */
int
main (int argc, const char **argv)
{
  if (argc >= 2)
    {
      read_data (argv[1]);
      gen_unicode_values ();
      return 0;
    }

  fprintf (stderr, "usage:\n gen-unicode-values /path/to/DerivedNumericValues.txt\n");
  exit (1);
}

View File

@ -0,0 +1,5 @@
# Helper program built from gen-unicode-values.c (the generator for
# gunicodevalues.h). It links against libglib and is not installed.
gen_unicode_values = executable('gen-unicode-values',
'gen-unicode-values.c',
dependencies : [libglib_dep],
install: false,
)

View File

@ -739,6 +739,12 @@ gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
GLIB_AVAILABLE_IN_ALL
gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
/* Return whether a character is a number, and the numeric value
 * of a character (which need not be an integer). */
GLIB_AVAILABLE_IN_2_84
gboolean g_unichar_isnumber (gunichar c) G_GNUC_CONST;
GLIB_AVAILABLE_IN_2_84
double g_unichar_number_value (gunichar c) G_GNUC_CONST;
/* Return the Unicode character type of a given character. */
GLIB_AVAILABLE_IN_ALL
GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;

513
glib/gunicodevalues.h Normal file
View File

@ -0,0 +1,513 @@
/* gunicodevalues.h
* generated by gen-unicode-values
* from the file DerivedNumericValues.txt
*/
#pragma once
/* One run of consecutive code points whose numeric values form an
 * arithmetic progression: the code point ch + n (for n < length) has
 * the value value + n * increment. */
typedef struct
{
gunichar ch; /* first code point of the run */
guint16 length; /* number of consecutive code points in the run */
guint16 increment; /* step added to the value per code point */
double value; /* numeric value of the first code point */
} GUnicodeValue;
static GUnicodeValue unicode_values[] = {
{ 0x30, 10, 1, 0.000000 },
{ 0xb2, 2, 1, 2.000000 },
{ 0xb9, 1, 0, 1.000000 },
{ 0xbc, 1, 0, 0.250000 },
{ 0xbd, 1, 0, 0.500000 },
{ 0xbe, 1, 0, 0.750000 },
{ 0x660, 10, 1, 0.000000 },
{ 0x6f0, 10, 1, 0.000000 },
{ 0x7c0, 10, 1, 0.000000 },
{ 0x966, 10, 1, 0.000000 },
{ 0x9e6, 10, 1, 0.000000 },
{ 0x9f4, 1, 0, 0.062500 },
{ 0x9f5, 1, 0, 0.125000 },
{ 0x9f6, 1, 0, 0.187500 },
{ 0x9f7, 1, 0, 0.250000 },
{ 0x9f8, 1, 0, 0.750000 },
{ 0x9f9, 1, 0, 16.000000 },
{ 0xa66, 10, 1, 0.000000 },
{ 0xae6, 10, 1, 0.000000 },
{ 0xb66, 10, 1, 0.000000 },
{ 0xb72, 1, 0, 0.250000 },
{ 0xb73, 1, 0, 0.500000 },
{ 0xb74, 1, 0, 0.750000 },
{ 0xb75, 1, 0, 0.062500 },
{ 0xb76, 1, 0, 0.125000 },
{ 0xb77, 1, 0, 0.187500 },
{ 0xbe6, 11, 1, 0.000000 },
{ 0xbf1, 2, 900, 100.000000 },
{ 0xc66, 10, 1, 0.000000 },
{ 0xc78, 4, 1, 0.000000 },
{ 0xc7c, 3, 1, 1.000000 },
{ 0xce6, 10, 1, 0.000000 },
{ 0xd58, 1, 0, 0.006250 },
{ 0xd59, 1, 0, 0.025000 },
{ 0xd5a, 1, 0, 0.037500 },
{ 0xd5b, 1, 0, 0.050000 },
{ 0xd5c, 1, 0, 0.100000 },
{ 0xd5d, 1, 0, 0.150000 },
{ 0xd5e, 1, 0, 0.200000 },
{ 0xd66, 11, 1, 0.000000 },
{ 0xd71, 2, 900, 100.000000 },
{ 0xd73, 1, 0, 0.250000 },
{ 0xd74, 1, 0, 0.500000 },
{ 0xd75, 1, 0, 0.750000 },
{ 0xd76, 1, 0, 0.062500 },
{ 0xd77, 1, 0, 0.125000 },
{ 0xd78, 1, 0, 0.187500 },
{ 0xde6, 10, 1, 0.000000 },
{ 0xe50, 10, 1, 0.000000 },
{ 0xed0, 10, 1, 0.000000 },
{ 0xf20, 10, 1, 0.000000 },
{ 0xf2a, 9, 1, 0.500000 },
{ 0xf33, 1, 0, -0.500000 },
{ 0x1040, 10, 1, 0.000000 },
{ 0x1090, 10, 1, 0.000000 },
{ 0x1369, 10, 1, 1.000000 },
{ 0x1373, 9, 10, 20.000000 },
{ 0x137c, 1, 0, 10000.000000 },
{ 0x16ee, 3, 1, 17.000000 },
{ 0x17e0, 10, 1, 0.000000 },
{ 0x17f0, 10, 1, 0.000000 },
{ 0x1810, 10, 1, 0.000000 },
{ 0x1946, 10, 1, 0.000000 },
{ 0x19d0, 10, 1, 0.000000 },
{ 0x19da, 1, 0, 1.000000 },
{ 0x1a80, 10, 1, 0.000000 },
{ 0x1a90, 10, 1, 0.000000 },
{ 0x1b50, 10, 1, 0.000000 },
{ 0x1bb0, 10, 1, 0.000000 },
{ 0x1c40, 10, 1, 0.000000 },
{ 0x1c50, 10, 1, 0.000000 },
{ 0x2070, 1, 0, 0.000000 },
{ 0x2074, 6, 1, 4.000000 },
{ 0x2080, 10, 1, 0.000000 },
{ 0x2150, 1, 0, 0.142857 },
{ 0x2151, 1, 0, 0.111111 },
{ 0x2152, 1, 0, 0.100000 },
{ 0x2153, 1, 0, 0.333333 },
{ 0x2154, 1, 0, 0.666667 },
{ 0x2155, 1, 0, 0.200000 },
{ 0x2156, 1, 0, 0.400000 },
{ 0x2157, 1, 0, 0.600000 },
{ 0x2158, 1, 0, 0.800000 },
{ 0x2159, 1, 0, 0.166667 },
{ 0x215a, 1, 0, 0.833333 },
{ 0x215b, 1, 0, 0.125000 },
{ 0x215c, 1, 0, 0.375000 },
{ 0x215d, 1, 0, 0.625000 },
{ 0x215e, 1, 0, 0.875000 },
{ 0x215f, 2, 0, 1.000000 },
{ 0x2161, 11, 1, 2.000000 },
{ 0x216c, 2, 50, 50.000000 },
{ 0x216e, 2, 500, 500.000000 },
{ 0x2170, 12, 1, 1.000000 },
{ 0x217c, 2, 50, 50.000000 },
{ 0x217e, 2, 500, 500.000000 },
{ 0x2180, 2, 4000, 1000.000000 },
{ 0x2182, 1, 0, 10000.000000 },
{ 0x2185, 2, 44, 6.000000 },
{ 0x2187, 2, 50000, 50000.000000 },
{ 0x2189, 1, 0, 0.000000 },
{ 0x2460, 20, 1, 1.000000 },
{ 0x2474, 20, 1, 1.000000 },
{ 0x2488, 20, 1, 1.000000 },
{ 0x24ea, 2, 11, 0.000000 },
{ 0x24ec, 9, 1, 12.000000 },
{ 0x24f5, 10, 1, 1.000000 },
{ 0x24ff, 1, 0, 0.000000 },
{ 0x2776, 10, 1, 1.000000 },
{ 0x2780, 10, 1, 1.000000 },
{ 0x278a, 10, 1, 1.000000 },
{ 0x2cfd, 1, 0, 0.500000 },
{ 0x3007, 1, 0, 0.000000 },
{ 0x3021, 9, 1, 1.000000 },
{ 0x3038, 3, 10, 10.000000 },
{ 0x3192, 4, 1, 1.000000 },
{ 0x3220, 10, 1, 1.000000 },
{ 0x3248, 8, 10, 10.000000 },
{ 0x3251, 15, 1, 21.000000 },
{ 0x3280, 10, 1, 1.000000 },
{ 0x32b1, 15, 1, 36.000000 },
{ 0x3405, 1, 0, 5.000000 },
{ 0x3483, 1, 0, 2.000000 },
{ 0x382a, 1, 0, 5.000000 },
{ 0x3b4d, 1, 0, 7.000000 },
{ 0x4e00, 1, 0, 1.000000 },
{ 0x4e03, 1, 0, 7.000000 },
{ 0x4e07, 1, 0, 10000.000000 },
{ 0x4e09, 1, 0, 3.000000 },
{ 0x4e24, 1, 0, 2.000000 },
{ 0x4e5d, 1, 0, 9.000000 },
{ 0x4e8c, 1, 0, 2.000000 },
{ 0x4e94, 1, 0, 5.000000 },
{ 0x4e96, 1, 0, 4.000000 },
{ 0x4eac, 1, 0, 10000000000000000.000000 },
{ 0x4ebf, 1, 0, 100000000.000000 },
{ 0x4ec0, 1, 0, 10.000000 },
{ 0x4edf, 1, 0, 1000.000000 },
{ 0x4ee8, 1, 0, 3.000000 },
{ 0x4f0d, 1, 0, 5.000000 },
{ 0x4f70, 1, 0, 100.000000 },
{ 0x4fe9, 1, 0, 2.000000 },
{ 0x5006, 1, 0, 2.000000 },
{ 0x5104, 1, 0, 100000000.000000 },
{ 0x5146, 1, 0, 1000000.000000 },
{ 0x5169, 1, 0, 2.000000 },
{ 0x516b, 1, 0, 8.000000 },
{ 0x516d, 1, 0, 6.000000 },
{ 0x5341, 1, 0, 10.000000 },
{ 0x5343, 1, 0, 1000.000000 },
{ 0x5344, 2, 10, 20.000000 },
{ 0x534c, 1, 0, 40.000000 },
{ 0x53c1, 4, 0, 3.000000 },
{ 0x56db, 1, 0, 4.000000 },
{ 0x58f1, 1, 0, 1.000000 },
{ 0x58f9, 1, 0, 1.000000 },
{ 0x5e7a, 1, 0, 1.000000 },
{ 0x5efe, 2, 11, 9.000000 },
{ 0x5f0c, 3, 1, 1.000000 },
{ 0x5f10, 1, 0, 2.000000 },
{ 0x62d0, 1, 0, 7.000000 },
{ 0x62fe, 1, 0, 10.000000 },
{ 0x634c, 1, 0, 8.000000 },
{ 0x67d2, 1, 0, 7.000000 },
{ 0x6d1e, 1, 0, 0.000000 },
{ 0x6f06, 1, 0, 7.000000 },
{ 0x7396, 1, 0, 9.000000 },
{ 0x767e, 1, 0, 100.000000 },
{ 0x7695, 1, 0, 200.000000 },
{ 0x79ed, 1, 0, 1000000000.000000 },
{ 0x8086, 1, 0, 4.000000 },
{ 0x842c, 1, 0, 10000.000000 },
{ 0x8cae, 1, 0, 2.000000 },
{ 0x8cb3, 1, 0, 2.000000 },
{ 0x8d30, 1, 0, 2.000000 },
{ 0x920e, 1, 0, 9.000000 },
{ 0x94a9, 1, 0, 9.000000 },
{ 0x9621, 1, 0, 1000.000000 },
{ 0x9646, 1, 0, 6.000000 },
{ 0x964c, 1, 0, 100.000000 },
{ 0x9678, 1, 0, 6.000000 },
{ 0x96f6, 1, 0, 0.000000 },
{ 0xa620, 10, 1, 0.000000 },
{ 0xa6e6, 9, 1, 1.000000 },
{ 0xa6ef, 1, 0, 0.000000 },
{ 0xa830, 1, 0, 0.250000 },
{ 0xa831, 1, 0, 0.500000 },
{ 0xa832, 1, 0, 0.750000 },
{ 0xa833, 1, 0, 0.062500 },
{ 0xa834, 1, 0, 0.125000 },
{ 0xa835, 1, 0, 0.187500 },
{ 0xa8d0, 10, 1, 0.000000 },
{ 0xa900, 10, 1, 0.000000 },
{ 0xa9d0, 10, 1, 0.000000 },
{ 0xa9f0, 10, 1, 0.000000 },
{ 0xaa50, 10, 1, 0.000000 },
{ 0xabf0, 10, 1, 0.000000 },
{ 0xf96b, 1, 0, 3.000000 },
{ 0xf973, 1, 0, 10.000000 },
{ 0xf978, 1, 0, 2.000000 },
{ 0xf9b2, 1, 0, 0.000000 },
{ 0xf9d1, 1, 0, 6.000000 },
{ 0xf9d3, 1, 0, 6.000000 },
{ 0xf9fd, 1, 0, 10.000000 },
{ 0xff10, 10, 1, 0.000000 },
{ 0x10107, 10, 1, 1.000000 },
{ 0x10111, 9, 10, 20.000000 },
{ 0x1011a, 9, 100, 200.000000 },
{ 0x10123, 9, 1000, 2000.000000 },
{ 0x1012c, 8, 10000, 20000.000000 },
{ 0x10140, 1, 0, 0.250000 },
{ 0x10141, 1, 0, 0.500000 },
{ 0x10142, 2, 4, 1.000000 },
{ 0x10144, 2, 450, 50.000000 },
{ 0x10146, 2, 45000, 5000.000000 },
{ 0x10148, 2, 5, 5.000000 },
{ 0x1014a, 2, 50, 50.000000 },
{ 0x1014c, 2, 500, 500.000000 },
{ 0x1014e, 1, 0, 5000.000000 },
{ 0x1014f, 2, 5, 5.000000 },
{ 0x10151, 2, 50, 50.000000 },
{ 0x10153, 2, 500, 500.000000 },
{ 0x10155, 2, 40000, 10000.000000 },
{ 0x10157, 1, 0, 10.000000 },
{ 0x10158, 3, 0, 1.000000 },
{ 0x1015b, 4, 0, 2.000000 },
{ 0x1015f, 2, 5, 5.000000 },
{ 0x10161, 4, 0, 10.000000 },
{ 0x10165, 2, 20, 30.000000 },
{ 0x10167, 3, 0, 50.000000 },
{ 0x1016a, 3, 200, 100.000000 },
{ 0x1016d, 4, 0, 500.000000 },
{ 0x10171, 2, 4000, 1000.000000 },
{ 0x10173, 2, 45, 5.000000 },
{ 0x10175, 2, 0, 0.500000 },
{ 0x10177, 1, 0, 0.666667 },
{ 0x10178, 1, 0, 0.750000 },
{ 0x1018a, 1, 0, 0.000000 },
{ 0x1018b, 1, 0, 0.250000 },
{ 0x102e1, 10, 1, 1.000000 },
{ 0x102eb, 9, 10, 20.000000 },
{ 0x102f4, 8, 100, 200.000000 },
{ 0x10320, 2, 4, 1.000000 },
{ 0x10322, 2, 40, 10.000000 },
{ 0x10341, 1, 0, 90.000000 },
{ 0x1034a, 1, 0, 900.000000 },
{ 0x103d1, 2, 1, 1.000000 },
{ 0x103d3, 2, 10, 10.000000 },
{ 0x103d5, 1, 0, 100.000000 },
{ 0x104a0, 10, 1, 0.000000 },
{ 0x10858, 3, 1, 1.000000 },
{ 0x1085b, 2, 10, 10.000000 },
{ 0x1085d, 2, 900, 100.000000 },
{ 0x1085f, 1, 0, 10000.000000 },
{ 0x10879, 5, 1, 1.000000 },
{ 0x1087e, 2, 10, 10.000000 },
{ 0x108a7, 4, 1, 1.000000 },
{ 0x108ab, 2, 1, 4.000000 },
{ 0x108ad, 2, 10, 10.000000 },
{ 0x108af, 1, 0, 100.000000 },
{ 0x108fb, 2, 4, 1.000000 },
{ 0x108fd, 2, 10, 10.000000 },
{ 0x108ff, 1, 0, 100.000000 },
{ 0x10916, 2, 9, 1.000000 },
{ 0x10918, 2, 80, 20.000000 },
{ 0x1091a, 2, 1, 2.000000 },
{ 0x109bc, 1, 0, 0.916667 },
{ 0x109bd, 1, 0, 0.500000 },
{ 0x109c0, 10, 1, 1.000000 },
{ 0x109ca, 6, 10, 20.000000 },
{ 0x109d2, 10, 100, 100.000000 },
{ 0x109dc, 9, 1000, 2000.000000 },
{ 0x109e5, 9, 10000, 20000.000000 },
{ 0x109ee, 1, 0, 200000.000000 },
{ 0x109ef, 1, 0, 300000.000000 },
{ 0x109f0, 1, 0, 400000.000000 },
{ 0x109f1, 1, 0, 500000.000000 },
{ 0x109f2, 1, 0, 600000.000000 },
{ 0x109f3, 1, 0, 700000.000000 },
{ 0x109f4, 1, 0, 800000.000000 },
{ 0x109f5, 1, 0, 900000.000000 },
{ 0x109f6, 1, 0, 0.083333 },
{ 0x109f7, 1, 0, 0.166667 },
{ 0x109f8, 1, 0, 0.250000 },
{ 0x109f9, 1, 0, 0.333333 },
{ 0x109fa, 1, 0, 0.416667 },
{ 0x109fb, 1, 0, 0.500000 },
{ 0x109fc, 1, 0, 0.583333 },
{ 0x109fd, 1, 0, 0.666667 },
{ 0x109fe, 1, 0, 0.750000 },
{ 0x109ff, 1, 0, 0.833333 },
{ 0x10a40, 4, 1, 1.000000 },
{ 0x10a44, 2, 10, 10.000000 },
{ 0x10a46, 2, 900, 100.000000 },
{ 0x10a48, 1, 0, 0.500000 },
{ 0x10a7d, 2, 49, 1.000000 },
{ 0x10a9d, 2, 9, 1.000000 },
{ 0x10a9f, 1, 0, 20.000000 },
{ 0x10aeb, 2, 4, 1.000000 },
{ 0x10aed, 2, 10, 10.000000 },
{ 0x10aef, 1, 0, 100.000000 },
{ 0x10b58, 4, 1, 1.000000 },
{ 0x10b5c, 2, 10, 10.000000 },
{ 0x10b5e, 2, 900, 100.000000 },
{ 0x10b78, 4, 1, 1.000000 },
{ 0x10b7c, 2, 10, 10.000000 },
{ 0x10b7e, 2, 900, 100.000000 },
{ 0x10ba9, 4, 1, 1.000000 },
{ 0x10bad, 2, 10, 10.000000 },
{ 0x10baf, 1, 0, 100.000000 },
{ 0x10cfa, 2, 4, 1.000000 },
{ 0x10cfc, 2, 40, 10.000000 },
{ 0x10cfe, 2, 900, 100.000000 },
{ 0x10d30, 10, 1, 0.000000 },
{ 0x10d40, 10, 1, 0.000000 },
{ 0x10e60, 10, 1, 1.000000 },
{ 0x10e6a, 9, 10, 20.000000 },
{ 0x10e73, 8, 100, 200.000000 },
{ 0x10e7b, 1, 0, 0.500000 },
{ 0x10e7c, 1, 0, 0.250000 },
{ 0x10e7d, 1, 0, 0.333333 },
{ 0x10e7e, 1, 0, 0.666667 },
{ 0x10f1d, 5, 1, 1.000000 },
{ 0x10f22, 3, 10, 10.000000 },
{ 0x10f25, 1, 0, 100.000000 },
{ 0x10f26, 1, 0, 0.500000 },
{ 0x10f51, 2, 9, 1.000000 },
{ 0x10f53, 2, 80, 20.000000 },
{ 0x10fc5, 4, 1, 1.000000 },
{ 0x10fc9, 2, 10, 10.000000 },
{ 0x10fcb, 1, 0, 100.000000 },
{ 0x11052, 10, 1, 1.000000 },
{ 0x1105c, 9, 10, 20.000000 },
{ 0x11065, 1, 0, 1000.000000 },
{ 0x11066, 10, 1, 0.000000 },
{ 0x110f0, 10, 1, 0.000000 },
{ 0x11136, 10, 1, 0.000000 },
{ 0x111d0, 10, 1, 0.000000 },
{ 0x111e1, 10, 1, 1.000000 },
{ 0x111eb, 9, 10, 20.000000 },
{ 0x111f4, 1, 0, 1000.000000 },
{ 0x112f0, 10, 1, 0.000000 },
{ 0x11450, 10, 1, 0.000000 },
{ 0x114d0, 10, 1, 0.000000 },
{ 0x11650, 10, 1, 0.000000 },
{ 0x116c0, 10, 1, 0.000000 },
{ 0x116d0, 10, 1, 0.000000 },
{ 0x116da, 10, 1, 0.000000 },
{ 0x11730, 11, 1, 0.000000 },
{ 0x1173b, 1, 0, 20.000000 },
{ 0x118e0, 11, 1, 0.000000 },
{ 0x118eb, 8, 10, 20.000000 },
{ 0x11950, 10, 1, 0.000000 },
{ 0x11bf0, 10, 1, 0.000000 },
{ 0x11c50, 10, 1, 0.000000 },
{ 0x11c5a, 10, 1, 1.000000 },
{ 0x11c64, 9, 10, 20.000000 },
{ 0x11d50, 10, 1, 0.000000 },
{ 0x11da0, 10, 1, 0.000000 },
{ 0x11f50, 10, 1, 0.000000 },
{ 0x11fc0, 1, 0, 0.003125 },
{ 0x11fc1, 1, 0, 0.006250 },
{ 0x11fc2, 1, 0, 0.012500 },
{ 0x11fc3, 1, 0, 0.015625 },
{ 0x11fc4, 1, 0, 0.025000 },
{ 0x11fc5, 1, 0, 0.031250 },
{ 0x11fc6, 1, 0, 0.037500 },
{ 0x11fc7, 1, 0, 0.046875 },
{ 0x11fc8, 1, 0, 0.050000 },
{ 0x11fc9, 2, 0, 0.062500 },
{ 0x11fcb, 1, 0, 0.100000 },
{ 0x11fcc, 1, 0, 0.125000 },
{ 0x11fcd, 1, 0, 0.150000 },
{ 0x11fce, 1, 0, 0.187500 },
{ 0x11fcf, 1, 0, 0.200000 },
{ 0x11fd0, 1, 0, 0.250000 },
{ 0x11fd1, 2, 0, 0.500000 },
{ 0x11fd3, 1, 0, 0.750000 },
{ 0x11fd4, 1, 0, 0.003125 },
{ 0x12400, 8, 1, 2.000000 },
{ 0x12408, 7, 1, 3.000000 },
{ 0x1240f, 6, 1, 4.000000 },
{ 0x12415, 9, 1, 1.000000 },
{ 0x1241e, 5, 1, 1.000000 },
{ 0x12423, 2, 1, 2.000000 },
{ 0x12425, 7, 1, 3.000000 },
{ 0x1242c, 3, 1, 1.000000 },
{ 0x1242f, 3, 1, 3.000000 },
{ 0x12432, 1, 0, 216000.000000 },
{ 0x12433, 1, 0, 432000.000000 },
{ 0x12434, 3, 1, 1.000000 },
{ 0x12437, 3, 1, 3.000000 },
{ 0x1243a, 2, 0, 3.000000 },
{ 0x1243c, 4, 0, 4.000000 },
{ 0x12440, 2, 1, 6.000000 },
{ 0x12442, 2, 0, 7.000000 },
{ 0x12444, 2, 0, 8.000000 },
{ 0x12446, 4, 0, 9.000000 },
{ 0x1244a, 5, 1, 2.000000 },
{ 0x1244f, 4, 1, 1.000000 },
{ 0x12453, 2, 1, 4.000000 },
{ 0x12455, 1, 0, 5.000000 },
{ 0x12456, 2, 1, 2.000000 },
{ 0x12458, 2, 1, 1.000000 },
{ 0x1245a, 1, 0, 0.333333 },
{ 0x1245b, 1, 0, 0.666667 },
{ 0x1245c, 1, 0, 0.833333 },
{ 0x1245d, 1, 0, 0.333333 },
{ 0x1245e, 1, 0, 0.666667 },
{ 0x1245f, 1, 0, 0.125000 },
{ 0x12460, 1, 0, 0.250000 },
{ 0x12461, 1, 0, 0.166667 },
{ 0x12462, 2, 0, 0.250000 },
{ 0x12464, 1, 0, 0.500000 },
{ 0x12465, 1, 0, 0.333333 },
{ 0x12466, 1, 0, 0.666667 },
{ 0x12467, 2, 10, 40.000000 },
{ 0x12469, 6, 1, 4.000000 },
{ 0x16130, 10, 1, 0.000000 },
{ 0x16a60, 10, 1, 0.000000 },
{ 0x16ac0, 10, 1, 0.000000 },
{ 0x16b50, 10, 1, 0.000000 },
{ 0x16b5b, 2, 90, 10.000000 },
{ 0x16b5d, 1, 0, 10000.000000 },
{ 0x16b5e, 1, 0, 1000000.000000 },
{ 0x16b5f, 1, 0, 100000000.000000 },
{ 0x16b60, 1, 0, 10000000000.000000 },
{ 0x16b61, 1, 0, 1000000000000.000000 },
{ 0x16d70, 10, 1, 0.000000 },
{ 0x16e80, 20, 1, 0.000000 },
{ 0x16e94, 3, 1, 1.000000 },
{ 0x1ccf0, 10, 1, 0.000000 },
{ 0x1d2c0, 20, 1, 0.000000 },
{ 0x1d2e0, 20, 1, 0.000000 },
{ 0x1d360, 10, 1, 1.000000 },
{ 0x1d36a, 8, 10, 20.000000 },
{ 0x1d372, 5, 1, 1.000000 },
{ 0x1d377, 2, 4, 1.000000 },
{ 0x1d7ce, 10, 1, 0.000000 },
{ 0x1d7d8, 10, 1, 0.000000 },
{ 0x1d7e2, 10, 1, 0.000000 },
{ 0x1d7ec, 10, 1, 0.000000 },
{ 0x1d7f6, 10, 1, 0.000000 },
{ 0x1e140, 10, 1, 0.000000 },
{ 0x1e2f0, 10, 1, 0.000000 },
{ 0x1e4f0, 10, 1, 0.000000 },
{ 0x1e5f1, 10, 1, 0.000000 },
{ 0x1e8c7, 9, 1, 1.000000 },
{ 0x1e950, 10, 1, 0.000000 },
{ 0x1ec71, 10, 1, 1.000000 },
{ 0x1ec7b, 9, 10, 20.000000 },
{ 0x1ec84, 9, 100, 200.000000 },
{ 0x1ec8d, 9, 1000, 2000.000000 },
{ 0x1ec96, 9, 10000, 20000.000000 },
{ 0x1ec9f, 1, 0, 200000.000000 },
{ 0x1eca0, 1, 0, 100000.000000 },
{ 0x1eca1, 1, 0, 10000000.000000 },
{ 0x1eca2, 1, 0, 20000000.000000 },
{ 0x1eca3, 9, 1, 1.000000 },
{ 0x1ecad, 1, 0, 0.250000 },
{ 0x1ecae, 1, 0, 0.500000 },
{ 0x1ecaf, 1, 0, 0.750000 },
{ 0x1ecb1, 2, 1, 1.000000 },
{ 0x1ecb3, 1, 0, 10000.000000 },
{ 0x1ecb4, 1, 0, 100000.000000 },
{ 0x1ed01, 10, 1, 1.000000 },
{ 0x1ed0b, 9, 10, 20.000000 },
{ 0x1ed14, 9, 100, 200.000000 },
{ 0x1ed1d, 9, 1000, 2000.000000 },
{ 0x1ed26, 8, 10000, 20000.000000 },
{ 0x1ed2f, 9, 1, 2.000000 },
{ 0x1ed38, 2, 200, 400.000000 },
{ 0x1ed3a, 2, 8000, 2000.000000 },
{ 0x1ed3c, 1, 0, 0.500000 },
{ 0x1ed3d, 1, 0, 0.166667 },
{ 0x1f100, 2, 0, 0.000000 },
{ 0x1f102, 9, 1, 1.000000 },
{ 0x1f10b, 2, 0, 0.000000 },
{ 0x1fbf0, 10, 1, 0.000000 },
{ 0x20001, 1, 0, 7.000000 },
{ 0x20064, 1, 0, 4.000000 },
{ 0x200e2, 1, 0, 4.000000 },
{ 0x20121, 1, 0, 5.000000 },
{ 0x2092a, 1, 0, 1.000000 },
{ 0x20983, 1, 0, 30.000000 },
{ 0x2098c, 1, 0, 40.000000 },
{ 0x2099c, 1, 0, 40.000000 },
{ 0x20aea, 1, 0, 6.000000 },
{ 0x20afd, 1, 0, 3.000000 },
{ 0x20b19, 1, 0, 3.000000 },
{ 0x22390, 1, 0, 2.000000 },
{ 0x22998, 1, 0, 3.000000 },
{ 0x23b1b, 1, 0, 3.000000 },
{ 0x2626d, 1, 0, 4.000000 },
{ 0x2f890, 1, 0, 9.000000 },
};

View File

@ -33,6 +33,7 @@
#include "gunicode.h"
#include "gunichartables.h"
#include "gmirroringtable.h"
#include "gunicodevalues.h"
#include "gscripttable.h"
#include "gunicodeprivate.h"
#ifdef G_OS_WIN32
@ -712,12 +713,70 @@ g_unichar_xdigit_value (gunichar c)
return -1;
}
/**
 * g_unichar_isnumber:
 * @c: a Unicode character
 *
 * Determines if a character is a number.
 *
 * This means the character is classified as one of
 * [GLib.UnicodeType.decimal_number],
 * [GLib.UnicodeType.letter_number] or
 * [GLib.UnicodeType.other_number].
 *
 * Characters of any other type are not numbers in this sense, even
 * if [func@GLib.unichar_number_value] returns a value for them.
 *
 * Returns: true if the character is a number
 *
 * Since: 2.84
 */
gboolean
g_unichar_isnumber (gunichar c)
{
  /* NOTE(review): IS() is presumably a bit-mask test that yields the
   * matching bit rather than 0 or 1 -- confirm against its definition.
   * Collapsing the result makes sure callers always get exactly TRUE
   * or FALSE. */
  return IS (TYPE(c),
             OR (G_UNICODE_DECIMAL_NUMBER,
             OR (G_UNICODE_LETTER_NUMBER,
             OR (G_UNICODE_OTHER_NUMBER,
             0)))) ? TRUE : FALSE;
}
/**
 * g_unichar_number_value:
 * @c: a Unicode character
 *
 * Determines the numeric value of a character.
 *
 * The value is looked up in a table of characters with a known numeric
 * value. That table is not limited to the characters accepted by
 * [func@GLib.unichar_isnumber]: for example, U+79ED has the value
 * 1000000000 although it is not classified as a number.
 *
 * Values are not necessarily integers (U+00BC is 0.25) and may be
 * negative (U+0F33 is -0.5), so compare the result with -1 exactly
 * rather than testing whether it is negative.
 *
 * Returns: the numeric value of @c, or -1 if @c has no numeric value
 *
 * Since: 2.84
 */
double
g_unichar_number_value (gunichar c)
{
  int lower = 0;
  int upper = G_N_ELEMENTS (unicode_values) - 1;

  /* Binary search over the runs, which are sorted by first code point
   * and do not overlap. */
  while (lower <= upper)
    {
      int mid = (lower + upper) / 2;
      const GUnicodeValue *run = &unicode_values[mid];

      if (c < run->ch)
        upper = mid - 1;
      else if (c > run->ch + (run->length - 1))
        lower = mid + 1;
      else
        /* Inside the run: start value plus one increment per step. */
        return run->value + (c - run->ch) * run->increment;
    }

  return -1;
}
/**
* g_unichar_type:
* @c: a Unicode character
*
*
* Classifies a Unicode character by type.
*
*
* Returns: the type of the character.
**/
GUnicodeType

View File

@ -999,6 +999,75 @@ test_xdigit_value (void)
g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Test that g_unichar_isnumber() returns the correct value for various
 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
test_number (void)
{
g_assert_false (g_unichar_isnumber (' '));
g_assert_false (g_unichar_isnumber ('a'));
g_assert_true (g_unichar_isnumber ('0'));
g_assert_true (g_unichar_isnumber ('9'));
g_assert_false (g_unichar_isnumber ('A'));
g_assert_false (g_unichar_isnumber ('-'));
g_assert_false (g_unichar_isnumber ('*'));
g_assert_false (g_unichar_isnumber (0xFF21)); /* Unichar fullwidth 'A' */
g_assert_false (g_unichar_isnumber (0xFF3A)); /* Unichar fullwidth 'Z' */
g_assert_false (g_unichar_isnumber (0xFF41)); /* Unichar fullwidth 'a' */
g_assert_false (g_unichar_isnumber (0xFF5A)); /* Unichar fullwidth 'z' */
g_assert_true (g_unichar_isnumber (0xFF10)); /* Unichar fullwidth '0' */
g_assert_true (g_unichar_isnumber (0xFF19)); /* Unichar fullwidth '9' */
g_assert_false (g_unichar_isnumber (0xFF0A)); /* Unichar fullwidth '*' */
g_assert_true (g_unichar_isnumber (0x2161)); /* Roman numeral 2 'Ⅱ' */
g_assert_true (g_unichar_isnumber (0x216D)); /* Roman numeral 100 'Ⅽ' */
g_assert_true (g_unichar_isnumber (0x2180)); /* Roman numeral 1000 'ↀ' */
g_assert_true (g_unichar_isnumber (0x00BC)); /* ¼ one quarter */
g_assert_false (g_unichar_isnumber (0x79ED)); /* 秭 one billion, not a numeric category */
/*** Testing TYPE() border cases ***/
g_assert_false (g_unichar_isnumber (0x3FF5));
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_false (g_unichar_isnumber (0xFFEFF));
/* U+E0001 Language Tag */
g_assert_false (g_unichar_isnumber (0xE0001));
g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR));
g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR + 1));
g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR_PART1));
g_assert_false (g_unichar_isnumber (G_UNICODE_LAST_CHAR_PART1 + 1));
}
/* Test that g_unichar_number_value() returns the expected numeric value,
 * or -1, for various ASCII and Unicode codepoints. */
static void
test_number_value (void)
{
g_assert_cmpfloat (g_unichar_number_value (' '), ==, -1);
g_assert_cmpfloat (g_unichar_number_value ('a'), ==, -1);
g_assert_cmpfloat (g_unichar_number_value ('0'), ==, 0);
g_assert_cmpfloat (g_unichar_number_value ('9'), ==, 9);
g_assert_cmpfloat (g_unichar_number_value ('A'), ==, -1);
g_assert_cmpfloat (g_unichar_number_value ('-'), ==, -1);
g_assert_cmpfloat (g_unichar_number_value (0xFF21), ==, -1); /* Unichar fullwidth 'A' */
g_assert_cmpfloat (g_unichar_number_value (0xFF3A), ==, -1); /* Unichar fullwidth 'Z' */
g_assert_cmpfloat (g_unichar_number_value (0xFF41), ==, -1); /* Unichar fullwidth 'a' */
g_assert_cmpfloat (g_unichar_number_value (0xFF5A), ==, -1); /* Unichar fullwidth 'z' */
g_assert_cmpfloat (g_unichar_number_value (0xFF10), ==, 0); /* Unichar fullwidth '0' */
g_assert_cmpfloat (g_unichar_number_value (0xFF19), ==, 9); /* Unichar fullwidth '9' */
g_assert_cmpfloat (g_unichar_number_value (0xFF0A), ==, -1); /* Unichar fullwidth '*' */
g_assert_cmpfloat (g_unichar_number_value (0x2161), ==, 2); /* Roman numeral 2 'Ⅱ' */
g_assert_cmpfloat (g_unichar_number_value (0x216D), ==, 100); /* Roman numeral 100 'Ⅽ' */
g_assert_cmpfloat (g_unichar_number_value (0x2180), ==, 1000); /* Roman numeral 1000 'ↀ' */
g_assert_cmpfloat (g_unichar_number_value (0x00BC), ==, 0.25); /* ¼ one quarter */
/* Has a numeric value although g_unichar_isnumber() is false for it */
g_assert_cmpfloat (g_unichar_number_value (0x79ED), ==, 1000000000); /* 秭 one billion */
/*** Testing TYPE() border cases ***/
g_assert_cmpfloat (g_unichar_number_value (0x3FF5), ==, -1);
/* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
g_assert_cmpfloat (g_unichar_number_value (0xFFEFF), ==, -1);
/* U+E0001 Language Tag */
g_assert_cmpfloat (g_unichar_number_value (0xE0001), ==, -1);
g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR), ==, -1);
g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
g_assert_cmpfloat (g_unichar_number_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
}
/* Test that g_unichar_ispunct() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
@ -2111,6 +2180,8 @@ main (int argc,
g_test_add_func ("/unicode/xdigit-value", test_xdigit_value);
g_test_add_func ("/unicode/zero-width", test_zerowidth);
g_test_add_func ("/unicode/normalize", test_normalize);
g_test_add_func ("/unicode/number", test_number);
g_test_add_func ("/unicode/number-value", test_number_value);
return g_test_run();
}