mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-01-13 07:56:17 +01:00
0891c64816
Wed Jun 21 12:09:03 2000 Owen Taylor <otaylor@redhat.com> * gunicode.h gutf8.c guniprop.c gunidecomp.[ch] gunichartables.h Makefile.am glib.h: Initial pass at adding unicode support functions. A few things still need to be implemented, a bit of cleanup needs to be done, tests need to be added, and the docs need to be finished, but this should allow replacing most or all use of libunicode.
134 lines
3.4 KiB
C
134 lines
3.4 KiB
C
/* decomp.c - Character decomposition.
 *
 * Copyright (C) 1999, 2000 Tom Tromey
 * Copyright 2000 Red Hat, Inc.
 *
 * The Gnome Library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The Gnome Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the Gnome Library; see the file COPYING.LIB. If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
|
|
|
|
#include "glib.h"
|
|
#include "gunidecomp.h"
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
/* Look up the combining class stored for character Char on page Page.

   We cheat a bit: an entry of combining_class_table is either a real
   pointer to a per-page array indexed by Char, or a small value
   (0..0xff) that has been cast to a pointer and is used directly as
   the result.  The two cases are told apart with the &0xff trick: if
   masking the entry with 0xff leaves it unchanged, it is a small value
   rather than an address.

   The test is done on `unsigned long' rather than `int' so that, where
   pointers are wider than int, the upper bits of a real address are
   not thrown away before the comparison (which could make a genuine
   pointer look like a small value).

   Page is expanded several times; pass an expression without side
   effects.  */
#define CC(Page, Char) \
  (((((unsigned long) (combining_class_table[Page])) & 0xff) \
    == ((unsigned long) combining_class_table[Page])) \
   ? ((int) (unsigned long) combining_class_table[Page]) \
   : (combining_class_table[Page][Char]))
|
|
|
|
/* Combining class of the code point Char.  Anything beyond
   UNICODE_LAST_CHAR is reported as class 0; otherwise the high bits
   select the page and the low byte the slot within it.  Char is
   expanded more than once, so it must not have side effects.  */
#define COMBINING_CLASS(Char) \
  (((Char) <= (UNICODE_LAST_CHAR)) \
   ? CC((Char) >> 8, (Char) & 0xff) \
   : 0)
|
|
|
|
/* Compute the canonical ordering of a string in-place. */
|
|
void
|
|
g_unicode_canonical_ordering (gunichar *string,
|
|
size_t len)
|
|
{
|
|
size_t i;
|
|
int swap = 1;
|
|
|
|
while (swap)
|
|
{
|
|
int last;
|
|
swap = 0;
|
|
last = COMBINING_CLASS (string[0]);
|
|
for (i = 0; i < len - 1; ++i)
|
|
{
|
|
int next = COMBINING_CLASS (string[i + 1]);
|
|
if (next != 0 && last > next)
|
|
{
|
|
size_t j;
|
|
/* Percolate item leftward through string. */
|
|
for (j = i; j > 0; --j)
|
|
{
|
|
gunichar t;
|
|
if (COMBINING_CLASS (string[j]) <= next)
|
|
break;
|
|
t = string[j + 1];
|
|
string[j + 1] = string[j];
|
|
string[j] = t;
|
|
swap = 1;
|
|
}
|
|
/* We're re-entering the loop looking at the old
|
|
character again. */
|
|
next = last;
|
|
}
|
|
last = next;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Return the canonical decomposition of CH.

   The result is a newly malloc()ed array of characters which the
   caller must release with free(); the number of characters in it is
   stored in *RESULT_LEN.  If CH has no entry in decomp_table the
   result is the single character CH.

   If memory cannot be allocated, NULL is returned and *RESULT_LEN is
   set to 0.

   The lookup is a binary search, so decomp_table is assumed to be
   sorted by ascending `ch' (the table itself is defined elsewhere).  */
gunichar *
g_unicode_canonical_decomposition (gunichar ch,
                                   size_t  *result_len)
{
  gunichar *r = NULL;

  if (ch <= 0xffff)
    {
      /* Search the half-open range [start, end).  */
      int start = 0;
      int end = G_N_ELEMENTS (decomp_table);

      while (start < end)
        {
          int half = start + (end - start) / 2;

          if (ch == decomp_table[half].ch)
            {
              /* Found it.  */
              size_t i, count;
              size_t nbytes = 0;

              /* We store as a double-nul terminated string: each
                 character takes two bytes, high byte first.  */
              while (decomp_table[half].expansion[nbytes]
                     || decomp_table[half].expansion[nbytes + 1])
                nbytes += 2;

              /* We've counted twice as many bytes as there are
                 characters.  */
              count = nbytes / 2;
              *result_len = count;
              r = malloc (count * sizeof *r);
              if (r == NULL && count != 0)
                {
                  /* Out of memory.  */
                  *result_len = 0;
                  return NULL;
                }

              /* The casts to unsigned char keep a byte >= 0x80 from
                 being sign-extended should the expansion be stored
                 as plain (possibly signed) char.  */
              for (i = 0; i < count; ++i)
                {
                  r[i] = (((gunichar) (unsigned char)
                           decomp_table[half].expansion[2 * i]) << 8
                          | (unsigned char)
                            decomp_table[half].expansion[2 * i + 1]);
                }
              break;
            }
          else if (ch > decomp_table[half].ch)
            {
              /* Entry `half' has been ruled out, so step past it.
                 Using `start = half' here would never make progress
                 once end == start + 1, looping forever for
                 characters that are not in the table.  */
              start = half + 1;
            }
          else
            end = half;
        }
    }

  if (r == NULL)
    {
      /* Not in our table.  */
      r = malloc (sizeof *r);
      if (r == NULL)
        {
          /* Out of memory.  */
          *result_len = 0;
          return NULL;
        }
      *r = ch;
      *result_len = 1;
    }

  /* Supposedly following the Unicode 2.1.9 table means that the
     decompositions come out in canonical order.  I haven't tested
     this, but we rely on it here.  */
  return r;
}
|