2000-06-21 18:11:21 +02:00
|
|
|
/* decomp.c - Character decomposition.
|
|
|
|
*
|
|
|
|
* Copyright (C) 1999, 2000 Tom Tromey
|
|
|
|
* Copyright 2000 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* The Gnome Library is free software; you can redistribute it and/or
|
2000-07-26 13:02:02 +02:00
|
|
|
* modify it under the terms of the GNU Lesser General Public License as
|
2000-06-21 18:11:21 +02:00
|
|
|
* published by the Free Software Foundation; either version 2 of the
|
|
|
|
* License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* The Gnome Library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2000-07-26 13:02:02 +02:00
|
|
|
* Lesser General Public License for more details.
|
2000-06-21 18:11:21 +02:00
|
|
|
*
|
2000-07-26 13:02:02 +02:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
2000-06-21 18:11:21 +02:00
|
|
|
* License along with the Gnome Library; see the file COPYING.LIB. If not,
|
|
|
|
* write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "glib.h"
|
|
|
|
#include "gunidecomp.h"
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
/* We cheat a bit and cast type values to (char *). We detect these
|
|
|
|
using the &0xff trick. */
|
|
|
|
#define CC(Page, Char) \
|
2000-11-29 00:44:21 +01:00
|
|
|
((((GPOINTER_TO_INT(combining_class_table[Page])) & 0xff) \
|
|
|
|
== GPOINTER_TO_INT(combining_class_table[Page])) \
|
|
|
|
? GPOINTER_TO_INT(combining_class_table[Page]) \
|
2000-06-21 18:11:21 +02:00
|
|
|
: (combining_class_table[Page][Char]))
|
|
|
|
|
|
|
|
#define COMBINING_CLASS(Char) \
|
|
|
|
(((Char) > (UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff))
|
|
|
|
|
|
|
|
/* Compute the canonical ordering of a string in-place. */
|
|
|
|
void
|
|
|
|
g_unicode_canonical_ordering (gunichar *string,
|
|
|
|
size_t len)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
int swap = 1;
|
|
|
|
|
|
|
|
while (swap)
|
|
|
|
{
|
|
|
|
int last;
|
|
|
|
swap = 0;
|
|
|
|
last = COMBINING_CLASS (string[0]);
|
|
|
|
for (i = 0; i < len - 1; ++i)
|
|
|
|
{
|
|
|
|
int next = COMBINING_CLASS (string[i + 1]);
|
|
|
|
if (next != 0 && last > next)
|
|
|
|
{
|
|
|
|
size_t j;
|
|
|
|
/* Percolate item leftward through string. */
|
|
|
|
for (j = i; j > 0; --j)
|
|
|
|
{
|
|
|
|
gunichar t;
|
|
|
|
if (COMBINING_CLASS (string[j]) <= next)
|
|
|
|
break;
|
|
|
|
t = string[j + 1];
|
|
|
|
string[j + 1] = string[j];
|
|
|
|
string[j] = t;
|
|
|
|
swap = 1;
|
|
|
|
}
|
|
|
|
/* We're re-entering the loop looking at the old
|
|
|
|
character again. */
|
|
|
|
next = last;
|
|
|
|
}
|
|
|
|
last = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
gunichar *
|
|
|
|
g_unicode_canonical_decomposition (gunichar ch,
|
|
|
|
size_t *result_len)
|
|
|
|
{
|
|
|
|
gunichar *r = NULL;
|
|
|
|
|
|
|
|
if (ch <= 0xffff)
|
|
|
|
{
|
|
|
|
int start = 0;
|
|
|
|
int end = G_N_ELEMENTS (decomp_table);
|
|
|
|
while (start != end)
|
|
|
|
{
|
|
|
|
int half = (start + end) / 2;
|
|
|
|
if (ch == decomp_table[half].ch)
|
|
|
|
{
|
|
|
|
/* Found it. */
|
|
|
|
int i, len;
|
|
|
|
/* We store as a double-nul terminated string. */
|
|
|
|
for (len = 0; (decomp_table[half].expansion[len]
|
|
|
|
|| decomp_table[half].expansion[len + 1]);
|
|
|
|
len += 2)
|
|
|
|
;
|
|
|
|
|
|
|
|
/* We've counted twice as many bytes as there are
|
|
|
|
characters. */
|
|
|
|
*result_len = len / 2;
|
|
|
|
r = malloc (len / 2 * sizeof (gunichar));
|
|
|
|
|
|
|
|
for (i = 0; i < len; i += 2)
|
|
|
|
{
|
|
|
|
r[i / 2] = (decomp_table[half].expansion[i] << 8
|
|
|
|
| decomp_table[half].expansion[i + 1]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (ch > decomp_table[half].ch)
|
|
|
|
start = half;
|
|
|
|
else
|
|
|
|
end = half;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (r == NULL)
|
|
|
|
{
|
|
|
|
/* Not in our table. */
|
|
|
|
r = malloc (sizeof (gunichar));
|
|
|
|
*r = ch;
|
|
|
|
*result_len = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Supposedly following the Unicode 2.1.9 table means that the
|
|
|
|
decompositions come out in canonical order. I haven't tested
|
|
|
|
this, but we rely on it here. */
|
|
|
|
return r;
|
|
|
|
}
|