mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-03-14 19:55:12 +01:00
Bug 654195 - Add g_unichar_compose() and g_unichar_decompose()
This commit is contained in:
parent
8c7de592ff
commit
761a1841ee
@ -2702,6 +2702,8 @@ g_unichar_tolower
|
||||
g_unichar_totitle
|
||||
g_unichar_digit_value
|
||||
g_unichar_xdigit_value
|
||||
g_unichar_compose
|
||||
g_unichar_decompose
|
||||
GUnicodeType
|
||||
g_unichar_type
|
||||
GUnicodeBreakType
|
||||
|
@ -870,6 +870,46 @@ sub print_decomp
|
||||
|
||||
printf OUT "static const gchar decomp_expansion_string[] = %s;\n\n", $decomp_string;
|
||||
|
||||
print OUT "typedef struct\n{\n";
|
||||
print OUT " gunichar ch;\n";
|
||||
print OUT " gunichar a;\n";
|
||||
print OUT " gunichar b;\n";
|
||||
print OUT "} decomposition_step;\n\n";
|
||||
|
||||
print OUT "static const decomposition_step decomp_step_table[] =\n{\n";
|
||||
$first = 1;
|
||||
my @steps = ();
|
||||
for ($count = 0; $count <= $last; ++$count)
|
||||
{
|
||||
if ((defined $decompositions[$count]) && (!$decompose_compat[$count]))
|
||||
{
|
||||
print OUT ",\n"
|
||||
if ! $first;
|
||||
$first = 0;
|
||||
my @list;
|
||||
@list = (split(' ', $decompositions[$count]), "0");
|
||||
printf OUT qq( { 0x%05x, 0x%05x, 0x%05x }), $count, hex($list[0]), hex($list[1]);
|
||||
# don't include 1:1 in the compose table
|
||||
push @steps, [ ($count, hex($list[0]), hex($list[1])) ]
|
||||
if hex($list[1])
|
||||
}
|
||||
}
|
||||
print OUT "\n};\n\n";
|
||||
|
||||
print OUT "static const decomposition_step comp_step_table[] =\n{\n";
|
||||
my @inverted;
|
||||
@inverted = sort { @{$a}[1] <=> @{$b}[1] ||
|
||||
@{$a}[2] <=> @{$b}[2] } @steps;
|
||||
$first = 1;
|
||||
foreach my $i ( 0 .. $#inverted )
|
||||
{
|
||||
print OUT ",\n"
|
||||
if ! $first;
|
||||
$first = 0;
|
||||
printf OUT qq( { 0x%05x, 0x%05x, 0x%05x }), $inverted[$i][0], $inverted[$i][1], $inverted[$i][2];
|
||||
}
|
||||
print OUT "\n};\n\n";
|
||||
|
||||
print OUT "#endif /* DECOMP_H */\n";
|
||||
|
||||
printf STDERR "Generated %d bytes in decomp tables\n", $bytes_out;
|
||||
|
@ -1198,6 +1198,8 @@ g_tree_traverse
|
||||
g_unichar_break_type
|
||||
g_unicode_canonical_ordering
|
||||
g_unichar_combining_class
|
||||
g_unichar_compose
|
||||
g_unichar_decompose
|
||||
g_unichar_isalnum
|
||||
g_unichar_isalpha
|
||||
g_unichar_iscntrl
|
||||
|
@ -280,12 +280,21 @@ GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
|
||||
gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST;
|
||||
|
||||
|
||||
/* Pairwise canonical compose/decompose */
|
||||
gboolean g_unichar_compose (gunichar a,
|
||||
gunichar b,
|
||||
gunichar *ch);
|
||||
gboolean g_unichar_decompose (gunichar ch,
|
||||
gunichar *a,
|
||||
gunichar *b);
|
||||
|
||||
/* Compute canonical ordering of a string in-place. This rearranges
|
||||
decomposed characters in the string according to their combining
|
||||
classes. See the Unicode manual for more information. */
|
||||
void g_unicode_canonical_ordering (gunichar *string,
|
||||
gsize len);
|
||||
|
||||
|
||||
/* Compute canonical decomposition of a character. Returns g_malloc()d
|
||||
string of Unicode characters. RESULT_LEN is set to the resulting
|
||||
length of the string. */
|
||||
|
@ -152,7 +152,7 @@ decompose_hangul (gunichar s,
|
||||
r[1] = V;
|
||||
}
|
||||
|
||||
if (T != TBase)
|
||||
if (T != TBase)
|
||||
{
|
||||
if (r)
|
||||
r[2] = T;
|
||||
@ -530,3 +530,190 @@ g_utf8_normalize (const gchar *str,
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
decompose_hangul_step (gunichar ch,
|
||||
gunichar *a,
|
||||
gunichar *b)
|
||||
{
|
||||
gint SIndex;
|
||||
gunichar L, V, T;
|
||||
|
||||
SIndex = ch - SBase;
|
||||
|
||||
if (SIndex < 0 || SIndex >= SCount)
|
||||
return FALSE; /* not a hangul syllable */
|
||||
|
||||
L = LBase + SIndex / NCount;
|
||||
V = VBase + (SIndex % NCount) / TCount;
|
||||
T = TBase + SIndex % TCount;
|
||||
|
||||
if (T != TBase)
|
||||
{
|
||||
gint LIndex, VIndex;
|
||||
gunichar LV;
|
||||
|
||||
/* split LVT -> LV,T */
|
||||
LIndex = L - LBase;
|
||||
VIndex = V - VBase;
|
||||
LV = SBase + (LIndex * VCount + VIndex) * TCount;
|
||||
|
||||
*a = LV;
|
||||
*b = T;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* split LV -> L,V */
|
||||
*a = L;
|
||||
*b = V;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
compose_hangul_step (gunichar a,
|
||||
gunichar b,
|
||||
gunichar *ch)
|
||||
{
|
||||
gint LIndex, SIndex;
|
||||
|
||||
/* first try L,V -> LV */
|
||||
LIndex = a - LBase;
|
||||
if (0 <= LIndex && LIndex < LCount)
|
||||
{
|
||||
gint VIndex;
|
||||
|
||||
VIndex = b - VBase;
|
||||
if (0 <= VIndex && VIndex < VCount)
|
||||
{
|
||||
*ch = SBase + (LIndex * VCount + VIndex) * TCount;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* next try LV,T -> LVT */
|
||||
SIndex = a - SBase;
|
||||
if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0)
|
||||
{
|
||||
gint TIndex;
|
||||
|
||||
TIndex = b - TBase;
|
||||
if (0 < TIndex && TIndex < TCount)
|
||||
{
|
||||
*ch = a + TIndex;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* g_unichar_decompose:
|
||||
* @ch: a Unicode character
|
||||
* @a: return location for the first component of @ch
|
||||
* @b: return location for the second component of @ch
|
||||
*
|
||||
* Performs a single decomposition step of the
|
||||
* Unicode character normalization algorithm.
|
||||
*
|
||||
* This function does not include compatibility
|
||||
* decompositions. It does, however, include algorithmic
|
||||
* Hangul Jamo decomposition, as well as 'singleton'
|
||||
* decompositions which replace a character by a single
|
||||
* other character. In this case, *@b will be set to zero.
|
||||
*
|
||||
* Returns: %TRUE if the character could be decomposed
|
||||
*
|
||||
* Since: 2.30
|
||||
*/
|
||||
gboolean
|
||||
g_unichar_decompose (gunichar ch,
|
||||
gunichar *a,
|
||||
gunichar *b)
|
||||
{
|
||||
gint start = 0;
|
||||
gint end = G_N_ELEMENTS (decomp_step_table);
|
||||
|
||||
if (decompose_hangul_step (ch, a, b))
|
||||
return TRUE;
|
||||
|
||||
if (ch >= decomp_step_table[start].ch &&
|
||||
ch <= decomp_step_table[end - 1].ch)
|
||||
{
|
||||
while (TRUE)
|
||||
{
|
||||
gint half = (start + end) / 2;
|
||||
const decomposition_step *p = &(decomp_step_table[half]);
|
||||
if (ch == p->ch)
|
||||
{
|
||||
*a = p->a;
|
||||
*b = p->b;
|
||||
return TRUE;
|
||||
}
|
||||
else if (half == start)
|
||||
break;
|
||||
else if (ch > p->ch)
|
||||
start = half;
|
||||
else
|
||||
end = half;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* g_unichar_compose:
|
||||
* @a: a Unicode character
|
||||
* @b: a Unicode character
|
||||
* @ch: return location for the composed character
|
||||
*
|
||||
* Performs a single composition step of the
|
||||
* Unicode character normalization algorithm.
|
||||
*
|
||||
* This function does not perform algorithmic composition
|
||||
* for Hangul characters, and does not include compatibility
|
||||
* compositions. It does, however, include 'singleton'
|
||||
* compositions which replace a character by a single
|
||||
* other character. To obtain these, pass zero for @b.
|
||||
*
|
||||
* Returns: %TRUE if the characters could be composed
|
||||
*
|
||||
* Since: 2.30
|
||||
*/
|
||||
gboolean
|
||||
g_unichar_compose (gunichar a,
|
||||
gunichar b,
|
||||
gunichar *ch)
|
||||
{
|
||||
gint start = 0;
|
||||
gint end = G_N_ELEMENTS (comp_step_table);
|
||||
|
||||
if (compose_hangul_step (a, b, ch))
|
||||
return TRUE;
|
||||
|
||||
if (a >= comp_step_table[start].a &&
|
||||
a <= comp_step_table[end - 1].a)
|
||||
{
|
||||
while (TRUE)
|
||||
{
|
||||
gint half = (start + end) / 2;
|
||||
const decomposition_step *p = &(comp_step_table[half]);
|
||||
if (a == p->a && b == p->b)
|
||||
{
|
||||
*ch = p->ch;
|
||||
return TRUE;
|
||||
}
|
||||
else if (half == start)
|
||||
break;
|
||||
else if (a > p->a || (a == p->a && b > p->b))
|
||||
start = half;
|
||||
else
|
||||
end = half;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
3080
glib/gunidecomp.h
3080
glib/gunidecomp.h
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user