mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2025-03-14 19:55:12 +01:00
Bug 654195 - Add g_unichar_compose() and g_unichar_decompose()
This commit is contained in:
parent
8c7de592ff
commit
761a1841ee
@ -2702,6 +2702,8 @@ g_unichar_tolower
|
|||||||
g_unichar_totitle
|
g_unichar_totitle
|
||||||
g_unichar_digit_value
|
g_unichar_digit_value
|
||||||
g_unichar_xdigit_value
|
g_unichar_xdigit_value
|
||||||
|
g_unichar_compose
|
||||||
|
g_unichar_decompose
|
||||||
GUnicodeType
|
GUnicodeType
|
||||||
g_unichar_type
|
g_unichar_type
|
||||||
GUnicodeBreakType
|
GUnicodeBreakType
|
||||||
|
@ -870,6 +870,46 @@ sub print_decomp
|
|||||||
|
|
||||||
printf OUT "static const gchar decomp_expansion_string[] = %s;\n\n", $decomp_string;
|
printf OUT "static const gchar decomp_expansion_string[] = %s;\n\n", $decomp_string;
|
||||||
|
|
||||||
|
print OUT "typedef struct\n{\n";
|
||||||
|
print OUT " gunichar ch;\n";
|
||||||
|
print OUT " gunichar a;\n";
|
||||||
|
print OUT " gunichar b;\n";
|
||||||
|
print OUT "} decomposition_step;\n\n";
|
||||||
|
|
||||||
|
print OUT "static const decomposition_step decomp_step_table[] =\n{\n";
|
||||||
|
$first = 1;
|
||||||
|
my @steps = ();
|
||||||
|
for ($count = 0; $count <= $last; ++$count)
|
||||||
|
{
|
||||||
|
if ((defined $decompositions[$count]) && (!$decompose_compat[$count]))
|
||||||
|
{
|
||||||
|
print OUT ",\n"
|
||||||
|
if ! $first;
|
||||||
|
$first = 0;
|
||||||
|
my @list;
|
||||||
|
@list = (split(' ', $decompositions[$count]), "0");
|
||||||
|
printf OUT qq( { 0x%05x, 0x%05x, 0x%05x }), $count, hex($list[0]), hex($list[1]);
|
||||||
|
# don't include 1:1 in the compose table
|
||||||
|
push @steps, [ ($count, hex($list[0]), hex($list[1])) ]
|
||||||
|
if hex($list[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
|
print OUT "static const decomposition_step comp_step_table[] =\n{\n";
|
||||||
|
my @inverted;
|
||||||
|
@inverted = sort { @{$a}[1] <=> @{$b}[1] ||
|
||||||
|
@{$a}[2] <=> @{$b}[2] } @steps;
|
||||||
|
$first = 1;
|
||||||
|
foreach my $i ( 0 .. $#inverted )
|
||||||
|
{
|
||||||
|
print OUT ",\n"
|
||||||
|
if ! $first;
|
||||||
|
$first = 0;
|
||||||
|
printf OUT qq( { 0x%05x, 0x%05x, 0x%05x }), $inverted[$i][0], $inverted[$i][1], $inverted[$i][2];
|
||||||
|
}
|
||||||
|
print OUT "\n};\n\n";
|
||||||
|
|
||||||
print OUT "#endif /* DECOMP_H */\n";
|
print OUT "#endif /* DECOMP_H */\n";
|
||||||
|
|
||||||
printf STDERR "Generated %d bytes in decomp tables\n", $bytes_out;
|
printf STDERR "Generated %d bytes in decomp tables\n", $bytes_out;
|
||||||
|
@ -1198,6 +1198,8 @@ g_tree_traverse
|
|||||||
g_unichar_break_type
|
g_unichar_break_type
|
||||||
g_unicode_canonical_ordering
|
g_unicode_canonical_ordering
|
||||||
g_unichar_combining_class
|
g_unichar_combining_class
|
||||||
|
g_unichar_compose
|
||||||
|
g_unichar_decompose
|
||||||
g_unichar_isalnum
|
g_unichar_isalnum
|
||||||
g_unichar_isalpha
|
g_unichar_isalpha
|
||||||
g_unichar_iscntrl
|
g_unichar_iscntrl
|
||||||
|
@ -280,12 +280,21 @@ GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
|
|||||||
gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST;
|
gint g_unichar_combining_class (gunichar uc) G_GNUC_CONST;
|
||||||
|
|
||||||
|
|
||||||
|
/* Pairwise canonical compose/decompose */
|
||||||
|
gboolean g_unichar_compose (gunichar a,
|
||||||
|
gunichar b,
|
||||||
|
gunichar *ch);
|
||||||
|
gboolean g_unichar_decompose (gunichar ch,
|
||||||
|
gunichar *a,
|
||||||
|
gunichar *b);
|
||||||
|
|
||||||
/* Compute canonical ordering of a string in-place. This rearranges
|
/* Compute canonical ordering of a string in-place. This rearranges
|
||||||
decomposed characters in the string according to their combining
|
decomposed characters in the string according to their combining
|
||||||
classes. See the Unicode manual for more information. */
|
classes. See the Unicode manual for more information. */
|
||||||
void g_unicode_canonical_ordering (gunichar *string,
|
void g_unicode_canonical_ordering (gunichar *string,
|
||||||
gsize len);
|
gsize len);
|
||||||
|
|
||||||
|
|
||||||
/* Compute canonical decomposition of a character. Returns g_malloc()d
|
/* Compute canonical decomposition of a character. Returns g_malloc()d
|
||||||
string of Unicode characters. RESULT_LEN is set to the resulting
|
string of Unicode characters. RESULT_LEN is set to the resulting
|
||||||
length of the string. */
|
length of the string. */
|
||||||
|
@ -152,7 +152,7 @@ decompose_hangul (gunichar s,
|
|||||||
r[1] = V;
|
r[1] = V;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (T != TBase)
|
if (T != TBase)
|
||||||
{
|
{
|
||||||
if (r)
|
if (r)
|
||||||
r[2] = T;
|
r[2] = T;
|
||||||
@ -530,3 +530,190 @@ g_utf8_normalize (const gchar *str,
|
|||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
decompose_hangul_step (gunichar ch,
|
||||||
|
gunichar *a,
|
||||||
|
gunichar *b)
|
||||||
|
{
|
||||||
|
gint SIndex;
|
||||||
|
gunichar L, V, T;
|
||||||
|
|
||||||
|
SIndex = ch - SBase;
|
||||||
|
|
||||||
|
if (SIndex < 0 || SIndex >= SCount)
|
||||||
|
return FALSE; /* not a hangul syllable */
|
||||||
|
|
||||||
|
L = LBase + SIndex / NCount;
|
||||||
|
V = VBase + (SIndex % NCount) / TCount;
|
||||||
|
T = TBase + SIndex % TCount;
|
||||||
|
|
||||||
|
if (T != TBase)
|
||||||
|
{
|
||||||
|
gint LIndex, VIndex;
|
||||||
|
gunichar LV;
|
||||||
|
|
||||||
|
/* split LVT -> LV,T */
|
||||||
|
LIndex = L - LBase;
|
||||||
|
VIndex = V - VBase;
|
||||||
|
LV = SBase + (LIndex * VCount + VIndex) * TCount;
|
||||||
|
|
||||||
|
*a = LV;
|
||||||
|
*b = T;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* split LV -> L,V */
|
||||||
|
*a = L;
|
||||||
|
*b = V;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
compose_hangul_step (gunichar a,
|
||||||
|
gunichar b,
|
||||||
|
gunichar *ch)
|
||||||
|
{
|
||||||
|
gint LIndex, SIndex;
|
||||||
|
|
||||||
|
/* first try L,V -> LV */
|
||||||
|
LIndex = a - LBase;
|
||||||
|
if (0 <= LIndex && LIndex < LCount)
|
||||||
|
{
|
||||||
|
gint VIndex;
|
||||||
|
|
||||||
|
VIndex = b - VBase;
|
||||||
|
if (0 <= VIndex && VIndex < VCount)
|
||||||
|
{
|
||||||
|
*ch = SBase + (LIndex * VCount + VIndex) * TCount;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* next try LV,T -> LVT */
|
||||||
|
SIndex = a - SBase;
|
||||||
|
if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0)
|
||||||
|
{
|
||||||
|
gint TIndex;
|
||||||
|
|
||||||
|
TIndex = b - TBase;
|
||||||
|
if (0 < TIndex && TIndex < TCount)
|
||||||
|
{
|
||||||
|
*ch = a + TIndex;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_unichar_decompose:
|
||||||
|
* @ch: a Unicode character
|
||||||
|
* @a: return location for the first component of @ch
|
||||||
|
* @b: return location for the second component of @ch
|
||||||
|
*
|
||||||
|
* Performs a single decomposition step of the
|
||||||
|
* Unicode character normalization algorithm.
|
||||||
|
*
|
||||||
|
* This function does not include compatibility
|
||||||
|
* decompositions. It does, however, include algorithmic
|
||||||
|
* Hangul Jamo decomposition, as well as 'singleton'
|
||||||
|
* decompositions which replace a character by a single
|
||||||
|
* other character. In this case, *@b will be set to zero.
|
||||||
|
*
|
||||||
|
* Returns: %TRUE if the character could be decomposed
|
||||||
|
*
|
||||||
|
* Since: 2.30
|
||||||
|
*/
|
||||||
|
gboolean
|
||||||
|
g_unichar_decompose (gunichar ch,
|
||||||
|
gunichar *a,
|
||||||
|
gunichar *b)
|
||||||
|
{
|
||||||
|
gint start = 0;
|
||||||
|
gint end = G_N_ELEMENTS (decomp_step_table);
|
||||||
|
|
||||||
|
if (decompose_hangul_step (ch, a, b))
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
if (ch >= decomp_step_table[start].ch &&
|
||||||
|
ch <= decomp_step_table[end - 1].ch)
|
||||||
|
{
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
gint half = (start + end) / 2;
|
||||||
|
const decomposition_step *p = &(decomp_step_table[half]);
|
||||||
|
if (ch == p->ch)
|
||||||
|
{
|
||||||
|
*a = p->a;
|
||||||
|
*b = p->b;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
else if (half == start)
|
||||||
|
break;
|
||||||
|
else if (ch > p->ch)
|
||||||
|
start = half;
|
||||||
|
else
|
||||||
|
end = half;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* g_unichar_compose:
|
||||||
|
* @a: a Unicode character
|
||||||
|
* @b: a Unicode character
|
||||||
|
* @ch: return location for the composed character
|
||||||
|
*
|
||||||
|
* Performs a single composition step of the
|
||||||
|
* Unicode character normalization algorithm.
|
||||||
|
*
|
||||||
|
* This function does not perform algorithmic composition
|
||||||
|
* for Hangul characters, and does not include compatibility
|
||||||
|
* compositions. It does, however, include 'singleton'
|
||||||
|
* compositions which replace a character by a single
|
||||||
|
* other character. To obtain these, pass zero for @b.
|
||||||
|
*
|
||||||
|
* Returns: %TRUE if the characters could be composed
|
||||||
|
*
|
||||||
|
* Since: 2.30
|
||||||
|
*/
|
||||||
|
gboolean
|
||||||
|
g_unichar_compose (gunichar a,
|
||||||
|
gunichar b,
|
||||||
|
gunichar *ch)
|
||||||
|
{
|
||||||
|
gint start = 0;
|
||||||
|
gint end = G_N_ELEMENTS (comp_step_table);
|
||||||
|
|
||||||
|
if (compose_hangul_step (a, b, ch))
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
if (a >= comp_step_table[start].a &&
|
||||||
|
a <= comp_step_table[end - 1].a)
|
||||||
|
{
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
gint half = (start + end) / 2;
|
||||||
|
const decomposition_step *p = &(comp_step_table[half]);
|
||||||
|
if (a == p->a && b == p->b)
|
||||||
|
{
|
||||||
|
*ch = p->ch;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
else if (half == start)
|
||||||
|
break;
|
||||||
|
else if (a > p->a || (a == p->a && b > p->b))
|
||||||
|
start = half;
|
||||||
|
else
|
||||||
|
end = half;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
3080
glib/gunidecomp.h
3080
glib/gunidecomp.h
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user