unicode: Allow noncharacters

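Implement Unicode Corrigendum #9: noncharacters (U+FDD0..U+FDEF and the
U+nFFFE/U+nFFFF code points) are no longer rejected as invalid.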

https://bugzilla.gnome.org/show_bug.cgi?id=694669
This commit is contained in:
Christian Persch 2013-02-25 14:48:14 +01:00
parent 06a59f889a
commit f91ef4ef15
2 changed files with 4 additions and 12 deletions

View File

@ -104,22 +104,14 @@
* a point above 0x0010ffff, since UTF-16 couldn't represent it.
*
* The second check covers surrogate pairs (category Cs).
*
* The last two checks cover "Noncharacter": defined as:
* "A code point that is permanently reserved for
* internal use, and that should never be interchanged. In
* Unicode 3.1, these consist of the values U+nFFFE and U+nFFFF
* (where n is from 0 to 10_16) and the values U+FDD0..U+FDEF."
*
* @param Char the character
*/
#define UNICODE_VALID(Char) \
((Char) < 0x110000 && \
(((Char) & 0xFFFFF800) != 0xD800) && \
((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
((Char) & 0xFFFE) != 0xFFFE)
(((Char) & 0xFFFFF800) != 0xD800))
static const gchar utf8_skip_data[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

View File

@ -33,7 +33,7 @@ test_unichar_validate (void)
g_assert (g_unichar_validate ('j'));
g_assert (g_unichar_validate (8356));
g_assert (g_unichar_validate (8356));
g_assert (!g_unichar_validate (0xfdd1));
g_assert (g_unichar_validate (0xfdd1));
g_assert (g_unichar_validate (917760));
g_assert (!g_unichar_validate (0x110000));
}