mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-11-14 13:26:16 +01:00
298 lines
3.2 KiB
Plaintext
298 lines
3.2 KiB
Plaintext
|
# This file is derived from
|
|||
|
#
|
|||
|
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
|
|||
|
#
|
|||
|
# Which was created by Markus Kuhn <mkuhn@acm.org> - 2000-09-02
|
|||
|
#
|
|||
|
# Lines beginning with # and blank lines are ignored
|
|||
|
#
|
|||
|
# Beyond that, this file consists of a series of test cases. Each test case consists of
|
|||
|
# 2 or 3 lines:
|
|||
|
#
|
|||
|
# 1. A UTF-8 string
|
|||
|
# 2. A status
|
|||
|
# VALID : The string is a valid UTF-8 representation of valid Unicode
|
|||
|
# INCOMPLETE : The string has a partial character at the end
|
|||
|
# NOTUNICODE : The string is valid UTF-8, but the characters represented
|
|||
|
# are not valid Unicode (surrogates, noncharacters such as U+FFFE and U+FFFF, or values above U+10FFFF)
|
|||
|
# OVERLONG : The string includes overlong sequences
|
|||
|
# MALFORMED : The string is not valid UTF-8
|
|||
|
# 3. If the status is VALID or NOTUNICODE, the UCS-4 representation of the string,
|
|||
|
# as a series of hex numbers.
|
|||
|
|
|||
|
# 1 Some correct UTF-8 text
|
|||
|
κόσμε
|
|||
|
VALID
|
|||
|
03ba 1f79 03c3 03bc 03b5
|
|||
|
|
|||
|
# 2.1 First possible sequence of a certain length
|
|||
|
#
|
|||
|
# FIXME - handle NULLS?
|
|||
|
#
|
|||
|
# [ NULL BYTE ]
|
|||
|
#VALID
|
|||
|
#0000
|
|||
|
|
|||
|
|
|||
|
VALID
|
|||
|
0080
|
|||
|
|
|||
|
ࠀ
|
|||
|
VALID
|
|||
|
0800
|
|||
|
|
|||
|
𐀀
|
|||
|
VALID
|
|||
|
00010000
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
00200000
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
04000000
|
|||
|
|
|||
|
|
|||
|
VALID
|
|||
|
0000007f
|
|||
|
|
|||
|
߿
|
|||
|
VALID
|
|||
|
000007ff
|
|||
|
|
|||
|
|
|||
|
NOTUNICODE
|
|||
|
0000ffff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
001fffff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
03ffffff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
7fffffff
|
|||
|
|
|||
|
# 2.3 Other boundary conditions
|
|||
|
|
|||
|
|
|||
|
VALID
|
|||
|
d7ff
|
|||
|
|
|||
|
|
|||
|
VALID
|
|||
|
e000
|
|||
|
|
|||
|
<EFBFBD>
|
|||
|
VALID
|
|||
|
fffd
|
|||
|
|
|||
|
|
|||
|
VALID
|
|||
|
0010ffff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
00110000
|
|||
|
|
|||
|
# 3.1 Unexpected continuation bytes
|
|||
|
|
|||
|
<EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
|
|||
|
# 3.2 Lonely start characters
|
|||
|
|
|||
|
<EFBFBD> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD> <20> <20> <20> <20> <20> <20> <20>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD> <20> <20> <20>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD> <20>
|
|||
|
MALFORMED
|
|||
|
|
|||
|
# 3.3 Sequences with last continuation byte missing
|
|||
|
|
|||
|
<EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
INCOMPLETE
|
|||
|
|
|||
|
# 3.4 Concatenation of incomplete sequences
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
|
|||
|
# 3.5 Impossible bytes
|
|||
|
|
|||
|
<EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD>
|
|||
|
MALFORMED
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
MALFORMED
|
|||
|
|
|||
|
# Examples of an overlong ASCII character
|
|||
|
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
|
|||
|
# Maximum overlong sequences
|
|||
|
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
|
|||
|
# Overlong representation of the NUL character
|
|||
|
|
|||
|
<EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
OVERLONG
|
|||
|
|
|||
|
# Illegal code positions
|
|||
|
|
|||
|
# Single UTF-16 surrogates
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
d800
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db7f
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db80
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
dbff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
dc00
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
df80
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
dfff
|
|||
|
|
|||
|
# Paired UTF-16 surrogates
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
d800 dc00
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
d800 dfff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db7f dc00
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db7f dfff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db80 dc00
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
db80 dfff
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
dbff dc00
|
|||
|
|
|||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
NOTUNICODE
|
|||
|
dbff dfff
|
|||
|
|
|||
|
# Other illegal code positions
|
|||
|
|
|||
|
|
|||
|
NOTUNICODE
|
|||
|
fffe
|
|||
|
|
|||
|
|
|||
|
NOTUNICODE
|
|||
|
ffff
|
|||
|
|
|||
|
################
|
|||
|
#
|
|||
|
# Some more tests, not from Markus Kuhn's file
|
|||
|
#
|
|||
|
|
|||
|
# Mixed plane 0 and higher planes
|
|||
|
|
|||
|
A𐀀BC
|
|||
|
VALID
|
|||
|
41 00010000 42 10ffff 43
|