ghostutils: Treat 0x80 (and above) as non-ASCII

Any byte above 0x7F is not ASCII and should be treated as part of a
UTF-8 sequence in ghostutils functions, because GLib expects host names
to be either punycode encoded or in valid UTF-8 format.

The checks in gutf8.c already treat 0x80 as non-ASCII, but two checks
in ghostutils.c erroneously test for "greater than" 0x80 instead of
"greater than or equal to" 0x80.

Clarify this by adding a new macro, IS_ASCII, which is reused by
PUNYCODE_IS_BASIC so that the intent is better documented in code.
This commit is contained in:
Tobias Stoeckmann
2025-09-21 15:57:49 +02:00
parent ed36f4c642
commit a4a32bbeaa
2 changed files with 33 additions and 6 deletions

View File

@@ -56,7 +56,8 @@
#define PUNYCODE_INITIAL_BIAS 72 #define PUNYCODE_INITIAL_BIAS 72
#define PUNYCODE_INITIAL_N 0x80 #define PUNYCODE_INITIAL_N 0x80
#define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80) #define IS_ASCII(cp) ((guint) (cp) < 0x80)
#define PUNYCODE_IS_BASIC(cp) IS_ASCII (cp)
/* Encode/decode a single base-36 digit */ /* Encode/decode a single base-36 digit */
static inline gchar static inline gchar
@@ -258,8 +259,8 @@ contains_non_ascii (const gchar *str,
for (p = str; len == -1 ? *p : p < str + len; p++) for (p = str; len == -1 ? *p : p < str + len; p++)
{ {
if ((guchar)*p > 0x80) if (!IS_ASCII (*p))
return TRUE; return TRUE;
} }
return FALSE; return FALSE;
} }
@@ -505,9 +506,9 @@ g_hostname_to_ascii (const gchar *hostname)
unicode = FALSE; unicode = FALSE;
for (p = label; *p && !idna_is_dot (p); p++) for (p = label; *p && !idna_is_dot (p); p++)
{ {
if ((guchar)*p > 0x80) if (!IS_ASCII (*p))
unicode = TRUE; unicode = TRUE;
} }
oldlen = out->len; oldlen = out->len;
llen = p - label; llen = p - label;

View File

@@ -72,6 +72,7 @@ static const gint num_non_round_trip_names = G_N_ELEMENTS (non_round_trip_names)
static const gchar *bad_names[] = { static const gchar *bad_names[] = {
"disallowed\xef\xbf\xbd" "character", "disallowed\xef\xbf\xbd" "character",
"non-utf\x88", "non-utf\x88",
"smallest-non-utf-char\x80",
"xn--mixed-\xc3\xbcp", "xn--mixed-\xc3\xbcp",
"verylongverylongverylongverylongverylongverylongverylongverylongverylong" "verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong" "verylongverylongverylongverylongverylongverylongverylongverylongverylong"
@@ -331,6 +332,30 @@ test_is_ip_addr (void)
} }
} }
/* Host names that g_hostname_is_non_ascii() must report as ASCII.
 * 0x7F is the largest byte value that is still below 0x80. */
static const gchar *ascii_names[] = {
"ascii-\x7F",
};
/* Host names that g_hostname_is_non_ascii() must report as non-ASCII.
 * 0x80 is the smallest byte value that is no longer below 0x80. */
static const gchar *non_ascii_names[] = {
"disallowed\x80" "character",
};
/* Exercises g_hostname_is_non_ascii() with the ascii_names and
 * non_ascii_names tables and with both spellings of every
 * idn_test_domains entry. */
static void
test_hostname_is_non_ascii (void)
{
  size_t n;

  /* Inputs expected to be reported as ASCII-only. */
  for (n = 0; n < G_N_ELEMENTS (ascii_names); n++)
    {
      g_assert_false (g_hostname_is_non_ascii (ascii_names[n]));
    }

  for (n = 0; n < G_N_ELEMENTS (idn_test_domains); n++)
    {
      g_assert_false (g_hostname_is_non_ascii (idn_test_domains[n].ascii_name));
    }

  /* Inputs expected to be reported as non-ASCII. */
  for (n = 0; n < G_N_ELEMENTS (non_ascii_names); n++)
    {
      g_assert_true (g_hostname_is_non_ascii (non_ascii_names[n]));
    }

  for (n = 0; n < G_N_ELEMENTS (idn_test_domains); n++)
    {
      g_assert_true (g_hostname_is_non_ascii (idn_test_domains[n].unicode_name));
    }
}
/* FIXME: test names with both unicode and ACE-encoded labels */ /* FIXME: test names with both unicode and ACE-encoded labels */
/* FIXME: test invalid unicode names */ /* FIXME: test invalid unicode names */
@@ -362,6 +387,7 @@ main (int argc,
return 0; return 0;
} }
g_test_add_func ("/hostutils/hostname_is_non_ascii", test_hostname_is_non_ascii);
g_test_add_func ("/hostutils/to_ascii", test_to_ascii); g_test_add_func ("/hostutils/to_ascii", test_to_ascii);
g_test_add_func ("/hostutils/to_unicode", test_to_unicode); g_test_add_func ("/hostutils/to_unicode", test_to_unicode);
g_test_add_func ("/hostutils/is_ip_addr", test_is_ip_addr); g_test_add_func ("/hostutils/is_ip_addr", test_is_ip_addr);