Merge branch 'ghostutils_ascii' into 'main'

ghostutils: Treat 0x80 (and above) as non-ASCII

See merge request GNOME/glib!4827
This commit is contained in:
Philip Withnall
2025-09-22 16:36:52 +00:00
2 changed files with 33 additions and 6 deletions

View File

@@ -56,7 +56,8 @@
#define PUNYCODE_INITIAL_BIAS 72 #define PUNYCODE_INITIAL_BIAS 72
#define PUNYCODE_INITIAL_N 0x80 #define PUNYCODE_INITIAL_N 0x80
#define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80) #define IS_ASCII(cp) ((guint) (cp) < 0x80)
#define PUNYCODE_IS_BASIC(cp) IS_ASCII (cp)
/* Encode/decode a single base-36 digit */ /* Encode/decode a single base-36 digit */
static inline gchar static inline gchar
@@ -258,8 +259,8 @@ contains_non_ascii (const gchar *str,
for (p = str; len == -1 ? *p : p < str + len; p++) for (p = str; len == -1 ? *p : p < str + len; p++)
{ {
if ((guchar)*p > 0x80) if (!IS_ASCII (*p))
return TRUE; return TRUE;
} }
return FALSE; return FALSE;
} }
@@ -505,9 +506,9 @@ g_hostname_to_ascii (const gchar *hostname)
unicode = FALSE; unicode = FALSE;
for (p = label; *p && !idna_is_dot (p); p++) for (p = label; *p && !idna_is_dot (p); p++)
{ {
if ((guchar)*p > 0x80) if (!IS_ASCII (*p))
unicode = TRUE; unicode = TRUE;
} }
oldlen = out->len; oldlen = out->len;
llen = p - label; llen = p - label;

View File

@@ -72,6 +72,7 @@ static const gint num_non_round_trip_names = G_N_ELEMENTS (non_round_trip_names)
static const gchar *bad_names[] = { static const gchar *bad_names[] = {
"disallowed\xef\xbf\xbd" "character", "disallowed\xef\xbf\xbd" "character",
"non-utf\x88", "non-utf\x88",
"smallest-non-utf-char\x80",
"xn--mixed-\xc3\xbcp", "xn--mixed-\xc3\xbcp",
"verylongverylongverylongverylongverylongverylongverylongverylongverylong" "verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong" "verylongverylongverylongverylongverylongverylongverylongverylongverylong"
@@ -331,6 +332,30 @@ test_is_ip_addr (void)
} }
} }
/* Hostnames for which test_hostname_is_non_ascii() asserts that
 * g_hostname_is_non_ascii() is false. The trailing byte 0x7F is the
 * largest value below 0x80. */
static const gchar *ascii_names[] = {
"ascii-\x7F",
};
/* Hostnames for which test_hostname_is_non_ascii() asserts that
 * g_hostname_is_non_ascii() is true. The literal is split right after
 * "\x80" so that the following 'c' (a valid hex digit) is not parsed as
 * part of the hex escape sequence. */
static const gchar *non_ascii_names[] = {
"disallowed\x80" "character",
};
/* Check g_hostname_is_non_ascii() against four sets of inputs, in order:
 * the ascii_names table, the ascii_name of every idn_test_domains entry
 * (both expected to yield false), then the non_ascii_names table and the
 * unicode_name of every idn_test_domains entry (both expected to yield
 * true). */
static void
test_hostname_is_non_ascii (void)
{
  size_t i;

  /* Inputs that must NOT be flagged as non-ASCII. */
  for (i = 0; i < G_N_ELEMENTS (ascii_names); i++)
    {
      g_assert_false (g_hostname_is_non_ascii (ascii_names[i]));
    }

  for (i = 0; i < G_N_ELEMENTS (idn_test_domains); i++)
    {
      g_assert_false (g_hostname_is_non_ascii (idn_test_domains[i].ascii_name));
    }

  /* Inputs that MUST be flagged as non-ASCII. */
  for (i = 0; i < G_N_ELEMENTS (non_ascii_names); i++)
    {
      g_assert_true (g_hostname_is_non_ascii (non_ascii_names[i]));
    }

  for (i = 0; i < G_N_ELEMENTS (idn_test_domains); i++)
    {
      g_assert_true (g_hostname_is_non_ascii (idn_test_domains[i].unicode_name));
    }
}
/* FIXME: test names with both unicode and ACE-encoded labels */ /* FIXME: test names with both unicode and ACE-encoded labels */
/* FIXME: test invalid unicode names */ /* FIXME: test invalid unicode names */
@@ -362,6 +387,7 @@ main (int argc,
return 0; return 0;
} }
g_test_add_func ("/hostutils/hostname_is_non_ascii", test_hostname_is_non_ascii);
g_test_add_func ("/hostutils/to_ascii", test_to_ascii); g_test_add_func ("/hostutils/to_ascii", test_to_ascii);
g_test_add_func ("/hostutils/to_unicode", test_to_unicode); g_test_add_func ("/hostutils/to_unicode", test_to_unicode);
g_test_add_func ("/hostutils/is_ip_addr", test_is_ip_addr); g_test_add_func ("/hostutils/is_ip_addr", test_is_ip_addr);