diff --git a/glib/ghostutils.c b/glib/ghostutils.c
index 66be5c9a6..6e671d64c 100644
--- a/glib/ghostutils.c
+++ b/glib/ghostutils.c
@@ -21,6 +21,10 @@
 
 #include <string.h>
 
+#ifdef G_OS_UNIX
+#include <unistd.h>
+#endif
+
 #include "ghostutils.h"
 
 #include "garray.h"
@@ -29,6 +33,10 @@
 #include "gstrfuncs.h"
 #include "glibintl.h"
 
+#ifdef G_PLATFORM_WIN32
+#include <windows.h>
+#endif
+
 
 /**
  * SECTION:ghostutils
@@ -405,6 +413,45 @@ idna_end_of_label (const gchar *str)
   return str;
 }
 
+static gsize
+get_hostname_max_length_bytes (void)
+{
+#if defined(G_OS_WIN32)
+  wchar_t tmp[MAX_COMPUTERNAME_LENGTH];
+  return sizeof (tmp) / sizeof (tmp[0]);
+#elif defined(_SC_HOST_NAME_MAX)
+  glong max = sysconf (_SC_HOST_NAME_MAX);
+  if (max > 0)
+    return (gsize) max;
+
+#ifdef HOST_NAME_MAX
+  return HOST_NAME_MAX;
+#else
+  return _POSIX_HOST_NAME_MAX;
+#endif /* HOST_NAME_MAX */
+#else
+  /* Fallback to some reasonable value
+   * See https://stackoverflow.com/questions/8724954/what-is-the-maximum-number-of-characters-for-a-host-name-in-unix/28918017#28918017 */
+  return 255;
+#endif
+}
+
+/* Returns %TRUE if `strlen (str) > comparison_length`, scanning at most
+ * `comparison_length + 1` bytes rather than running `strlen (str)`, as that
+ * would take a very long time for long (untrusted) input strings. */
+static gboolean
+strlen_greater_than (const gchar *str,
+                     gsize        comparison_length)
+{
+  gsize i;
+
+  for (i = 0; str[i] != '\0'; i++)
+    if (i >= comparison_length)
+      return TRUE;
+
+  return FALSE;
+}
+
 /**
  * g_hostname_to_ascii:
  * @hostname: a valid UTF-8 or ASCII hostname
@@ -413,8 +460,8 @@ idna_end_of_label (const gchar *str)
  * string containing no uppercase letters and not ending with a
  * trailing dot.
  *
- * Returns: an ASCII hostname, which must be freed, or %NULL if
- * @hostname is in some way invalid.
+ * Returns: (nullable) (transfer full): an ASCII hostname, which must be freed,
+ * or %NULL if @hostname is in some way invalid.
* * Since: 2.22 **/ @@ -425,6 +472,32 @@ g_hostname_to_ascii (const gchar *hostname) GString *out; gssize llen, oldlen; gboolean unicode; + gsize hostname_max_length_bytes = get_hostname_max_length_bytes (); + + /* Do an initial check on the hostname length, as overlong hostnames take a + * long time in the IDN cleanup algorithm in nameprep(). The ultimate + * restriction is that the IDN-decoded (i.e. pure ASCII) hostname cannot be + * longer than 255 bytes. That’s the least restrictive limit on hostname + * length of all the ways hostnames can be interpreted. Typically, the + * hostname will be an FQDN, which is limited to 253 bytes long. POSIX + * hostnames are limited to `get_hostname_max_length_bytes()` (typically 255 + * bytes). + * + * See https://stackoverflow.com/a/28918017/2931197 + * + * It’s possible for a hostname to be %-encoded, in which case its decoded + * length will be as much as 3× shorter. + * + * It’s also possible for a hostname to use overlong UTF-8 encodings, in which + * case its decoded length will be as much as 4× shorter. + * + * Note: This check is not intended as an absolute guarantee that a hostname + * is the right length and will be accepted by other systems. It’s intended to + * stop wildly-invalid hostnames from taking forever in nameprep(). + */ + if (hostname_max_length_bytes <= G_MAXSIZE / 4 && + strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes))) + return NULL; label = name = nameprep (hostname, -1, &unicode); if (!name || !unicode) @@ -594,8 +667,8 @@ punycode_decode (const gchar *input, * Of course if @hostname is not an internationalized hostname, then * the canonical presentation form will be entirely ASCII. * - * Returns: a UTF-8 hostname, which must be freed, or %NULL if - * @hostname is in some way invalid. + * Returns: (nullable) (transfer full): a UTF-8 hostname, which must be freed, + * or %NULL if @hostname is in some way invalid. 
* * Since: 2.22 **/ @@ -604,6 +677,12 @@ g_hostname_to_unicode (const gchar *hostname) { GString *out; gssize llen; + gsize hostname_max_length_bytes = get_hostname_max_length_bytes (); + + /* See the comment at the top of g_hostname_to_ascii(). */ + if (hostname_max_length_bytes <= G_MAXSIZE / 4 && + strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes))) + return NULL; out = g_string_new (NULL); diff --git a/glib/guri.c b/glib/guri.c index 19fe4ac71..571cf10aa 100644 --- a/glib/guri.c +++ b/glib/guri.c @@ -604,9 +604,21 @@ parse_host (const gchar *start, } if (g_hostname_is_non_ascii (decoded)) - host = g_hostname_to_ascii (decoded); + { + host = g_hostname_to_ascii (decoded); + if (host == NULL) + { + g_free (decoded); + g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST, + _("Illegal internationalized hostname ‘%.*s’ in URI"), + (gint) length, start); + return FALSE; + } + } else - host = g_steal_pointer (&decoded); + { + host = g_steal_pointer (&decoded); + } ok: if (out) diff --git a/glib/tests/hostutils.c b/glib/tests/hostutils.c index d694e626c..1d6f8550d 100644 --- a/glib/tests/hostutils.c +++ b/glib/tests/hostutils.c @@ -69,7 +69,23 @@ static const gint num_non_round_trip_names = G_N_ELEMENTS (non_round_trip_names) static const gchar *bad_names[] = { "disallowed\xef\xbf\xbd" "character", "non-utf\x88", - "xn--mixed-\xc3\xbcp" + "xn--mixed-\xc3\xbcp", + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + 
"verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong" + "verylongverylongverylongverylongverylongverylongverylongverylongverylong", }; static const gint num_bad_names = G_N_ELEMENTS (bad_names); @@ -81,7 +97,7 @@ test_to_ascii (void) for (i = 0; i < num_idn_test_domains; i++) { - g_assert (g_hostname_is_non_ascii (idn_test_domains[i].unicode_name)); + g_assert_true (g_hostname_is_non_ascii (idn_test_domains[i].unicode_name)); ascii = g_hostname_to_ascii (idn_test_domains[i].unicode_name); g_assert_cmpstr (idn_test_domains[i].ascii_name, ==, ascii); g_free (ascii); @@ -94,14 +110,14 @@ test_to_ascii (void) for (i = 0; i < num_non_round_trip_names; i++) { if (non_round_trip_names[i].orig_is_unicode) - g_assert (g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); + g_assert_true (g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); else - g_assert (!g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); + g_assert_true (!g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); if (non_round_trip_names[i].ascii_is_encoded) - g_assert (g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); + g_assert_true (g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); else - g_assert (!g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); + g_assert_true (!g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); ascii = g_hostname_to_ascii 
(non_round_trip_names[i].orig_name); g_assert_cmpstr (non_round_trip_names[i].ascii_name, ==, ascii); @@ -127,7 +143,7 @@ test_to_unicode (void) for (i = 0; i < num_idn_test_domains; i++) { - g_assert (g_hostname_is_ascii_encoded (idn_test_domains[i].ascii_name)); + g_assert_true (g_hostname_is_ascii_encoded (idn_test_domains[i].ascii_name)); unicode = g_hostname_to_unicode (idn_test_domains[i].ascii_name); g_assert_cmpstr (idn_test_domains[i].unicode_name, ==, unicode); g_free (unicode); diff --git a/glib/tests/uri.c b/glib/tests/uri.c index 72698b73c..2c610382b 100644 --- a/glib/tests/uri.c +++ b/glib/tests/uri.c @@ -758,6 +758,10 @@ static const UriAbsoluteTest absolute_tests[] = { { NULL, NULL, NULL, -1, NULL, NULL, NULL } }, { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST, { NULL, NULL, NULL, -1, NULL, NULL, NULL } }, + + /* Invalid IDN hostname */ + { "http://xn--mixed-\xc3\xbcp/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST, + { NULL, NULL, NULL, -1, NULL, NULL, NULL } }, }; static void