Merge branch 'ossfuzz-27371-hostname-length' into 'master'

ghostutils: Abandon hostname conversion early if it’s too long

See merge request GNOME/glib!1782
This commit is contained in:
Sebastian Dröge 2020-12-07 08:29:31 +00:00
commit 986525f1e9
4 changed files with 124 additions and 13 deletions

View File

@ -21,6 +21,10 @@
#include <string.h> #include <string.h>
#ifdef G_OS_UNIX
#include <unistd.h>
#endif
#include "ghostutils.h" #include "ghostutils.h"
#include "garray.h" #include "garray.h"
@ -29,6 +33,10 @@
#include "gstrfuncs.h" #include "gstrfuncs.h"
#include "glibintl.h" #include "glibintl.h"
#ifdef G_PLATFORM_WIN32
#include <windows.h>
#endif
/** /**
* SECTION:ghostutils * SECTION:ghostutils
@ -405,6 +413,45 @@ idna_end_of_label (const gchar *str)
return str; return str;
} }
static gsize
get_hostname_max_length_bytes (void)
{
#if defined(G_OS_WIN32)
wchar_t tmp[MAX_COMPUTERNAME_LENGTH];
return sizeof (tmp) / sizeof (tmp[0]);
#elif defined(_SC_HOST_NAME_MAX)
glong max = sysconf (_SC_HOST_NAME_MAX);
if (max > 0)
return (gsize) max;
#ifdef HOST_NAME_MAX
return HOST_NAME_MAX;
#else
return _POSIX_HOST_NAME_MAX;
#endif /* HOST_NAME_MAX */
#else
/* Fallback to some reasonable value
* See https://stackoverflow.com/questions/8724954/what-is-the-maximum-number-of-characters-for-a-host-name-in-unix/28918017#28918017 */
return 255;
#endif
}
/* Returns %TRUE if `strlen (str) > comparison_length`, but without actually
 * running `strlen (str)`, as that would take a very long time for long
 * (untrusted) input strings.
 *
 * @str: nul-terminated string to measure; it is dereferenced unconditionally,
 *   so it must not be %NULL
 * @comparison_length: length, in bytes, to compare the string length against
 *
 * At most `comparison_length + 1` bytes of @str are examined. */
static gboolean
strlen_greater_than (const gchar *str,
                     gsize        comparison_length)
{
  gsize i;

  for (i = 0; str[i] != '\0'; i++)
    {
      /* str[i] is a non-nul byte, so the string is at least i + 1 bytes long,
       * which exceeds comparison_length as soon as i reaches it. Comparing
       * with `>` here would wrongly report FALSE for a string of exactly
       * comparison_length + 1 bytes. */
      if (i >= comparison_length)
        return TRUE;
    }

  return FALSE;
}
/** /**
* g_hostname_to_ascii: * g_hostname_to_ascii:
* @hostname: a valid UTF-8 or ASCII hostname * @hostname: a valid UTF-8 or ASCII hostname
@ -413,8 +460,8 @@ idna_end_of_label (const gchar *str)
* string containing no uppercase letters and not ending with a * string containing no uppercase letters and not ending with a
* trailing dot. * trailing dot.
* *
* Returns: an ASCII hostname, which must be freed, or %NULL if * Returns: (nullable) (transfer full): an ASCII hostname, which must be freed,
* @hostname is in some way invalid. * or %NULL if @hostname is in some way invalid.
* *
* Since: 2.22 * Since: 2.22
**/ **/
@ -425,6 +472,32 @@ g_hostname_to_ascii (const gchar *hostname)
GString *out; GString *out;
gssize llen, oldlen; gssize llen, oldlen;
gboolean unicode; gboolean unicode;
gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
/* Do an initial check on the hostname length, as overlong hostnames take a
* long time in the IDN cleanup algorithm in nameprep(). The ultimate
* restriction is that the IDN-decoded (i.e. pure ASCII) hostname cannot be
* longer than 255 bytes. That’s the least restrictive limit on hostname
* length of all the ways hostnames can be interpreted. Typically, the
* hostname will be an FQDN, which is limited to 253 bytes long. POSIX
* hostnames are limited to `get_hostname_max_length_bytes()` (typically 255
* bytes).
*
* See https://stackoverflow.com/a/28918017/2931197
*
* It’s possible for a hostname to be %-encoded, in which case its decoded
* length will be as much as 3× shorter.
*
* It’s also possible for a hostname to use overlong UTF-8 encodings, in which
* case its decoded length will be as much as 4× shorter.
*
* Note: This check is not intended as an absolute guarantee that a hostname
* is the right length and will be accepted by other systems. It’s intended to
* stop wildly-invalid hostnames from taking forever in nameprep().
*/
if (hostname_max_length_bytes <= G_MAXSIZE / 4 &&
strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes)))
return NULL;
label = name = nameprep (hostname, -1, &unicode); label = name = nameprep (hostname, -1, &unicode);
if (!name || !unicode) if (!name || !unicode)
@ -594,8 +667,8 @@ punycode_decode (const gchar *input,
* Of course if @hostname is not an internationalized hostname, then * Of course if @hostname is not an internationalized hostname, then
* the canonical presentation form will be entirely ASCII. * the canonical presentation form will be entirely ASCII.
* *
* Returns: a UTF-8 hostname, which must be freed, or %NULL if * Returns: (nullable) (transfer full): a UTF-8 hostname, which must be freed,
* @hostname is in some way invalid. * or %NULL if @hostname is in some way invalid.
* *
* Since: 2.22 * Since: 2.22
**/ **/
@ -604,6 +677,12 @@ g_hostname_to_unicode (const gchar *hostname)
{ {
GString *out; GString *out;
gssize llen; gssize llen;
gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
/* See the comment at the top of g_hostname_to_ascii(). */
if (hostname_max_length_bytes <= G_MAXSIZE / 4 &&
strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes)))
return NULL;
out = g_string_new (NULL); out = g_string_new (NULL);

View File

@ -604,9 +604,21 @@ parse_host (const gchar *start,
} }
if (g_hostname_is_non_ascii (decoded)) if (g_hostname_is_non_ascii (decoded))
host = g_hostname_to_ascii (decoded); {
host = g_hostname_to_ascii (decoded);
if (host == NULL)
{
g_free (decoded);
g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
_("Illegal internationalized hostname %.*s in URI"),
(gint) length, start);
return FALSE;
}
}
else else
host = g_steal_pointer (&decoded); {
host = g_steal_pointer (&decoded);
}
ok: ok:
if (out) if (out)

View File

@ -69,7 +69,23 @@ static const gint num_non_round_trip_names = G_N_ELEMENTS (non_round_trip_names)
static const gchar *bad_names[] = { static const gchar *bad_names[] = {
"disallowed\xef\xbf\xbd" "character", "disallowed\xef\xbf\xbd" "character",
"non-utf\x88", "non-utf\x88",
"xn--mixed-\xc3\xbcp" "xn--mixed-\xc3\xbcp",
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong"
"verylongverylongverylongverylongverylongverylongverylongverylongverylong",
}; };
static const gint num_bad_names = G_N_ELEMENTS (bad_names); static const gint num_bad_names = G_N_ELEMENTS (bad_names);
@ -81,7 +97,7 @@ test_to_ascii (void)
for (i = 0; i < num_idn_test_domains; i++) for (i = 0; i < num_idn_test_domains; i++)
{ {
g_assert (g_hostname_is_non_ascii (idn_test_domains[i].unicode_name)); g_assert_true (g_hostname_is_non_ascii (idn_test_domains[i].unicode_name));
ascii = g_hostname_to_ascii (idn_test_domains[i].unicode_name); ascii = g_hostname_to_ascii (idn_test_domains[i].unicode_name);
g_assert_cmpstr (idn_test_domains[i].ascii_name, ==, ascii); g_assert_cmpstr (idn_test_domains[i].ascii_name, ==, ascii);
g_free (ascii); g_free (ascii);
@ -94,14 +110,14 @@ test_to_ascii (void)
for (i = 0; i < num_non_round_trip_names; i++) for (i = 0; i < num_non_round_trip_names; i++)
{ {
if (non_round_trip_names[i].orig_is_unicode) if (non_round_trip_names[i].orig_is_unicode)
g_assert (g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); g_assert_true (g_hostname_is_non_ascii (non_round_trip_names[i].orig_name));
else else
g_assert (!g_hostname_is_non_ascii (non_round_trip_names[i].orig_name)); g_assert_true (!g_hostname_is_non_ascii (non_round_trip_names[i].orig_name));
if (non_round_trip_names[i].ascii_is_encoded) if (non_round_trip_names[i].ascii_is_encoded)
g_assert (g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); g_assert_true (g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name));
else else
g_assert (!g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name)); g_assert_true (!g_hostname_is_ascii_encoded (non_round_trip_names[i].ascii_name));
ascii = g_hostname_to_ascii (non_round_trip_names[i].orig_name); ascii = g_hostname_to_ascii (non_round_trip_names[i].orig_name);
g_assert_cmpstr (non_round_trip_names[i].ascii_name, ==, ascii); g_assert_cmpstr (non_round_trip_names[i].ascii_name, ==, ascii);
@ -127,7 +143,7 @@ test_to_unicode (void)
for (i = 0; i < num_idn_test_domains; i++) for (i = 0; i < num_idn_test_domains; i++)
{ {
g_assert (g_hostname_is_ascii_encoded (idn_test_domains[i].ascii_name)); g_assert_true (g_hostname_is_ascii_encoded (idn_test_domains[i].ascii_name));
unicode = g_hostname_to_unicode (idn_test_domains[i].ascii_name); unicode = g_hostname_to_unicode (idn_test_domains[i].ascii_name);
g_assert_cmpstr (idn_test_domains[i].unicode_name, ==, unicode); g_assert_cmpstr (idn_test_domains[i].unicode_name, ==, unicode);
g_free (unicode); g_free (unicode);

View File

@ -758,6 +758,10 @@ static const UriAbsoluteTest absolute_tests[] = {
{ NULL, NULL, NULL, -1, NULL, NULL, NULL } }, { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
{ "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST, { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
{ NULL, NULL, NULL, -1, NULL, NULL, NULL } }, { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
/* Invalid IDN hostname */
{ "http://xn--mixed-\xc3\xbcp/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
{ NULL, NULL, NULL, -1, NULL, NULL, NULL } },
}; };
static void static void