From ed0c7c7e0e8f7298352646b2fd6e06a11e242ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Sun, 2 Jun 2024 12:40:16 +0200 Subject: Properly re-implement userinfo parsing (rfc2396) * src/url.c (url_skip_credentials): Properly re-implement userinfo parsing (rfc2396) The reason why the implementation is based on RFC 2396, an outdated standard, is that the whole file is based on that RFC, and mixing standards here might be dangerous. --- src/url.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/url.c b/src/url.c index 69e948b..07c3bc8 100644 --- a/src/url.c +++ b/src/url.c @@ -41,6 +41,7 @@ as that of the covered work. */ #include "url.h" #include "host.h" /* for is_valid_ipv6_address */ #include "c-strcase.h" +#include "c-ctype.h" #ifdef HAVE_ICONV # include <iconv.h> @@ -526,12 +527,39 @@ scheme_leading_string (enum url_scheme scheme) static const char * url_skip_credentials (const char *url) { - /* Look for '@' that comes before terminators, such as '/', '?', - '#', or ';'. */ - const char *p = (const char *)strpbrk (url, "@/?#;"); - if (!p || *p != '@') - return url; - return p + 1; + /* + * This whole file implements https://www.rfc-editor.org/rfc/rfc2396 . + * RFC 2396 is outdated since 2005 and needs a rewrite or a thorough re-visit. + * + * The RFC says + * server = [ [ userinfo "@" ] hostport ] + * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | "+" | "$" | "," ) + * unreserved = alphanum | mark + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + static const char *allowed = "-_.!~*'();:&=+$,"; + + for (const char *p = url; *p; p++) + { + if (c_isalnum(*p)) + continue; + + if (strchr(allowed, *p)) + continue; + + if (*p == '%' && c_isxdigit(p[1]) && c_isxdigit(p[2])) + { + p += 2; + continue; + } + + if (*p == '@') + return p + 1; + + break; + } + + return url; } /* Parse credentials contained in [BEG, END). The region is expected -- cgit v1.1