guri: Improve performance of remove_dot_segments() algorithm

This commit is contained in:
Sebastian Wilhelmi 2021-11-17 15:20:28 +00:00 committed by Philip Withnall
parent ae0a0d0dd5
commit d4ef27b3ba
2 changed files with 85 additions and 47 deletions

View File

@ -1296,68 +1296,93 @@ g_uri_is_valid (const gchar *uri_string,
} }
/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of /* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
* RFC 3986, except that @path is modified in place. * RFC 3986.
* *
* See https://tools.ietf.org/html/rfc3986#section-5.2.4 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
*/ */
static void static void
remove_dot_segments (gchar *path) remove_dot_segments (gchar *path)
{ {
gchar *p, *q; /* The output can be written to the same buffer that the input
* is read from, as the output pointer is only ever increased
* when the input pointer is increased as well, and the input
* pointer is never decreased. */
gchar *input = path;
gchar *output = path;
if (!*path) if (!*path)
return; return;
/* Remove "./" where "." is a complete segment. */ while (*input)
for (p = path + 1; *p; )
{ {
if (*(p - 1) == '/' && /* A. If the input buffer begins with a prefix of "../" or "./",
*p == '.' && *(p + 1) == '/') * then remove that prefix from the input buffer; otherwise,
memmove (p, p + 2, strlen (p + 2) + 1); */
if (strncmp (input, "../", 3) == 0)
input += 3;
else if (strncmp (input, "./", 2) == 0)
input += 2;
/* B. if the input buffer begins with a prefix of "/./" or "/.",
* where "." is a complete path segment, then replace that
* prefix with "/" in the input buffer; otherwise,
*/
else if (strncmp (input, "/./", 3) == 0)
input += 2;
else if (strcmp (input, "/.") == 0)
input[1] = '\0';
/* C. if the input buffer begins with a prefix of "/../" or "/..",
* where ".." is a complete path segment, then replace that
* prefix with "/" in the input buffer and remove the last
* segment and its preceding "/" (if any) from the output
* buffer; otherwise,
*/
else if (strncmp (input, "/../", 4) == 0)
{
input += 3;
if (output > path)
{
do
{
output--;
}
while (*output != '/' && output > path);
}
}
else if (strcmp (input, "/..") == 0)
{
input[1] = '\0';
if (output > path)
{
do
{
output--;
}
while (*output != '/' && output > path);
}
}
/* D. if the input buffer consists only of "." or "..", then remove
* that from the input buffer; otherwise,
*/
else if (strcmp (input, "..") == 0 || strcmp (input, ".") == 0)
input[0] = '\0';
/* E. move the first path segment in the input buffer to the end of
* the output buffer, including the initial "/" character (if
* any) and any subsequent characters up to, but not including,
* the next "/" character or the end of the input buffer.
*/
else else
p++;
}
/* Remove "." at end. */
if (p > path + 2 &&
*(p - 1) == '.' && *(p - 2) == '/')
*(p - 1) = '\0';
/* Remove "<segment>/../" where <segment> != ".." */
for (p = path + 1; *p; )
{ {
if (!strncmp (p, "../", 3)) *output++ = *input++;
{ while (*input && *input != '/')
p += 3; *output++ = *input++;
continue;
} }
q = strchr (p + 1, '/');
if (!q)
break;
if (strncmp (q, "/../", 4) != 0)
{
p = q + 1;
continue;
} }
memmove (p, q + 4, strlen (q + 4) + 1); *output = '\0';
p = path + 1;
}
/* Remove "<segment>/.." at end where <segment> != ".." */
q = strrchr (path, '/');
if (q && q != path && !strcmp (q, "/.."))
{
p = q - 1;
while (p > path && *p != '/')
p--;
if (strncmp (p, "/../", 4) != 0)
*(p + 1) = 0;
}
/* Remove extraneous initial "/.."s */
while (!strncmp (path, "/../", 4))
memmove (path, path + 3, strlen (path) - 2);
if (!strcmp (path, "/.."))
path[1] = '\0';
} }
/** /**

View File

@ -898,6 +898,19 @@ static const UriRelativeTest relative_tests[] = {
{ "ScHeMe://User:P%61ss@HOST.%63om:1234/path/./from/../to%7d/item%2dobj?qu%65ry=something#fr%61gment", { "ScHeMe://User:P%61ss@HOST.%63om:1234/path/./from/../to%7d/item%2dobj?qu%65ry=something#fr%61gment",
"scheme://User:Pass@HOST.com:1234/path/to%7D/item-obj?query=something#fragment", "scheme://User:Pass@HOST.com:1234/path/to%7D/item-obj?query=something#fragment",
{ "scheme", "User:Pass", "HOST.com", 1234, "/path/to}/item-obj", "query=something", "fragment" } }, { "scheme", "User:Pass", "HOST.com", 1234, "/path/to}/item-obj", "query=something", "fragment" } },
/* Test corner cases of remove_dot_segments */
{ "http:..", "http:",
{ "http", NULL, NULL, -1, "", NULL, NULL } },
{ "http:../", "http:",
{ "http", NULL, NULL, -1, "", NULL, NULL } },
{ "http:.", "http:",
{ "http", NULL, NULL, -1, "", NULL, NULL } },
{ "http:./", "http:",
{ "http", NULL, NULL, -1, "", NULL, NULL } },
{ "http:a/..", "http:/",
{ "http", NULL, NULL, -1, "/", NULL, NULL } },
{ "http:a/../", "http:/",
{ "http", NULL, NULL, -1, "/", NULL, NULL } },
}; };
static int num_relative_tests = G_N_ELEMENTS (relative_tests); static int num_relative_tests = G_N_ELEMENTS (relative_tests);