gfileutils: Improve performance of g_canonicalize_filename()

Improve the performance of canonicalising filenames with many `..` or
`.` components by rewriting the path in place in a single pass, rather
than calling `memmove()` on the remainder of the string each time a
component is removed.

Signed-off-by: Philip Withnall <pwithnall@endlessos.org>

Fixes: #2541
This commit is contained in:
Sebastian Wilhelmi 2021-11-26 13:43:56 +00:00 committed by Philip Withnall
parent 72377e3b6e
commit 9a30a495ec
2 changed files with 94 additions and 57 deletions

View File

@ -2736,8 +2736,7 @@ gchar *
g_canonicalize_filename (const gchar *filename, g_canonicalize_filename (const gchar *filename,
const gchar *relative_to) const gchar *relative_to)
{ {
gchar *canon, *start, *p, *q; gchar *canon, *input, *output, *start;
guint i;
g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL); g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL);
@ -2770,74 +2769,76 @@ g_canonicalize_filename (const gchar *filename,
return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL); return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL);
} }
/* POSIX allows double slashes at the start to /* Find the first dir separator and use the canonical dir separator. */
* mean something special (as does windows too). for (output = start - 1;
* So, "//" != "/", but more than two slashes (output >= canon) && G_IS_DIR_SEPARATOR (*output);
output--)
*output = G_DIR_SEPARATOR;
output += 2;
/* POSIX allows double slashes at the start to mean something special
* (as does windows too). So, "//" != "/", but more than two slashes
* is treated as "/". * is treated as "/".
*/ */
i = 0; if (start - output == 1)
for (p = start - 1; output++;
(p >= canon) &&
G_IS_DIR_SEPARATOR (*p);
p--)
i++;
if (i > 2)
{
i -= 1;
start -= i;
memmove (start, start+i, strlen (start+i) + 1);
}
/* Make sure we're using the canonical dir separator */ input = start;
p++; while (*input)
while (p < start && G_IS_DIR_SEPARATOR (*p))
*p++ = G_DIR_SEPARATOR;
p = start;
while (*p != 0)
{ {
if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1]))) /* input points to the next non-separator to be processed. */
/* output points to the next location to write to. */
g_assert (input > canon && G_IS_DIR_SEPARATOR (input[-1]));
g_assert (output > canon && G_IS_DIR_SEPARATOR (output[-1]));
g_assert (input >= output);
/* Ignore repeated dir separators. */
while (G_IS_DIR_SEPARATOR (input[0]))
input++;
/* Ignore single dot directory components. */
if (input[0] == '.' && (input[1] == 0 || G_IS_DIR_SEPARATOR (input[1])))
{ {
memmove (p, p+1, strlen (p+1)+1); if (input[1] == 0)
break;
input += 2;
} }
else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2]))) /* Remove double-dot directory components along with the preceding
* path component. */
else if (input[0] == '.' && input[1] == '.' &&
(input[2] == 0 || G_IS_DIR_SEPARATOR (input[2])))
{ {
q = p + 2; if (output > start)
/* Skip previous separator */ {
p = p - 2; do
if (p < start) {
p = start; output--;
while (p > start && !G_IS_DIR_SEPARATOR (*p)) }
p--; while (!G_IS_DIR_SEPARATOR (output[-1]) && output > start);
if (G_IS_DIR_SEPARATOR (*p)) }
*p++ = G_DIR_SEPARATOR; if (input[2] == 0)
memmove (p, q, strlen (q)+1); break;
input += 3;
} }
/* Copy the input to the output until the next separator,
* then write that separator in its canonical form. */
else else
{ {
/* Skip until next separator */ while (*input && !G_IS_DIR_SEPARATOR (*input))
while (*p != 0 && !G_IS_DIR_SEPARATOR (*p)) *output++ = *input++;
p++; if (input[0] == 0)
break;
if (*p != 0) input++;
{ *output++ = G_DIR_SEPARATOR;
/* Canonicalize one separator */
*p++ = G_DIR_SEPARATOR;
}
} }
/* Remove additional separators */
q = p;
while (*q && G_IS_DIR_SEPARATOR (*q))
q++;
if (p != q)
memmove (p, q, strlen (q) + 1);
} }
/* Remove trailing slashes */ /* Remove a potentially trailing dir separator */
if (p > start && G_IS_DIR_SEPARATOR (*(p-1))) if (output > start && G_IS_DIR_SEPARATOR (output[-1]))
*(p-1) = 0; output--;
*output = '\0';
return canon; return canon;
} }

View File

@ -1051,6 +1051,18 @@ test_paths (void)
{ "///triple/slash", ".", "/triple/slash" }, { "///triple/slash", ".", "/triple/slash" },
{ "//double/slash", ".", "//double/slash" }, { "//double/slash", ".", "//double/slash" },
{ "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" }, { "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" },
{ "/", ".dot-dir", "/.dot-dir" },
{ "/cwd", "..", "/" },
{ "/etc", "hello/..", "/etc" },
{ "/etc", "hello/../", "/etc" },
{ "/", "..", "/" },
{ "/", "../", "/" },
{ "/", "/..", "/" },
{ "/", "/../", "/" },
{ "/", ".", "/" },
{ "/", "./", "/" },
{ "/", "/.", "/" },
{ "/", "/./", "/" },
#else #else
{ "/etc", "../usr/share", "\\usr\\share" }, { "/etc", "../usr/share", "\\usr\\share" },
{ "/", "/foo/bar", "\\foo\\bar" }, { "/", "/foo/bar", "\\foo\\bar" },
@ -1066,6 +1078,18 @@ test_paths (void)
{ "///triple/slash", ".", "\\triple\\slash" }, { "///triple/slash", ".", "\\triple\\slash" },
{ "//double/slash", ".", "//double/slash\\" }, { "//double/slash", ".", "//double/slash\\" },
{ "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" }, { "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" },
{ "/", ".dot-dir", "/.dot-dir" },
{ "/cwd", "..", "/" },
{ "/etc", "hello/..", "/etc" },
{ "/etc", "hello/../", "/etc" },
{ "/", "..", "/" },
{ "/", "../", "/" },
{ "/", "/..", "/" },
{ "/", "/../", "/" },
{ "/", ".", "/" },
{ "/", "./", "/" },
{ "/", "/.", "/" },
{ "/", "/./", "/" },
{ "\\etc", "..\\usr\\share", "\\usr\\share" }, { "\\etc", "..\\usr\\share", "\\usr\\share" },
{ "\\", "\\foo\\bar", "\\foo\\bar" }, { "\\", "\\foo\\bar", "\\foo\\bar" },
@ -1081,6 +1105,18 @@ test_paths (void)
{ "\\\\\\triple\\slash", ".", "\\triple\\slash" }, { "\\\\\\triple\\slash", ".", "\\triple\\slash" },
{ "\\\\double\\slash", ".", "\\\\double\\slash\\" }, { "\\\\double\\slash", ".", "\\\\double\\slash\\" },
{ "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" }, { "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" },
{ "\\", ".dot-dir", "\\.dot-dir" },
{ "\\cwd", "..", "\\" },
{ "\\etc", "hello\\..", "\\etc" },
{ "\\etc", "hello\\..\\", "\\etc" },
{ "\\", "..", "\\" },
{ "\\", "..\\", "\\" },
{ "\\", "\\..", "\\" },
{ "\\", "\\..\\", "\\" },
{ "\\", ".", "\\" },
{ "\\", ".\\", "\\" },
{ "\\", "\\.", "\\" },
{ "\\", "\\.\\", "\\" },
#endif #endif
}; };
const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks); const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks);