From 9a30a495ec36e247492834ea23f2812e453e77a3 Mon Sep 17 00:00:00 2001 From: Sebastian Wilhelmi Date: Fri, 26 Nov 2021 13:43:56 +0000 Subject: [PATCH] gfileutils: Improve performance of g_canonicalize_filename() Improve the performance of canonicalising filenames with many `..` or `.` components, by modifying the path inline rather than calling `memmove()`. Signed-off-by: Philip Withnall Fixes: #2541 --- glib/gfileutils.c | 115 +++++++++++++++++++++++----------------------- tests/testglib.c | 36 +++++++++++++++ 2 files changed, 94 insertions(+), 57 deletions(-) diff --git a/glib/gfileutils.c b/glib/gfileutils.c index 92e06ebc8..281a522eb 100644 --- a/glib/gfileutils.c +++ b/glib/gfileutils.c @@ -2736,8 +2736,7 @@ gchar * g_canonicalize_filename (const gchar *filename, const gchar *relative_to) { - gchar *canon, *start, *p, *q; - guint i; + gchar *canon, *input, *output, *start; g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL); @@ -2770,74 +2769,76 @@ g_canonicalize_filename (const gchar *filename, return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL); } - /* POSIX allows double slashes at the start to - * mean something special (as does windows too). - * So, "//" != "/", but more than two slashes + /* Find the first dir separator and use the canonical dir separator. */ + for (output = start - 1; + (output >= canon) && G_IS_DIR_SEPARATOR (*output); + output--) + *output = G_DIR_SEPARATOR; + + output += 2; + + /* POSIX allows double slashes at the start to mean something special + * (as does windows too). So, "//" != "/", but more than two slashes * is treated as "/". */ - i = 0; - for (p = start - 1; - (p >= canon) && - G_IS_DIR_SEPARATOR (*p); - p--) - i++; - if (i > 2) - { - i -= 1; - start -= i; - memmove (start, start+i, strlen (start+i) + 1); - } + if (start - output == 1) + output++; - /* Make sure we're using the canonical dir separator */ - p++; - while (p < start && G_IS_DIR_SEPARATOR (*p)) - *p++ = G_DIR_SEPARATOR; - - p = start; - while (*p != 0) + input = start; + while (*input) { - if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1]))) + /* input points to the next non-separator to be processed. */ + /* output points to the next location to write to. */ + g_assert (input > canon && G_IS_DIR_SEPARATOR (input[-1])); + g_assert (output > canon && G_IS_DIR_SEPARATOR (output[-1])); + g_assert (input >= output); + + /* Ignore repeated dir separators. */ + while (G_IS_DIR_SEPARATOR (input[0])) + input++; + + /* Ignore single dot directory components. */ + if (input[0] == '.' && (input[1] == 0 || G_IS_DIR_SEPARATOR (input[1]))) { - memmove (p, p+1, strlen (p+1)+1); + if (input[1] == 0) + break; + input += 2; } - else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2]))) + /* Remove double-dot directory components along with the preceding + * path component. */ + else if (input[0] == '.' && input[1] == '.' && + (input[2] == 0 || G_IS_DIR_SEPARATOR (input[2]))) { - q = p + 2; - /* Skip previous separator */ - p = p - 2; - if (p < start) - p = start; - while (p > start && !G_IS_DIR_SEPARATOR (*p)) - p--; - if (G_IS_DIR_SEPARATOR (*p)) - *p++ = G_DIR_SEPARATOR; - memmove (p, q, strlen (q)+1); + if (output > start) + { + do + { + output--; + } + while (!G_IS_DIR_SEPARATOR (output[-1]) && output > start); + } + if (input[2] == 0) + break; + input += 3; } + /* Copy the input to the output until the next separator, + * while converting it to canonical separator */ else { - /* Skip until next separator */ - while (*p != 0 && !G_IS_DIR_SEPARATOR (*p)) - p++; - - if (*p != 0) - { - /* Canonicalize one separator */ - *p++ = G_DIR_SEPARATOR; - } + while (*input && !G_IS_DIR_SEPARATOR (*input)) + *output++ = *input++; + if (input[0] == 0) + break; + input++; + *output++ = G_DIR_SEPARATOR; } - - /* Remove additional separators */ - q = p; - while (*q && G_IS_DIR_SEPARATOR (*q)) - q++; - - if (p != q) - memmove (p, q, strlen (q) + 1); } - /* Remove trailing slashes */ - if (p > start && G_IS_DIR_SEPARATOR (*(p-1))) - *(p-1) = 0; + /* Remove a potentially trailing dir separator */ + if (output > start && G_IS_DIR_SEPARATOR (output[-1])) + output--; + + *output = '\0'; return canon; } diff --git a/tests/testglib.c b/tests/testglib.c index 071afdc1d..a3546fae6 100644 --- a/tests/testglib.c +++ b/tests/testglib.c @@ -1051,6 +1051,18 @@ test_paths (void) { "///triple/slash", ".", "/triple/slash" }, { "//double/slash", ".", "//double/slash" }, { "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" }, + { "/", ".dot-dir", "/.dot-dir" }, + { "/cwd", "..", "/" }, + { "/etc", "hello/..", "/etc" }, + { "/etc", "hello/../", "/etc" }, + { "/", "..", "/" }, + { "/", "../", "/" }, + { "/", "/..", "/" }, + { "/", "/../", "/" }, + { "/", ".", "/" }, + { "/", "./", "/" }, + { "/", "/.", "/" }, + { "/", "/./", "/" }, #else { "/etc", "../usr/share", "\\usr\\share" }, { "/", "/foo/bar", "\\foo\\bar" }, @@ -1066,6 +1078,18 @@ test_paths (void) { "///triple/slash", ".", "\\triple\\slash" }, { "//double/slash", ".", "//double/slash\\" }, { "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" }, + { "/", ".dot-dir", "/.dot-dir" }, + { "/cwd", "..", "/" }, + { "/etc", "hello/..", "/etc" }, + { "/etc", "hello/../", "/etc" }, + { "/", "..", "/" }, + { "/", "../", "/" }, + { "/", "/..", "/" }, + { "/", "/../", "/" }, + { "/", ".", "/" }, + { "/", "./", "/" }, + { "/", "/.", "/" }, + { "/", "/./", "/" }, { "\\etc", "..\\usr\\share", "\\usr\\share" }, { "\\", "\\foo\\bar", "\\foo\\bar" }, @@ -1081,6 +1105,18 @@ test_paths (void) { "\\\\\\triple\\slash", ".", "\\triple\\slash" }, { "\\\\double\\slash", ".", "\\\\double\\slash\\" }, { "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" }, + { "\\", ".dot-dir", "\\.dot-dir" }, + { "\\cwd", "..", "\\" }, + { "\\etc", "hello\\..", "\\etc" }, + { "\\etc", "hello\\..\\", "\\etc" }, + { "\\", "..", "\\" }, + { "\\", "..\\", "\\" }, + { "\\", "\\..", "\\" }, + { "\\", "\\..\\", "\\" }, + { "\\", ".", "\\" }, + { "\\", ".\\", "\\" }, + { "\\", "\\.", "\\" }, + { "\\", "\\.\\", "\\" }, #endif }; const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks);