mirror of
				https://gitlab.gnome.org/GNOME/glib.git
				synced 2025-10-25 22:42:17 +02:00 
			
		
		
		
	gfileutils: Improve performance of g_canonicalize_filename()
Improve the performance of canonicalising filenames with many `..` or `.` components by modifying the path in place rather than repeatedly calling `memmove()`.

Signed-off-by: Philip Withnall <pwithnall@endlessos.org>

Fixes: #2541
This commit is contained in:
		
				
					committed by
					
						Philip Withnall
					
				
			
			
				
	
			
			
			
						parent
						
							b7cec3d9f0
						
					
				
				
					commit
					28a15f95c4
				
			| @@ -2736,8 +2736,7 @@ gchar * | |||||||
| g_canonicalize_filename (const gchar *filename, | g_canonicalize_filename (const gchar *filename, | ||||||
|                          const gchar *relative_to) |                          const gchar *relative_to) | ||||||
| { | { | ||||||
|   gchar *canon, *start, *p, *q; |   gchar *canon, *input, *output, *start; | ||||||
|   guint i; |  | ||||||
|  |  | ||||||
|   g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL); |   g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL); | ||||||
|  |  | ||||||
| @@ -2770,74 +2769,76 @@ g_canonicalize_filename (const gchar *filename, | |||||||
|       return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL); |       return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   /* POSIX allows double slashes at the start to |   /* Find the first dir separator and use the canonical dir separator. */ | ||||||
|    * mean something special (as does windows too). |   for (output = start - 1; | ||||||
|    * So, "//" != "/", but more than two slashes |        (output >= canon) && G_IS_DIR_SEPARATOR (*output); | ||||||
|  |        output--) | ||||||
|  |     *output = G_DIR_SEPARATOR; | ||||||
|  |  | ||||||
|  |   output += 2; | ||||||
|  |  | ||||||
|  |   /* POSIX allows double slashes at the start to mean something special | ||||||
|  |    * (as does windows too). So, "//" != "/", but more than two slashes | ||||||
|    * is treated as "/". |    * is treated as "/". | ||||||
|    */ |    */ | ||||||
|   i = 0; |   if (start - output == 1) | ||||||
|   for (p = start - 1; |     output++; | ||||||
|        (p >= canon) && |  | ||||||
|          G_IS_DIR_SEPARATOR (*p); |  | ||||||
|        p--) |  | ||||||
|     i++; |  | ||||||
|   if (i > 2) |  | ||||||
|     { |  | ||||||
|       i -= 1; |  | ||||||
|       start -= i; |  | ||||||
|       memmove (start, start+i, strlen (start+i) + 1); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   /* Make sure we're using the canonical dir separator */ |   input = start; | ||||||
|   p++; |   while (*input) | ||||||
|   while (p < start && G_IS_DIR_SEPARATOR (*p)) |  | ||||||
|     *p++ = G_DIR_SEPARATOR; |  | ||||||
|  |  | ||||||
|   p = start; |  | ||||||
|   while (*p != 0) |  | ||||||
|     { |     { | ||||||
|       if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1]))) |       /* input points to the next non-separator to be processed. */ | ||||||
|  |       /* output points to the next location to write to. */ | ||||||
|  |       g_assert (input > canon && G_IS_DIR_SEPARATOR (input[-1])); | ||||||
|  |       g_assert (output > canon && G_IS_DIR_SEPARATOR (output[-1])); | ||||||
|  |       g_assert (input >= output); | ||||||
|  |  | ||||||
|  |       /* Ignore repeated dir separators. */ | ||||||
|  |       while (G_IS_DIR_SEPARATOR (input[0])) | ||||||
|  |        input++; | ||||||
|  |  | ||||||
|  |       /* Ignore single dot directory components. */ | ||||||
|  |       if (input[0] == '.' && (input[1] == 0 || G_IS_DIR_SEPARATOR (input[1]))) | ||||||
|         { |         { | ||||||
|           memmove (p, p+1, strlen (p+1)+1); |            if (input[1] == 0) | ||||||
|  |              break; | ||||||
|  |            input += 2; | ||||||
|         } |         } | ||||||
|       else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2]))) |       /* Remove double-dot directory components along with the preceding | ||||||
|  |        * path component. */ | ||||||
|  |       else if (input[0] == '.' && input[1] == '.' && | ||||||
|  |                (input[2] == 0 || G_IS_DIR_SEPARATOR (input[2]))) | ||||||
|         { |         { | ||||||
|           q = p + 2; |           if (output > start) | ||||||
|           /* Skip previous separator */ |             { | ||||||
|           p = p - 2; |               do | ||||||
|           if (p < start) |                 { | ||||||
|             p = start; |                   output--; | ||||||
|           while (p > start && !G_IS_DIR_SEPARATOR (*p)) |                 } | ||||||
|             p--; |               while (!G_IS_DIR_SEPARATOR (output[-1]) && output > start); | ||||||
|           if (G_IS_DIR_SEPARATOR (*p)) |             } | ||||||
|             *p++ = G_DIR_SEPARATOR; |           if (input[2] == 0) | ||||||
|           memmove (p, q, strlen (q)+1); |             break; | ||||||
|  |           input += 3; | ||||||
|         } |         } | ||||||
|  |       /* Copy the input to the output until the next separator, | ||||||
|  |        * while converting it to canonical separator */ | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|           /* Skip until next separator */ |           while (*input && !G_IS_DIR_SEPARATOR (*input)) | ||||||
|           while (*p != 0 && !G_IS_DIR_SEPARATOR (*p)) |             *output++ = *input++; | ||||||
|             p++; |           if (input[0] == 0) | ||||||
|  |             break; | ||||||
|           if (*p != 0) |           input++; | ||||||
|             { |           *output++ = G_DIR_SEPARATOR; | ||||||
|               /* Canonicalize one separator */ |  | ||||||
|               *p++ = G_DIR_SEPARATOR; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|       /* Remove additional separators */ |  | ||||||
|       q = p; |  | ||||||
|       while (*q && G_IS_DIR_SEPARATOR (*q)) |  | ||||||
|         q++; |  | ||||||
|  |  | ||||||
|       if (p != q) |  | ||||||
|         memmove (p, q, strlen (q) + 1); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   /* Remove trailing slashes */ |   /* Remove a potentially trailing dir separator */ | ||||||
|   if (p > start && G_IS_DIR_SEPARATOR (*(p-1))) |   if (output > start && G_IS_DIR_SEPARATOR (output[-1])) | ||||||
|     *(p-1) = 0; |     output--; | ||||||
|  |  | ||||||
|  |   *output = '\0'; | ||||||
|  |  | ||||||
|   return canon; |   return canon; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1051,6 +1051,18 @@ test_paths (void) | |||||||
|     { "///triple/slash", ".", "/triple/slash" }, |     { "///triple/slash", ".", "/triple/slash" }, | ||||||
|     { "//double/slash", ".", "//double/slash" }, |     { "//double/slash", ".", "//double/slash" }, | ||||||
|     { "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" }, |     { "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" }, | ||||||
|  |     { "/", ".dot-dir", "/.dot-dir" }, | ||||||
|  |     { "/cwd", "..", "/" }, | ||||||
|  |     { "/etc", "hello/..", "/etc" }, | ||||||
|  |     { "/etc", "hello/../", "/etc" }, | ||||||
|  |     { "/", "..", "/" }, | ||||||
|  |     { "/", "../", "/" }, | ||||||
|  |     { "/", "/..", "/" }, | ||||||
|  |     { "/", "/../", "/" }, | ||||||
|  |     { "/", ".", "/" }, | ||||||
|  |     { "/", "./", "/" }, | ||||||
|  |     { "/", "/.", "/" }, | ||||||
|  |     { "/", "/./", "/" }, | ||||||
| #else | #else | ||||||
|     { "/etc", "../usr/share", "\\usr\\share" }, |     { "/etc", "../usr/share", "\\usr\\share" }, | ||||||
|     { "/", "/foo/bar", "\\foo\\bar" }, |     { "/", "/foo/bar", "\\foo\\bar" }, | ||||||
| @@ -1066,6 +1078,18 @@ test_paths (void) | |||||||
|     { "///triple/slash", ".", "\\triple\\slash" }, |     { "///triple/slash", ".", "\\triple\\slash" }, | ||||||
|     { "//double/slash", ".", "//double/slash\\" }, |     { "//double/slash", ".", "//double/slash\\" }, | ||||||
|     { "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" }, |     { "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" }, | ||||||
|  |     { "/", ".dot-dir", "/.dot-dir" }, | ||||||
|  |     { "/cwd", "..", "/" }, | ||||||
|  |     { "/etc", "hello/..", "/etc" }, | ||||||
|  |     { "/etc", "hello/../", "/etc" }, | ||||||
|  |     { "/", "..", "/" }, | ||||||
|  |     { "/", "../", "/" }, | ||||||
|  |     { "/", "/..", "/" }, | ||||||
|  |     { "/", "/../", "/" }, | ||||||
|  |     { "/", ".", "/" }, | ||||||
|  |     { "/", "./", "/" }, | ||||||
|  |     { "/", "/.", "/" }, | ||||||
|  |     { "/", "/./", "/" }, | ||||||
|  |  | ||||||
|     { "\\etc", "..\\usr\\share", "\\usr\\share" }, |     { "\\etc", "..\\usr\\share", "\\usr\\share" }, | ||||||
|     { "\\", "\\foo\\bar", "\\foo\\bar" }, |     { "\\", "\\foo\\bar", "\\foo\\bar" }, | ||||||
| @@ -1081,6 +1105,18 @@ test_paths (void) | |||||||
|     { "\\\\\\triple\\slash", ".", "\\triple\\slash" }, |     { "\\\\\\triple\\slash", ".", "\\triple\\slash" }, | ||||||
|     { "\\\\double\\slash", ".", "\\\\double\\slash\\" }, |     { "\\\\double\\slash", ".", "\\\\double\\slash\\" }, | ||||||
|     { "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" }, |     { "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" }, | ||||||
|  |     { "\\", ".dot-dir", "\\.dot-dir" }, | ||||||
|  |     { "\\cwd", "..", "\\" }, | ||||||
|  |     { "\\etc", "hello\\..", "\\etc" }, | ||||||
|  |     { "\\etc", "hello\\..\\", "\\etc" }, | ||||||
|  |     { "\\", "..", "\\" }, | ||||||
|  |     { "\\", "..\\", "\\" }, | ||||||
|  |     { "\\", "\\..", "\\" }, | ||||||
|  |     { "\\", "\\..\\", "\\" }, | ||||||
|  |     { "\\", ".", "\\" }, | ||||||
|  |     { "\\", ".\\", "\\" }, | ||||||
|  |     { "\\", "\\.", "\\" }, | ||||||
|  |     { "\\", "\\.\\", "\\" }, | ||||||
| #endif | #endif | ||||||
|   }; |   }; | ||||||
|   const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks); |   const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user