From 0e5d9fd249e357934c81a40a9ea4693aee07f5f5 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Fri, 17 Mar 2023 13:17:40 +0000 Subject: [PATCH] gfile: Support copy_file_range() for file copies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it can’t be used in all situations, it is a little bit faster than `splice()` in some situations, basically if the file system supports copy on write. In other situations it’s no slower than `splice()`. See `man copy_file_range` for the situations where it doesn’t work. In all of these situations, it will return an error, and the GLib code will fall through and try the existing `splice()` copy code instead. From my testing of `time gio copy A B` with a 9GB file, the `splice()` code path takes 22s, and the `copy_file_range()` code path takes 20s. Signed-off-by: Philip Withnall Fixes: #2863 --- gio/gfile.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++ meson.build | 1 + 2 files changed, 141 insertions(+) diff --git a/gio/gfile.c b/gio/gfile.c index 4558f2d3b..31da0b05e 100644 --- a/gio/gfile.c +++ b/gio/gfile.c @@ -3007,6 +3007,122 @@ copy_stream_with_progress (GInputStream *in, return res; } +#ifdef HAVE_COPY_FILE_RANGE +static gboolean +do_copy_file_range (int fd_in, + loff_t *off_in, + int fd_out, + loff_t *off_out, + size_t len, + size_t *bytes_transferred, + GError **error) +{ + ssize_t result; + + do + { + result = copy_file_range (fd_in, off_in, fd_out, off_out, len, 0); + + if (result == -1) + { + int errsv = errno; + + if (errsv == EINTR) + { + continue; + } + else if (errsv == ENOSYS || errsv == EINVAL || errsv == EOPNOTSUPP || errsv == EXDEV) + { + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + _("Copy file range not supported")); + } + else + { + g_set_error (error, G_IO_ERROR, + g_io_error_from_errno (errsv), + _("Error splicing file: %s"), + g_strerror (errsv)); + } + + return FALSE; + } + } while (result == -1); + + g_assert (result >= 0); + *bytes_transferred = result; + + return TRUE; +} + +static gboolean +copy_file_range_with_progress (GInputStream *in, + GFileInfo *in_info, + GOutputStream *out, + GCancellable *cancellable, + GFileProgressCallback progress_callback, + gpointer progress_callback_data, + GError **error) +{ + goffset total_size, last_notified_size; + size_t copy_len; + loff_t offset_in; + loff_t offset_out; + int fd_in, fd_out; + + fd_in = g_file_descriptor_based_get_fd (G_FILE_DESCRIPTOR_BASED (in)); + fd_out = g_file_descriptor_based_get_fd (G_FILE_DESCRIPTOR_BASED (out)); + + g_assert (g_file_info_has_attribute (in_info, G_FILE_ATTRIBUTE_STANDARD_SIZE)); + total_size = g_file_info_get_size (in_info); + + /* Bail out if the reported size of the file is zero. It might be zero, but it + * might also just be a kernel file in /proc. They report their file size as + * zero, but then have data when you start reading. Go to the fallback code + * path for those. */ + if (total_size == 0) + { + g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + _("Copy file range not supported")); + return FALSE; + } + + offset_in = offset_out = 0; + copy_len = total_size; + last_notified_size = 0; + + /* Call copy_file_range() in a loop until the whole contents are copied. For + * smaller files, this loop will iterate only once. For larger files, the + * kernel (at least, kernel 6.1.6) will return after 2GB anyway, so that gives + * us more loop iterations and more progress reporting. */ + while (copy_len > 0) + { + size_t n_copied; + + if (g_cancellable_set_error_if_cancelled (cancellable, error) || + !do_copy_file_range (fd_in, &offset_in, fd_out, &offset_out, copy_len, &n_copied, error)) + return FALSE; + + if (n_copied == 0) + break; + + g_assert (n_copied <= copy_len); + copy_len -= n_copied; + + if (progress_callback) + { + progress_callback (offset_in, total_size, progress_callback_data); + last_notified_size = total_size; + } + } + + /* Make sure we send full copied size */ + if (progress_callback && last_notified_size != total_size) + progress_callback (offset_in, total_size, progress_callback_data); + + return TRUE; +} +#endif /* HAVE_COPY_FILE_RANGE */ + #ifdef HAVE_SPLICE static gboolean @@ -3401,6 +3517,30 @@ file_copy_fallback (GFile *source, } #endif +#ifdef HAVE_COPY_FILE_RANGE + if (G_IS_FILE_DESCRIPTOR_BASED (in) && G_IS_FILE_DESCRIPTOR_BASED (out)) + { + GError *copy_file_range_error = NULL; + + if (copy_file_range_with_progress (in, info, out, cancellable, + progress_callback, progress_callback_data, + ©_file_range_error)) + { + ret = TRUE; + goto out; + } + else if (!g_error_matches (copy_file_range_error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED)) + { + g_propagate_error (error, g_steal_pointer (©_file_range_error)); + goto out; + } + else + { + g_clear_error (©_file_range_error); + } + } +#endif /* HAVE_COPY_FILE_RANGE */ + #ifdef HAVE_SPLICE if (G_IS_FILE_DESCRIPTOR_BASED (in) && G_IS_FILE_DESCRIPTOR_BASED (out)) { diff --git a/meson.build b/meson.build index f275a3475..a5aade045 100644 --- a/meson.build +++ b/meson.build @@ -611,6 +611,7 @@ endif functions = [ 'accept4', 'close_range', + 'copy_file_range', 'endmntent', 'endservent', 'epoll_create',