gfileutils: Implement GFileSetContentsFlags

This moves `write_to_temp_file()` into `g_file_set_contents_full()` and
coalesces its handling of `do_fsync` with the `rename_file()` call. It
adds support for `G_FILE_SET_CONTENTS_DURABLE` and
`G_FILE_SET_CONTENTS_NONE` — previously only
`G_FILE_SET_CONTENTS_CONSISTENT | G_FILE_SET_CONTENTS_ONLY_EXISTING` was
supported.

In the case that `G_FILE_SET_CONTENTS_CONSISTENT |
G_FILE_SET_CONTENTS_DURABLE` is set, an additional `fsync()` is now done
on the directory after renaming the temporary file.

In the case that `G_FILE_SET_CONTENTS_ONLY_EXISTING` isn’t set, the
`fsync()` after writing the temporary file will always be done (unless
the file system guarantees it never needs to be done).

In the case that only `G_FILE_SET_CONTENTS_DURABLE` is set, the
destination file will be written to directly (using this mode is not
really advised).

Signed-off-by: Philip Withnall <withnall@endlessm.com>

Fixes: #1302
This commit is contained in:
Philip Withnall 2020-05-27 13:26:07 +01:00
parent 387c159862
commit f3cea1c464

View File

@ -46,6 +46,10 @@
#define O_BINARY 0 #define O_BINARY 0
#endif #endif
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
#endif
#include "gfileutils.h" #include "gfileutils.h"
#include "gstdio.h" #include "gstdio.h"
@ -1024,6 +1028,7 @@ g_file_get_contents (const gchar *filename,
static gboolean static gboolean
rename_file (const char *old_name, rename_file (const char *old_name,
const char *new_name, const char *new_name,
gboolean do_fsync,
GError **err) GError **err)
{ {
errno = 0; errno = 0;
@ -1047,6 +1052,28 @@ rename_file (const char *old_name,
return FALSE; return FALSE;
} }
/* In order to guarantee that the *new* contents of the file are seen in
* future, fsync() the directory containing the file. Otherwise if the file
* system was unmounted uncleanly now, it would be undefined whether the old
* or new contents of the file were visible after recovery.
*
* This assumes the @old_name and @new_name are in the same directory. */
#ifdef HAVE_FSYNC
if (do_fsync)
{
gchar *dir = g_path_get_dirname (new_name);
int dir_fd = g_open (dir, O_RDONLY, 0);
if (dir_fd >= 0)
{
fsync (dir_fd);
g_close (dir_fd, NULL);
}
g_free (dir);
}
#endif /* HAVE_FSYNC */
return TRUE; return TRUE;
} }
@ -1065,21 +1092,44 @@ fd_should_be_fsynced (int fd,
/* On Linux, on btrfs, skip the fsync since rename-over-existing is /* On Linux, on btrfs, skip the fsync since rename-over-existing is
* guaranteed to be atomic and this is the only case in which we * guaranteed to be atomic and this is the only case in which we
* would fsync() anyway. * would fsync() anyway.
*
* See https://btrfs.wiki.kernel.org/index.php/FAQ#What_are_the_crash_guarantees_of_overwrite-by-rename.3F
*/ */
if (fstatfs (fd, &buf) == 0 && buf.f_type == BTRFS_SUPER_MAGIC) if ((flags & G_FILE_SET_CONTENTS_CONSISTENT) &&
fstatfs (fd, &buf) == 0 && buf.f_type == BTRFS_SUPER_MAGIC)
return FALSE; return FALSE;
} }
#endif /* BTRFS_SUPER_MAGIC */ #endif /* BTRFS_SUPER_MAGIC */
errno = 0;
/* If the final destination exists and is > 0 bytes, we want to sync the /* If the final destination exists and is > 0 bytes, we want to sync the
* newly written file to ensure the data is on disk when we rename over * newly written file to ensure the data is on disk when we rename over
* the destination. Otherwise if we get a system crash we can lose both * the destination. Otherwise if we get a system crash we can lose both
* the new and the old file on some filesystems. (I.E. those that don't * the new and the old file on some filesystems. (I.E. those that don't
* guarantee the data is written to the disk before the metadata.) * guarantee the data is written to the disk before the metadata.)
*
* There is no difference (in file system terms) if the old file doesn’t
* already exist, apart from the fact that if the system crashes and the new
* data hasn’t been fsync()ed, there is only one bit of old data to lose (that
* the file didn’t exist in the first place). In some situations, such as
* trashing files, the old file never exists, so it seems reasonable to avoid
* the fsync(). This is not a widely applicable optimisation though.
*/ */
return (g_lstat (test_file, &statbuf) == 0 && statbuf.st_size > 0); if ((flags & (G_FILE_SET_CONTENTS_CONSISTENT | G_FILE_SET_CONTENTS_DURABLE)) &&
(flags & G_FILE_SET_CONTENTS_ONLY_EXISTING))
{
errno = 0;
if (g_lstat (test_file, &statbuf) == 0)
return (statbuf.st_size > 0);
else if (errno == ENOENT)
return FALSE;
else
return TRUE; /* lstat() failed; be cautious */
}
else
{
return (flags & (G_FILE_SET_CONTENTS_CONSISTENT | G_FILE_SET_CONTENTS_DURABLE));
}
#else /* if !HAVE_FSYNC */ #else /* if !HAVE_FSYNC */
return FALSE; return FALSE;
#endif /* !HAVE_FSYNC */ #endif /* !HAVE_FSYNC */
@ -1159,43 +1209,6 @@ steal_fd (int *fd_ptr)
return fd; return fd;
} }
static gchar *
write_to_temp_file (const gchar *contents,
gsize length,
const gchar *dest_file,
GError **err)
{
gchar *tmp_name = NULL;
int fd;
gboolean do_fsync;
tmp_name = g_strdup_printf ("%s.XXXXXX", dest_file);
errno = 0;
fd = g_mkstemp_full (tmp_name, O_RDWR | O_BINARY, 0666);
if (fd == -1)
{
int saved_errno = errno;
set_file_error (err,
tmp_name, _("Failed to create file “%s”: %s"),
saved_errno);
g_free (tmp_name);
return NULL;
}
do_fsync = fd_should_be_fsynced (fd, dest_file, flags);
if (!write_to_file (contents, length, steal_fd (&fd), tmp_name, do_fsync, err))
{
g_unlink (tmp_name);
g_free (tmp_name);
return NULL;
}
return g_steal_pointer (&tmp_name);
}
/** /**
* g_file_set_contents: * g_file_set_contents:
* @filename: (type filename): name of a file to write @contents to, in the GLib file name * @filename: (type filename): name of a file to write @contents to, in the GLib file name
@ -1235,8 +1248,20 @@ g_file_set_contents (const gchar *filename,
* Writes all of @contents to a file named @filename, with good error checking. * Writes all of @contents to a file named @filename, with good error checking.
* If a file called @filename already exists it will be overwritten. * If a file called @filename already exists it will be overwritten.
* *
* This write is atomic in the sense that it is first written to a temporary * @flags control the properties of the write operation: whether it’s atomic,
* file which is then renamed to the final name. Notes: * and what the tradeoff is between returning quickly or being resilient to
* system crashes.
*
* As this function performs file I/O, it is recommended to not call it anywhere
* where blocking would cause problems, such as in the main loop of a graphical
* application. In particular, if @flags has any value other than
* %G_FILE_SET_CONTENTS_NONE then this function may call `fsync()`.
*
* If %G_FILE_SET_CONTENTS_CONSISTENT is set in @flags, the operation is atomic
* in the sense that it is first written to a temporary file which is then
* renamed to the final name.
*
* Notes:
* *
* - On UNIX, if @filename already exists hard links to @filename will break. * - On UNIX, if @filename already exists hard links to @filename will break.
* Also since the file is recreated, existing permissions, access control * Also since the file is recreated, existing permissions, access control
@ -1244,15 +1269,17 @@ g_file_set_contents (const gchar *filename,
* the link itself will be replaced, not the linked file. * the link itself will be replaced, not the linked file.
* *
* - On UNIX, if @filename already exists and is non-empty, and if the system * - On UNIX, if @filename already exists and is non-empty, and if the system
* supports it (via a journalling filesystem or equivalent), the fsync() * supports it (via a journalling filesystem or equivalent), and if
* call (or equivalent) will be used to ensure atomic replacement: @filename * %G_FILE_SET_CONTENTS_CONSISTENT is set in @flags, the `fsync()` call (or
* equivalent) will be used to ensure atomic replacement: @filename
* will contain either its old contents or @contents, even in the face of * will contain either its old contents or @contents, even in the face of
* system power loss, the disk being unsafely removed, etc. * system power loss, the disk being unsafely removed, etc.
* *
* - On UNIX, if @filename does not already exist or is empty, there is a * - On UNIX, if @filename does not already exist or is empty, there is a
* possibility that system power loss etc. after calling this function will * possibility that system power loss etc. after calling this function will
* leave @filename empty or full of NUL bytes, depending on the underlying * leave @filename empty or full of NUL bytes, depending on the underlying
* filesystem. * filesystem, unless %G_FILE_SET_CONTENTS_DURABLE and
* %G_FILE_SET_CONTENTS_CONSISTENT are set in @flags.
* *
* - On Windows renaming a file will not remove an existing file with the * - On Windows renaming a file will not remove an existing file with the
* new name, so on Windows there is a race condition between the existing * new name, so on Windows there is a race condition between the existing
@ -1280,34 +1307,67 @@ g_file_set_contents_full (const gchar *filename,
GFileSetContentsFlags flags, GFileSetContentsFlags flags,
GError **error) GError **error)
{ {
gchar *tmp_filename;
gboolean retval;
GError *rename_error = NULL;
g_return_val_if_fail (filename != NULL, FALSE); g_return_val_if_fail (filename != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE); g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
g_return_val_if_fail (contents != NULL || length == 0, FALSE); g_return_val_if_fail (contents != NULL || length == 0, FALSE);
g_return_val_if_fail (length >= -1, FALSE); g_return_val_if_fail (length >= -1, FALSE);
/* @flags are handled as follows:
* - %G_FILE_SET_CONTENTS_NONE: write directly to @filename, no fsync()s
* - %G_FILE_SET_CONTENTS_CONSISTENT: write to temp file, fsync() it, rename()
* - %G_FILE_SET_CONTENTS_CONSISTENT | ONLY_EXISTING: as above, but skip the
* fsync() if @filename doesn’t exist or is empty
* - %G_FILE_SET_CONTENTS_DURABLE: write directly to @filename, fsync() it
* - %G_FILE_SET_CONTENTS_DURABLE | ONLY_EXISTING: as above, but skip the
* fsync() if @filename doesn’t exist or is empty
* - %G_FILE_SET_CONTENTS_CONSISTENT | DURABLE: write to temp file, fsync()
* it, rename(), fsync() containing directory
* - %G_FILE_SET_CONTENTS_CONSISTENT | DURABLE | ONLY_EXISTING: as above, but
* skip both fsync()s if @filename doesn’t exist or is empty
*/
if (length < 0) if (length < 0)
length = strlen (contents); length = strlen (contents);
tmp_filename = write_to_temp_file (contents, (gsize) length, filename, error); if (flags & G_FILE_SET_CONTENTS_CONSISTENT)
if (!tmp_filename)
{ {
gchar *tmp_filename = NULL;
GError *rename_error = NULL;
gboolean retval;
int fd;
gboolean do_fsync;
tmp_filename = g_strdup_printf ("%s.XXXXXX", filename);
errno = 0;
fd = g_mkstemp_full (tmp_filename, O_RDWR | O_BINARY, 0666);
if (fd == -1)
{
int saved_errno = errno;
set_file_error (error,
tmp_filename, _("Failed to create file “%s”: %s"),
saved_errno);
retval = FALSE; retval = FALSE;
goto out; goto consistent_out;
} }
if (!rename_file (tmp_filename, filename, &rename_error)) do_fsync = fd_should_be_fsynced (fd, filename, flags);
if (!write_to_file (contents, length, steal_fd (&fd), tmp_filename, do_fsync, error))
{
g_unlink (tmp_filename);
retval = FALSE;
goto consistent_out;
}
if (!rename_file (tmp_filename, filename, do_fsync, &rename_error))
{ {
#ifndef G_OS_WIN32 #ifndef G_OS_WIN32
g_unlink (tmp_filename); g_unlink (tmp_filename);
g_propagate_error (error, rename_error); g_propagate_error (error, rename_error);
retval = FALSE; retval = FALSE;
goto out; goto consistent_out;
#else /* G_OS_WIN32 */ #else /* G_OS_WIN32 */
@ -1320,7 +1380,7 @@ g_file_set_contents_full (const gchar *filename,
g_unlink (tmp_filename); g_unlink (tmp_filename);
g_propagate_error (error, rename_error); g_propagate_error (error, rename_error);
retval = FALSE; retval = FALSE;
goto out; goto consistent_out;
} }
g_error_free (rename_error); g_error_free (rename_error);
@ -1334,24 +1394,68 @@ g_file_set_contents_full (const gchar *filename,
saved_errno); saved_errno);
g_unlink (tmp_filename); g_unlink (tmp_filename);
retval = FALSE; retval = FALSE;
goto out; goto consistent_out;
} }
if (!rename_file (tmp_filename, filename, error)) if (!rename_file (tmp_filename, filename, flags, error))
{ {
g_unlink (tmp_filename); g_unlink (tmp_filename);
retval = FALSE; retval = FALSE;
goto out; goto consistent_out;
} }
#endif #endif /* G_OS_WIN32 */
} }
retval = TRUE; retval = TRUE;
out: consistent_out:
g_free (tmp_filename); g_free (tmp_filename);
return retval; return retval;
}
else
{
int direct_fd;
int open_flags;
gboolean do_fsync;
open_flags = O_RDWR | O_BINARY | O_CREAT | O_CLOEXEC;
#ifdef O_NOFOLLOW
/* Windows doesn’t have symlinks, so O_NOFOLLOW is unnecessary there. */
open_flags |= O_NOFOLLOW;
#endif
errno = 0;
direct_fd = g_open (filename, open_flags, 0666);
if (direct_fd < 0)
{
int saved_errno = errno;
#ifdef O_NOFOLLOW
/* ELOOP indicates that @filename is a symlink, since we used
* O_NOFOLLOW (alternately it could indicate that @filename contains
* looping or too many symlinks). In either case, try again on the
* %G_FILE_SET_CONTENTS_CONSISTENT code path. */
if (saved_errno == ELOOP)
return g_file_set_contents_full (filename, contents, length,
flags | G_FILE_SET_CONTENTS_CONSISTENT,
error);
#endif
set_file_error (error,
filename, _("Failed to open file “%s”: %s"),
saved_errno);
return FALSE;
}
do_fsync = fd_should_be_fsynced (direct_fd, filename, flags);
if (!write_to_file (contents, length, steal_fd (&direct_fd), filename,
do_fsync, error))
return FALSE;
}
return TRUE;
} }
/* /*