/* GIO - GLib Input, Output and Streaming Library
*
* Copyright 2025 Red Hat, Inc.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "gcancellable.h"
#include "gdbusnamewatching.h"
#include "gdbusproxy.h"
#include "ginitable.h"
#include "gioerror.h"
#include "giomodule-priv.h"
#include "glibintl.h"
#include "glib/glib-private.h"
#include "glib/gstdio.h"
#include "gmemorymonitor.h"
#include "gmemorymonitorbase.h"
#include "gmemorymonitorpsi.h"
#include
#include
#include
/**
* GMemoryMonitorPsi:
*
* A Linux [iface@Gio.MemoryMonitor] which uses the kernel
* [pressure stall information](https://www.kernel.org/doc/html/latest/accounting/psi.html) (PSI).
*
* When it receives a PSI event, it emits
* [signal@Gio.MemoryMonitor::low-memory-warning] with an appropriate warning
* level.
*
* Since: 2.86
*/
/* PSI window size, in seconds.
 *
 * Unprivileged users can also create PSI monitors, with the only
 * limitation that the window size must be a multiple of 2s, in order to
 * prevent excessive resource usage.
 *
 * See: https://www.kernel.org/doc/html/latest/accounting/psi.html */
#define PSI_WINDOW_SEC 2
/* Property IDs dispatched on by g_memory_monitor_psi_set_property() and
 * g_memory_monitor_psi_get_property(). */
typedef enum {
/* Backed by GMemoryMonitorPsi.proc_path; the setter asserts that it is only
 * ever set once. */
PROP_PROC_PATH = 1,
} GMemoryMonitorPsiProperty;
/* Kind of memory stall a PSI trigger watches for.
 *
 * In the `triggers` table in this file, SOME is used for partial stalls and
 * FULL for complete stalls. MFD is not referenced in the visible part of
 * this file.
 * NOTE(review): the meaning of MFD is not documented here; confirm before
 * relying on it. */
typedef enum
{
MEMORY_PRESSURE_MONITOR_TRIGGER_SOME,
MEMORY_PRESSURE_MONITOR_TRIGGER_FULL,
MEMORY_PRESSURE_MONITOR_TRIGGER_MFD
} MemoryPressureMonitorTriggerType;
/* Static description of the PSI triggers to set up: one entry per
 * low-memory level (the array has G_MEMORY_MONITOR_LOW_MEMORY_LEVEL_COUNT
 * elements), each giving the stall type to watch and the stall threshold in
 * milliseconds.
 *
 * Each trigger here results in an open fd for the lifetime of the
 * `GMemoryMonitor`, so don't add too many.
 *
 * NOTE(review): presumably the entries are indexed by
 * GMemoryMonitorLowMemoryLevel in increasing order of severity; confirm
 * against the code which consumes this table. */
static const struct
{
MemoryPressureMonitorTriggerType trigger_type;
int threshold_ms;
} triggers[G_MEMORY_MONITOR_LOW_MEMORY_LEVEL_COUNT] = {
{ MEMORY_PRESSURE_MONITOR_TRIGGER_SOME, 70 }, /* 70ms out of 2sec for partial stall */
{ MEMORY_PRESSURE_MONITOR_TRIGGER_SOME, 100 }, /* 100ms out of 2sec for partial stall */
{ MEMORY_PRESSURE_MONITOR_TRIGGER_FULL, 100 }, /* 100ms out of 2sec for complete stall */
};
/* Custom GSource which watches one file descriptor on behalf of a
 * GMemoryMonitorPsi. Instances are allocated by
 * g_memory_monitor_create_source() and driven by
 * memory_monitor_event_funcs. */
typedef struct
{
/* Base GSource; must stay first so the struct can be cast to and from
 * `GSource *`. */
GSource source;
/* Value returned by g_source_add_unix_fd() for the watched fd. It is
 * passed back to g_source_query_unix_fd() as the fd tag. */
GPollFD *pollfd;
/* Low-memory level passed to the callback when this source fires. */
GMemoryMonitorLowMemoryLevel level_type;
/* Weak reference to the owning monitor; initialised in
 * g_memory_monitor_create_source() and cleared in event_finalize(). */
GWeakRef monitor_weak;
} MemoryMonitorSource;
/* Signature of the callback invoked by event_dispatch() when a
 * MemoryMonitorSource fires. The source's GSourceFunc is cast to this type
 * before being called.
 *
 * @monitor: the monitor the source belongs to
 * @level_type: the low-memory level associated with the source
 * @user_data: the user data registered with the source's callback
 *
 * Note that event_dispatch() currently does not look at the return value. */
typedef gboolean (*MemoryMonitorCallbackFunc) (GMemoryMonitorPsi *monitor,
GMemoryMonitorLowMemoryLevel level_type,
void *user_data);
/* Looks up the G_TYPE_INITABLE interface structure of instance @o.
 * NOTE(review): the result is cast to `GInitable *` rather than
 * `GInitableIface *`; confirm this is what the users of the macro expect. */
#define G_MEMORY_MONITOR_PSI_GET_INITABLE_IFACE(o) (G_TYPE_INSTANCE_GET_INTERFACE ((o), G_TYPE_INITABLE, GInitable))
/* Interface initialisers referenced by G_DEFINE_TYPE_WITH_CODE() below;
 * their definitions are not in this part of the file. */
static void g_memory_monitor_psi_iface_init (GMemoryMonitorInterface *iface);
static void g_memory_monitor_psi_initable_iface_init (GInitableIface *iface);
/* Instance data for GMemoryMonitorPsi. */
struct _GMemoryMonitorPsi
{
GMemoryMonitorBase parent_instance;
/* Main context the trigger callback is expected to run in (see the
 * assertion in g_memory_monitor_low_trigger_cb()). */
GMainContext *worker; /* (unowned) */
/* One source per low-memory level. */
GSource *triggers[G_MEMORY_MONITOR_LOW_MEMORY_LEVEL_COUNT]; /* (owned) (nullable) */
/* Path of the file opened for each trigger; computed by
 * g_memory_monitor_psi_calculate_mem_pressure_path(). */
char *cg_path;
/* Path of the cgroup description file to parse: either the value of the
 * proc-path property, or "/proc/<pid>/cgroup" for the current process. */
char *proc_path;
/* TRUE if proc_path was supplied through the property (used by tests);
 * this switches polling from G_IO_PRI to G_IO_IN and skips the free-memory
 * ratio check before signalling. */
gboolean proc_override;
};
/* Registers the GMemoryMonitorPsi type as a subclass of
 * G_TYPE_MEMORY_MONITOR_BASE implementing GInitable and GMemoryMonitor,
 * then registers it under the name "psi" with priority 20 at the
 * G_MEMORY_MONITOR_EXTENSION_POINT_NAME extension point. The extension
 * points are ensured to be registered first. */
G_DEFINE_TYPE_WITH_CODE (GMemoryMonitorPsi, g_memory_monitor_psi, G_TYPE_MEMORY_MONITOR_BASE,
G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
g_memory_monitor_psi_initable_iface_init)
G_IMPLEMENT_INTERFACE (G_TYPE_MEMORY_MONITOR,
g_memory_monitor_psi_iface_init)
_g_io_modules_ensure_extension_points_registered ();
g_io_extension_point_implement (G_MEMORY_MONITOR_EXTENSION_POINT_NAME,
g_define_type_id,
"psi",
20))
/* Instance initialiser required by G_DEFINE_TYPE_WITH_CODE(). It is
 * intentionally empty: no per-instance setup is performed here. */
static void
g_memory_monitor_psi_init (GMemoryMonitorPsi *psi)
{
}
/*
 * GObject property setter for GMemoryMonitorPsi.
 *
 * The only property handled is PROP_PROC_PATH: the string is duplicated into
 * `proc_path` and, if it is non-NULL, `proc_override` is switched on. Any
 * other property ID triggers the standard invalid-property warning.
 */
static void
g_memory_monitor_psi_set_property (GObject      *object,
                                   guint         prop_id,
                                   const GValue *value,
                                   GParamSpec   *pspec)
{
  GMemoryMonitorPsi *self = G_MEMORY_MONITOR_PSI (object);

  if ((GMemoryMonitorPsiProperty) prop_id != PROP_PROC_PATH)
    {
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      return;
    }

  /* Construct only, so the path must not have been set before */
  g_assert (self->proc_path == NULL);

  self->proc_path = g_value_dup_string (value);

  /* A NULL path leaves the override flag untouched */
  if (self->proc_path != NULL)
    self->proc_override = TRUE;
}
/*
 * GObject property getter for GMemoryMonitorPsi.
 *
 * PROP_PROC_PATH reports `proc_path` only when it was supplied as an
 * override; otherwise NULL is reported. Any other property ID triggers the
 * standard invalid-property warning.
 */
static void
g_memory_monitor_psi_get_property (GObject    *object,
                                   guint       prop_id,
                                   GValue     *value,
                                   GParamSpec *pspec)
{
  GMemoryMonitorPsi *self = G_MEMORY_MONITOR_PSI (object);
  const char *reported_path = NULL;

  if ((GMemoryMonitorPsiProperty) prop_id != PROP_PROC_PATH)
    {
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      return;
    }

  if (self->proc_override)
    reported_path = self->proc_path;

  g_value_set_string (value, reported_path);
}
/*
 * Callback run when one of the trigger sources fires.
 *
 * @monitor: the monitor which owns the source
 * @level_type: the low-memory level to report
 * @user_data: unused
 *
 * Unless a proc path override is in effect (i.e. a test is running), the
 * event is filtered through the free-memory ratio: a negative ratio returns
 * G_SOURCE_REMOVE, and a ratio above 0.5 suppresses the event. Otherwise
 * the event is forwarded to the user with @level_type.
 *
 * Returns: G_SOURCE_REMOVE if the memory ratio is negative,
 *   G_SOURCE_CONTINUE otherwise
 */
static gboolean
g_memory_monitor_low_trigger_cb (GMemoryMonitorPsi            *monitor,
                                 GMemoryMonitorLowMemoryLevel  level_type,
                                 void                         *user_data)
{
  gdouble free_ratio;
  gboolean apply_ratio_filter;

  /* Should be executed in the worker context */
  g_assert (g_main_context_is_owner (monitor->worker));

  free_ratio = g_memory_monitor_base_query_mem_ratio ();

  /* The ratio filter is bypassed while a test overrides the proc path */
  apply_ratio_filter = !monitor->proc_override;

  if (apply_ratio_filter && free_ratio < 0.0)
    return G_SOURCE_REMOVE;

  /* More than half of the memory is still free: don't signal */
  if (apply_ratio_filter && free_ratio > 0.5)
    return G_SOURCE_CONTINUE;

  g_memory_monitor_base_send_event_to_user (G_MEMORY_MONITOR_BASE (monitor), level_type);

  return G_SOURCE_CONTINUE;
}
/*
 * GSourceFuncs.check implementation for MemoryMonitorSource.
 *
 * Returns: TRUE if any condition is pending on the watched fd and the source
 *   should therefore be dispatched, FALSE otherwise
 */
static gboolean
event_check (GSource *source)
{
  MemoryMonitorSource *ev_source = (MemoryMonitorSource *) source;

  /* The value stored in `pollfd` is the tag returned by
   * g_source_add_unix_fd(), which is opaque: query it through the public API
   * (as event_dispatch() does) instead of dereferencing it. */
  return g_source_query_unix_fd (source, ev_source->pollfd) != 0;
}
/*
 * GSourceFuncs.dispatch implementation for MemoryMonitorSource.
 *
 * @source: the MemoryMonitorSource being dispatched
 * @callback: (nullable): the source's callback, invoked as a
 *   MemoryMonitorCallbackFunc
 * @user_data: user data for @callback
 *
 * The callback is invoked when the expected condition is pending on the
 * watched fd: G_IO_IN when the monitor uses a proc path override, G_IO_PRI
 * otherwise. The callback's return value is not consulted.
 *
 * Returns: G_SOURCE_REMOVE if the owning monitor has been finalized or the
 *   fd reported G_IO_ERR, G_SOURCE_CONTINUE otherwise
 */
static gboolean
event_dispatch (GSource     *source,
                GSourceFunc  callback,
                gpointer     user_data)
{
  MemoryMonitorSource *ev_source = (MemoryMonitorSource *) source;
  GMemoryMonitorPsi *monitor = NULL;
  GIOCondition pending;
  GIOCondition expected;
  gboolean keep_source = G_SOURCE_CONTINUE;

  /* Hold a strong reference for the duration of the dispatch */
  monitor = g_weak_ref_get (&ev_source->monitor_weak);
  if (monitor == NULL)
    return G_SOURCE_REMOVE;

  pending = g_source_query_unix_fd (source, ev_source->pollfd);
  expected = monitor->proc_override ? G_IO_IN : G_IO_PRI;

  if ((pending & expected) && callback)
    ((MemoryMonitorCallbackFunc) callback) (monitor, ev_source->level_type, user_data);

  /* An error condition on the fd means the event source is gone (for PSI,
   * the kernel reports POLLERR in that case). The condition never clears, so
   * keeping the source attached would make the context spin on it. */
  if (pending & G_IO_ERR)
    keep_source = G_SOURCE_REMOVE;

  g_object_unref (monitor);

  return keep_source;
}
/*
 * GSourceFuncs.finalize implementation for MemoryMonitorSource: releases the
 * weak reference to the monitor taken in g_memory_monitor_create_source().
 */
static void
event_finalize (GSource *source)
{
  g_weak_ref_clear (&((MemoryMonitorSource *) source)->monitor_weak);
}
/* Virtual function table for MemoryMonitorSource, passed to g_source_new()
 * in g_memory_monitor_create_source(). No `prepare` function is set. */
static GSourceFuncs memory_monitor_event_funcs = {
.check = event_check,
.dispatch = event_dispatch,
.finalize = event_finalize,
};
/*
 * Creates a MemoryMonitorSource watching @fd on behalf of @monitor.
 *
 * @monitor: the monitor the source reports to; only a weak reference is kept
 * @fd: the file descriptor to poll
 * @level_type: the low-memory level reported when the source fires
 * @is_path_override: if TRUE the fd is polled for G_IO_IN, otherwise for
 *   G_IO_PRI; G_IO_ERR is watched in both cases
 *
 * No callback is attached here.
 *
 * Returns: the newly created source, as returned by g_source_new()
 */
static GSource *
g_memory_monitor_create_source (GMemoryMonitorPsi            *monitor,
                                int                           fd,
                                GMemoryMonitorLowMemoryLevel  level_type,
                                gboolean                      is_path_override)
{
  GSource *base;
  MemoryMonitorSource *self;
  GIOCondition wanted;

  base = g_source_new (&memory_monitor_event_funcs, sizeof (MemoryMonitorSource));
  self = (MemoryMonitorSource *) base;

  wanted = (is_path_override ? G_IO_IN : G_IO_PRI) | G_IO_ERR;
  self->pollfd = g_source_add_unix_fd (base, fd, wanted);

  self->level_type = level_type;
  g_weak_ref_init (&self->monitor_weak, monitor);

  return base;
}
/*
 * Works out which file the PSI triggers should be opened on and stores it in
 * `monitor->cg_path`.
 *
 * @monitor: the monitor to update
 * @error: return location for a #GError, or %NULL
 *
 * Unless a proc path override was supplied, `monitor->proc_path` is first
 * set to "/proc/<pid>/cgroup" for the current process. That file is then
 * read and must start with "0::"; the remainder, stripped of surrounding
 * whitespace, is the cgroup path.
 *
 * With an override, the cgroup path read from the file is used as-is as
 * `cg_path`. Without one, `cg_path` becomes
 * "/sys/fs/cgroup/<cgroup path>/memory.pressure", which must exist.
 *
 * Returns: TRUE if `cg_path` was determined; FALSE with @error set otherwise
 */
static gboolean
g_memory_monitor_psi_calculate_mem_pressure_path (GMemoryMonitorPsi  *monitor,
                                                  GError            **error)
{
  gchar *path_read = NULL;
  gchar *replacement = NULL;
  GRegex *regex = NULL;
  gboolean success = FALSE;

  if (!monitor->proc_override)
    {
      pid_t pid = getpid ();

      /* Don't leak a path left over from a previous call */
      g_free (monitor->proc_path);
      monitor->proc_path = g_strdup_printf ("/proc/%d/cgroup", pid);
    }

  if (!g_file_get_contents (monitor->proc_path, &path_read, NULL, error))
    goto out;

  /* Only cgroupv2 is supported, and the format is as follows:
   * ex: 0::/user.slice/user-0.slice/session-c3.scope */
  regex = g_regex_new ("^0::", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, error);
  if (regex == NULL)
    goto out;

  if (!g_regex_match (regex, path_read, G_REGEX_MATCH_DEFAULT, NULL))
    {
      g_debug ("Unsupported cgroup path information.");
      g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
                   "Unsupported cgroup path information in ‘%s’",
                   monitor->proc_path);
      goto out;
    }

  /* drop "0::" */
  replacement = g_regex_replace (regex, path_read,
                                 -1, 0,
                                 "", G_REGEX_MATCH_DEFAULT, error);
  if (replacement == NULL)
    {
      /* @error has already been set by g_regex_replace() */
      g_debug ("Unsupported cgroup path format.");
      goto out;
    }

  replacement = g_strstrip (replacement);

  /* Don't leak a path left over from a previous call */
  g_free (monitor->cg_path);

  if (monitor->proc_override)
    {
      monitor->cg_path = g_steal_pointer (&replacement);
      success = TRUE;
      goto out;
    }

  monitor->cg_path = g_build_filename ("/sys/fs/cgroup", replacement, "memory.pressure", NULL);
  g_debug ("cgroup path is %s", monitor->cg_path);

  if (!g_file_test (monitor->cg_path, G_FILE_TEST_EXISTS))
    {
      g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND,
                   "Memory pressure file ‘%s’ does not exist",
                   monitor->cg_path);
      goto out;
    }

  success = TRUE;

out:
  /* g_regex_unref() does not accept NULL */
  g_clear_pointer (&regex, g_regex_unref);
  g_free (replacement);
  g_free (path_read);

  return success;
}
static GSource *
g_memory_monitor_psi_setup_trigger (GMemoryMonitorPsi *monitor,
GMemoryMonitorLowMemoryLevel level_type,
int threshold_us,
int window_us,
GError **error)
{
GSource *source;
int fd;
int ret;
size_t wlen;
gchar *trigger = NULL;
fd = g_open (monitor->cg_path, O_RDWR | O_NONBLOCK | O_CLOEXEC);
if (fd < 0)
{
int errsv = errno;
g_debug ("Error on opening %s: %s", monitor->cg_path, g_strerror (errsv));
g_set_error (error, G_IO_ERROR, g_io_error_from_errno (errsv),
"Error on opening ‘%s’: %s", monitor->cg_path, g_strerror (errsv));
return NULL;
}
/* The kernel PSI [1] trigger format is:
*