From 71b8c4302bdf803e9fbdc53536d7503f64126c8534c895f61210bf8114e99cd2 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 14 Jan 2011 14:50:30 +0000 Subject: [PATCH] - Hopefully fix bnc#664105: Kernel segfault in kernel/timer.c - comm: stapio (process) - related to preloadtrace.ko OBS-URL: https://build.opensuse.org/package/show/devel:tools/systemtap?expand=0&rev=20 --- systemtap.changes | 6 + systemtap.spec | 4 + systemtap_fix_mod_vs_del_timer_race.patch | 183 ++++++++++++++++++ ...emtap_improve_gettimeofday_stability.patch | 141 ++++++++++++++ 4 files changed, 334 insertions(+) create mode 100644 systemtap_fix_mod_vs_del_timer_race.patch create mode 100644 systemtap_improve_gettimeofday_stability.patch diff --git a/systemtap.changes b/systemtap.changes index d870149..7961396 100644 --- a/systemtap.changes +++ b/systemtap.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Fri Jan 14 14:32:11 UTC 2011 - trenn@novell.com + +- Hopefully fix bnc#664105: + Kernel segfault in kernel/timer.c - comm: stapio (process) - related to preloadtrace.ko + ------------------------------------------------------------------- Mon Sep 13 11:25:26 UTC 2010 - coolo@novell.com diff --git a/systemtap.spec b/systemtap.spec index 2e349b3..b8bf8e6 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -41,6 +41,8 @@ Source: ftp://sources.redhat.com/pub/systemtap/snapshots/systemtap-%{pac Patch1: systemtap-docdir-fix.diff Patch2: systemtap-hppa.diff Patch3: systemtap-support-2.6.36.diff +Patch4: systemtap_improve_gettimeofday_stability.patch +Patch5: systemtap_fix_mod_vs_del_timer_race.patch Requires: libebl1 Requires: %{name}-runtime = %{version}-%{release} @@ -104,6 +106,8 @@ This package contains the support tools for static probes. %patch1 -p1 %patch2 %patch3 -p1 +%patch4 -p1 +%patch5 -p1 %build autoreconf -fi diff --git a/systemtap_fix_mod_vs_del_timer_race.patch b/systemtap_fix_mod_vs_del_timer_race.patch new file mode 100644 index 0000000..0d52871 --- /dev/null +++ b/systemtap_fix_mod_vs_del_timer_race.patch @@ -0,0 +1,183 @@ +From: Frank Ch. Eigler +Subject: PR10651 / RHBZ653286: mod_timer vs. del_timer_sync races +References: bnc#664105 +Patch-Mainline: yes +Git-commit: f2b610b6295628ae1fef744421c293883f1e4298 +Git-repo: git://sources.redhat.com/git/systemtap.git + +Signed-off-by: Thomas Renninger + +It appears possible for del_timer_sync (from outside) and mod_timer +(from within a timer callback) to race. Defeat this race by ensuring +that the timer callback checks an atomic_t flag before rescheduling +itself with mod_timer. + +* runtime/transport/relay_v2.c (transport_state): Turn into an atomic_t. + Update users. + (__stp_relay_wakeup_timer): Observe flag. + (_stp_transport_data_fs_{start,stop}): Update flag before timer manipulations. +* runtime/transport/ring_buffer.c: Ditto for corresponding functions. + +diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c +index 974941d..33e198e 100644 +--- a/runtime/transport/relay_v2.c ++++ b/runtime/transport/relay_v2.c +@@ -4,7 +4,7 @@ + * code started as a proposed relayfs interface called 'utt'. It has + * been modified and simplified for systemtap. + * +- * Changes Copyright (C) 2009 Red Hat Inc. ++ * Changes Copyright (C) 2009-2010 Red Hat Inc. + * + * Original utt code by: + * Copyright (C) 2006 Jens Axboe +@@ -42,7 +42,7 @@ + * to be changed. */ + struct _stp_relay_data_type { + struct rchan *rchan; +- enum _stp_transport_state transport_state; ++ atomic_t /* enum _stp_transport_state */ transport_state; + struct dentry *dropped_file; + atomic_t dropped; + atomic_t wakeup; +@@ -133,7 +133,10 @@ static void __stp_relay_wakeup_timer(unsigned long val) + #endif + } + +- mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); ++ if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) ++ mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); ++ else ++ dbug_trans(0, "relay_v2 wakeup timer expiry\n"); + } + + static void __stp_relay_timer_init(void) +@@ -149,7 +152,7 @@ static void __stp_relay_timer_init(void) + + static enum _stp_transport_state _stp_transport_get_state(void) + { +- return _stp_relay_data.transport_state; ++ return atomic_read (&_stp_relay_data.transport_state); + } + + static void _stp_transport_data_fs_overwrite(int overwrite) +@@ -242,19 +245,19 @@ static struct rchan_callbacks __stp_relay_callbacks = { + + static void _stp_transport_data_fs_start(void) + { +- if (_stp_relay_data.transport_state == STP_TRANSPORT_INITIALIZED) { ++ if (atomic_read (&_stp_relay_data.transport_state) == STP_TRANSPORT_INITIALIZED) { ++ atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_RUNNING); + /* We're initialized. Now start the timer. */ + __stp_relay_timer_init(); +- _stp_relay_data.transport_state = STP_TRANSPORT_RUNNING; + } + } + + static void _stp_transport_data_fs_stop(void) + { +- if (_stp_relay_data.transport_state == STP_TRANSPORT_RUNNING) { ++ if (atomic_read (&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) { ++ atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); + del_timer_sync(&_stp_relay_data.timer); + dbug_trans(0, "flushing...\n"); +- _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; + if (_stp_relay_data.rchan) + relay_flush(_stp_relay_data.rchan); + } +@@ -277,7 +280,7 @@ static int _stp_transport_data_fs_init(void) + u64 npages; + struct sysinfo si; + +- _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; ++ atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); + _stp_relay_data.overwrite_flag = 0; + atomic_set(&_stp_relay_data.dropped, 0); + _stp_relay_data.dropped_file = NULL; +@@ -337,7 +340,7 @@ static int _stp_transport_data_fs_init(void) + goto err; + } + dbug_trans(1, "returning 0...\n"); +- _stp_relay_data.transport_state = STP_TRANSPORT_INITIALIZED; ++ atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_INITIALIZED); + + return 0; + +diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c +index 6c327c0..232f099 100644 +--- a/runtime/transport/ring_buffer.c ++++ b/runtime/transport/ring_buffer.c +@@ -55,7 +55,7 @@ struct _stp_iterator { + #endif + + struct _stp_relay_data_type { +- enum _stp_transport_state transport_state; ++ atomic_t /* enum _stp_transport_state */ transport_state; + struct ring_buffer *rb; + struct _stp_iterator iter[NR_ITERS]; + cpumask_var_t trace_reader_cpumask; +@@ -649,10 +649,12 @@ static int _stp_data_write_commit(void *entry) + + static void __stp_relay_wakeup_timer(unsigned long val) + { +- if (waitqueue_active(&_stp_poll_wait) +- && ! _stp_ring_buffer_empty()) ++ if (waitqueue_active(&_stp_poll_wait) && ! _stp_ring_buffer_empty()) + wake_up_interruptible(&_stp_poll_wait); +- mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); ++ if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) ++ mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL); ++ else ++ dbug_trans(0, "ring_buffer wakeup timer expiry\n"); + } + + static void __stp_relay_timer_start(void) +@@ -677,7 +679,7 @@ static int _stp_transport_data_fs_init(void) + int rc; + int cpu, cpu2; + +- _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; ++ atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); + _stp_relay_data.rb = NULL; + + // allocate buffer +@@ -738,23 +740,23 @@ static int _stp_transport_data_fs_init(void) + } + + dbug_trans(1, "returning 0...\n"); +- _stp_relay_data.transport_state = STP_TRANSPORT_INITIALIZED; ++ atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_INITIALIZED); + return 0; + } + + static void _stp_transport_data_fs_start(void) + { +- if (_stp_relay_data.transport_state == STP_TRANSPORT_INITIALIZED) { +- __stp_relay_timer_start(); +- _stp_relay_data.transport_state = STP_TRANSPORT_RUNNING; ++ if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_INITIALIZED) { ++ atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_RUNNING); ++ __stp_relay_timer_start(); + } + } + + static void _stp_transport_data_fs_stop(void) + { +- if (_stp_relay_data.transport_state == STP_TRANSPORT_RUNNING) { +- __stp_relay_timer_stop(); +- _stp_relay_data.transport_state = STP_TRANSPORT_STOPPED; ++ if (atomic_read(&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) { ++ atomic_set(&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); ++ __stp_relay_timer_stop(); + } + } + +@@ -773,7 +775,7 @@ static void _stp_transport_data_fs_close(void) + + static enum _stp_transport_state _stp_transport_get_state(void) + { +- return _stp_relay_data.transport_state; ++ return atomic_read (&_stp_relay_data.transport_state); + } + + static void _stp_transport_data_fs_overwrite(int overwrite) diff --git a/systemtap_improve_gettimeofday_stability.patch b/systemtap_improve_gettimeofday_stability.patch new file mode 100644 index 0000000..c49b141 --- /dev/null +++ b/systemtap_improve_gettimeofday_stability.patch @@ -0,0 +1,141 @@ +From: Josh Stone +Subject: Improve gettimeofday stability +References: bnc#664105 +Patch-Mainline: yes +Git-commit: 3fd1c4901e81e5f6b550bb988d612281e0fc231b +Git-repo: git://sources.redhat.com/git/systemtap.git + +Signed-off-by: Thomas Renninger + +These are a few tweaks to improve the stability of our gettimeofday +startup and shutdown. Hopefully this helps with PR10651 and PR12182, +but that's not been confirmed yet. + +* runtime/runtime.h: Don't include time.c here. +* translate.cxx (c_unparser::emit_common_header): Include time.c here, + only if NEEDed (so we also ensure init/kill_time will be called). + (c_unparser::emit_module_init): Change to STAP_SESSION_STOPPED for the + final cleanup efforts, especially for time.c to wrap up. + (c_unparser::emit_module_exit): Ditto. +* runtime/time.c (__stp_time_timer_callback): Use the session_state + instead of stp_timer_reregister, and continue until STOPPED. + (__stp_init_time): Don't add_timer from the IPI. + (_stp_init_time): Instead, add_timer_on each cpu here. + +diff --git a/runtime/runtime.h b/runtime/runtime.h +index d4f41db..814d8ad 100644 +--- a/runtime/runtime.h ++++ b/runtime/runtime.h +@@ -132,7 +132,6 @@ static struct + #include "copy.c" + #include "regs.c" + #include "regs-ia64.c" +-#include "time.c" + + #include "task_finder.c" + +diff --git a/runtime/time.c b/runtime/time.c +index 0d13487..38a808a 100644 +--- a/runtime/time.c ++++ b/runtime/time.c +@@ -56,9 +56,6 @@ typedef struct __stp_time_t { + + static void *stp_time = NULL; + +-/* Flag to tell the timer callback whether to reregister */ +-static int stp_timer_reregister = 0; +- + /* Try to estimate the number of CPU cycles in a millisecond - i.e. kHz. This + * relies heavily on the accuracy of udelay. By calling udelay twice, we + * attempt to account for overhead in the call. +@@ -134,7 +131,7 @@ __stp_time_timer_callback(unsigned long val) + XXX: The worst that can probably happen is that we get + two consecutive timer resets. */ + +- if (likely(stp_timer_reregister)) ++ if (likely(atomic_read(&session_state) != STAP_SESSION_STOPPED)) + mod_timer(&time->timer, jiffies + 1); + } + +@@ -154,7 +151,6 @@ __stp_init_time(void *info) + init_timer(&time->timer); + time->timer.expires = jiffies + 1; + time->timer.function = __stp_time_timer_callback; +- add_timer(&time->timer); + } + + #ifdef CONFIG_CPU_FREQ +@@ -208,7 +204,6 @@ _stp_kill_time(void) + { + if (stp_time) { + int cpu; +- stp_timer_reregister = 0; + for_each_online_cpu(cpu) { + stp_time_t *time = per_cpu_ptr(stp_time, cpu); + del_timer_sync(&time->timer); +@@ -229,20 +224,23 @@ _stp_kill_time(void) + static int + _stp_init_time(void) + { +- int ret = 0; ++ int cpu, ret = 0; + + _stp_kill_time(); + + stp_time = _stp_alloc_percpu(sizeof(stp_time_t)); + if (unlikely(stp_time == 0)) + return -1; +- +- stp_timer_reregister = 1; ++ + #ifdef STAPCONF_ONEACHCPU_RETRY + ret = on_each_cpu(__stp_init_time, NULL, 0, 1); + #else + ret = on_each_cpu(__stp_init_time, NULL, 1); + #endif ++ for_each_online_cpu(cpu) { ++ stp_time_t *time = per_cpu_ptr(stp_time, cpu); ++ add_timer_on(&time->timer, cpu); ++ } + + #ifdef CONFIG_CPU_FREQ + if (!ret && !__stp_constant_freq()) { +@@ -250,7 +248,6 @@ _stp_init_time(void) + CPUFREQ_TRANSITION_NOTIFIER); + + if (!ret) { +- int cpu; + for_each_online_cpu(cpu) { + unsigned long flags; + int freq_khz = cpufreq_get(cpu); +diff --git a/translate.cxx b/translate.cxx +index f360940..4447a24 100644 +--- a/translate.cxx ++++ b/translate.cxx +@@ -1085,6 +1085,10 @@ c_unparser::emit_common_header () + if (!session->stat_decls.empty()) + o->newline() << "#include \"stat.c\"\n"; + ++ o->newline() << "#ifdef STAP_NEED_GETTIMEOFDAY"; ++ o->newline() << "#include \"time.c\""; // Don't we all need more? ++ o->newline() << "#endif"; ++ + o->newline(); + } + +@@ -1327,6 +1331,7 @@ c_unparser::emit_module_init () + } + + // For any partially registered/unregistered kernel facilities. ++ o->newline() << "atomic_set (&session_state, STAP_SESSION_STOPPED);"; + o->newline() << "#ifdef STAPCONF_SYNCHRONIZE_SCHED"; + o->newline() << "synchronize_sched();"; + o->newline() << "#endif"; +@@ -1409,6 +1414,7 @@ c_unparser::emit_module_exit () + o->newline(-2) << "} while (holdon);"; + + // cargo cult epilogue ++ o->newline() << "atomic_set (&session_state, STAP_SESSION_STOPPED);"; + o->newline() << "#ifdef STAPCONF_SYNCHRONIZE_SCHED"; + o->newline() << "synchronize_sched();"; + o->newline() << "#endif";