From c8e2a3c4355b4794267cd6e58a074802b4607cb9 Mon Sep 17 00:00:00 2001
From: Joel Colledge
Date: Fri, 22 Sep 2023 16:57:24 +0200
Subject: [PATCH 20/20] drbd: do not allow auto-demote to be interrupted by signal

Pending signals can mess up auto-demote:

drbd res: Preparing cluster-wide state change 671410162 (0->-1 3/2)
drbd res: Aborting cluster-wide state change 671410162 (6ms) rv = -21
drbd res: Auto-demote failed: Interrupted state change

After this state change failure no process has the DRBD device open, but
DRBD remains Primary.
---
 drbd/drbd_main.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c
index 1864861db21d..0719229f210e 100644
--- a/drbd/drbd_main.c
+++ b/drbd/drbd_main.c
@@ -2851,17 +2851,30 @@ static void drbd_release(struct gendisk *gd)
 	    !test_and_set_bit(DESTROYING_DEV, &device->flags))
 		call_rcu(&device->rcu, drbd_reclaim_device);
 
-	if (resource->res_opts.auto_promote) {
-		enum drbd_state_rv rv;
+	if (resource->res_opts.auto_promote &&
+	    open_rw_cnt == 0 &&
+	    resource->role[NOW] == R_PRIMARY &&
+	    !test_bit(EXPLICIT_PRIMARY, &resource->flags)) {
+		sigset_t mask, oldmask;
+		int rv;
+
+		/*
+		 * Auto-demote is triggered by the last opener releasing the
+		 * DRBD device. However, it is an implicit action, so it should
+		 * not be affected by the state of the process. In particular,
+		 * it should ignore any pending signals. It may be the case
+		 * that the process is releasing DRBD because it is being
+		 * terminated using a signal.
+		 */
+		sigfillset(&mask);
+		sigprocmask(SIG_BLOCK, &mask, &oldmask);
 
-		if (was_writable && open_rw_cnt == 0 &&
-		    resource->role[NOW] == R_PRIMARY &&
-		    !test_bit(EXPLICIT_PRIMARY, &resource->flags)) {
-			rv = drbd_set_role(resource, R_SECONDARY, false, "auto-demote", NULL);
-			if (rv < SS_SUCCESS)
-				drbd_warn(resource, "Auto-demote failed: %s (%d)\n",
-					  drbd_set_st_err_str(rv), rv);
-		}
+		rv = drbd_set_role(resource, R_SECONDARY, false, "auto-demote", NULL);
+		if (rv < SS_SUCCESS)
+			drbd_warn(resource, "Auto-demote failed: %s (%d)\n",
+				  drbd_set_st_err_str(rv), rv);
+
+		sigprocmask(SIG_SETMASK, &oldmask, NULL);
 	}
 
 	if (open_ro_cnt == 0 && open_rw_cnt == 0 && resource->fail_io[NOW]) {
-- 
2.35.3