drbd/0014-drbd-include-source-of-state-change-in-log.patch

1481 lines
63 KiB
Diff
Raw Permalink Normal View History

From d6bbb1b1635b9912d77d1a26aba3f380a2ed986d Mon Sep 17 00:00:00 2001
From: Joel Colledge <joel.colledge@linbit.com>
Date: Mon, 11 Sep 2023 16:45:27 +0200
Subject: [PATCH 14/20] drbd: include source of state change in log

This is useful for understanding why a state change occurs, in
particular whether it was triggered by userspace.
---
drbd/drbd_int.h | 10 ++--
drbd/drbd_main.c | 12 ++---
drbd/drbd_nl.c | 103 ++++++++++++++++++++------------------
drbd/drbd_receiver.c | 77 +++++++++++++++-------------
drbd/drbd_req.c | 6 +--
drbd/drbd_sender.c | 25 ++++++----
drbd/drbd_state.c | 116 ++++++++++++++++++++++++-------------------
drbd/drbd_state.h | 40 +++++++++++----
8 files changed, 224 insertions(+), 165 deletions(-)
diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h
index f6e7c3ac2629..b705f26e71a4 100644
--- a/drbd/drbd_int.h
+++ b/drbd/drbd_int.h
@@ -1876,7 +1876,8 @@ extern void drbd_destroy_resource(struct kref *kref);
extern void drbd_destroy_device(struct kref *kref);
-extern int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts);
+extern int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts,
+ const char *tag);
extern struct drbd_connection *drbd_create_connection(struct drbd_resource *resource,
struct drbd_transport_class *tc);
extern void drbd_transport_shutdown(struct drbd_connection *connection, enum drbd_tr_free_op op);
@@ -1931,7 +1932,9 @@ extern void resync_after_online_grow(struct drbd_peer_device *);
extern void drbd_reconsider_queue_parameters(struct drbd_device *device,
struct drbd_backing_dev *bdev);
extern bool barrier_pending(struct drbd_resource *resource);
-extern enum drbd_state_rv drbd_set_role(struct drbd_resource *, enum drbd_role, bool, struct sk_buff *);
+extern enum drbd_state_rv
+drbd_set_role(struct drbd_resource *resource, enum drbd_role role, bool force, const char *tag,
+ struct sk_buff *reply_skb);
extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
extern int drbd_maybe_khelper(struct drbd_device *, struct drbd_connection *, char *);
extern int drbd_create_peer_device_default_config(struct drbd_peer_device *peer_device);
@@ -1943,7 +1946,8 @@ extern int drbd_worker(struct drbd_thread *thi);
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
void drbd_resync_after_changed(struct drbd_device *device);
extern bool drbd_stable_sync_source_present(struct drbd_peer_device *, enum which_state);
-extern void drbd_start_resync(struct drbd_peer_device *, enum drbd_repl_state);
+extern void drbd_start_resync(struct drbd_peer_device *peer_device, enum drbd_repl_state side,
+ const char *tag);
extern void resume_next_sg(struct drbd_device *device);
extern void suspend_other_sg(struct drbd_device *device);
extern void drbd_resync_finished(struct drbd_peer_device *, enum drbd_disk_state);
diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c
index 6bb618909aa0..4b0b967c2c97 100644
--- a/drbd/drbd_main.c
+++ b/drbd/drbd_main.c
@@ -2505,7 +2505,7 @@ static int try_to_promote(struct drbd_device *device, long timeout, bool ndelay)
unsigned long start = jiffies;
long t;
- rv = drbd_set_role(resource, R_PRIMARY, false, NULL);
+ rv = drbd_set_role(resource, R_PRIMARY, false, "auto-promote", NULL);
timeout -= jiffies - start;
if (ndelay || rv >= SS_SUCCESS || timeout <= 0) {
@@ -2854,7 +2854,7 @@ static void drbd_release(struct gendisk *gd, fmode_t mode)
open_rw_cnt == 0 &&
resource->role[NOW] == R_PRIMARY &&
!test_bit(EXPLICIT_PRIMARY, &resource->flags)) {
- rv = drbd_set_role(resource, R_SECONDARY, false, NULL);
+ rv = drbd_set_role(resource, R_SECONDARY, false, "auto-demote", NULL);
if (rv < SS_SUCCESS)
drbd_warn(resource, "Auto-demote failed: %s (%d)\n",
drbd_set_st_err_str(rv), rv);
@@ -2866,7 +2866,7 @@ static void drbd_release(struct gendisk *gd, fmode_t mode)
begin_state_change(resource, &irq_flags, CS_VERBOSE);
resource->fail_io[NEW] = false;
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "release");
}
/* if the open counts are 0, we free the whole list, otherwise we remove the specific pid */
@@ -3349,7 +3349,7 @@ static void wake_all_device_misc(struct drbd_resource *resource)
rcu_read_unlock();
}
-int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts)
+int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts, const char *tag)
{
struct drbd_connection *connection;
cpumask_var_t new_cpu_mask;
@@ -3414,7 +3414,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
if (force_state_recalc) {
begin_state_change(resource, &irq_flags, CS_VERBOSE | CS_FORCE_RECALC);
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, tag);
}
if (wake_device_misc)
@@ -3491,7 +3491,7 @@ struct drbd_resource *drbd_create_resource(const char *name,
}
resource->pp_vacant = page_pool_count;
- if (set_resource_options(resource, res_opts))
+ if (set_resource_options(resource, res_opts, "create-resource"))
goto fail_free_pages;
list_add_tail_rcu(&resource->resources, &drbd_resources);
diff --git a/drbd/drbd_nl.c b/drbd/drbd_nl.c
index cb5cdb184824..b7e9e43312f9 100644
--- a/drbd/drbd_nl.c
+++ b/drbd/drbd_nl.c
@@ -768,7 +768,7 @@ static bool intentional_diskless(struct drbd_resource *resource)
return intentional_diskless;
}
-static bool conn_try_outdate_peer(struct drbd_connection *connection)
+static bool conn_try_outdate_peer(struct drbd_connection *connection, const char *tag)
{
struct drbd_resource *resource = connection->resource;
unsigned long last_reconnect_jif;
@@ -792,7 +792,7 @@ static bool conn_try_outdate_peer(struct drbd_connection *connection)
!(disk_state == D_DISKLESS && intentional_diskless(resource))) {
begin_state_change_locked(resource, CS_VERBOSE | CS_HARD);
__change_io_susp_fencing(connection, false);
- end_state_change_locked(resource);
+ end_state_change_locked(resource, tag);
read_unlock_irq(&resource->state_rwlock);
return false;
}
@@ -862,7 +862,7 @@ static bool conn_try_outdate_peer(struct drbd_connection *connection)
goto abort;
}
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, tag);
goto out;
abort:
@@ -875,7 +875,7 @@ static int _try_outdate_peer_async(void *data)
{
struct drbd_connection *connection = (struct drbd_connection *)data;
- conn_try_outdate_peer(connection);
+ conn_try_outdate_peer(connection, "outdate-async");
kref_debug_put(&connection->kref_debug, 4);
kref_put(&connection->kref, drbd_destroy_connection);
@@ -1014,7 +1014,8 @@ static bool wait_up_to_date(struct drbd_resource *resource)
}
enum drbd_state_rv
-drbd_set_role(struct drbd_resource *resource, enum drbd_role role, bool force, struct sk_buff *reply_skb)
+drbd_set_role(struct drbd_resource *resource, enum drbd_role role, bool force, const char *tag,
+ struct sk_buff *reply_skb)
{
struct drbd_device *device;
int vnr, try = 0;
@@ -1042,7 +1043,7 @@ retry:
err_str = NULL;
}
rv = stable_state_change(resource,
- change_role(resource, role, flags, &err_str));
+ change_role(resource, role, flags, tag, &err_str));
if (rv == SS_TIMEOUT || rv == SS_CONCURRENT_ST_CHG) {
long timeout = twopc_retry_timeout(resource, try);
@@ -1104,7 +1105,7 @@ retry:
if (device->disk_state[NOW] != D_CONSISTENT)
continue;
- if (conn_try_outdate_peer(connection))
+ if (conn_try_outdate_peer(connection, tag))
fenced_peers = true;
else
any_fencing_failed = true;
@@ -1140,7 +1141,7 @@ retry:
up(&resource->state_sem); /* Allow connect while fencing */
for_each_connection_ref(connection, im, resource) {
- bool outdated_peer = conn_try_outdate_peer(connection);
+ bool outdated_peer = conn_try_outdate_peer(connection, tag);
if (!outdated_peer && force) {
drbd_warn(connection, "Forced into split brain situation!\n");
flags |= CS_FP_LOCAL_UP_TO_DATE;
@@ -1331,7 +1332,9 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
new_role = info->genlhdr->cmd == DRBD_ADM_PRIMARY ? R_PRIMARY : R_SECONDARY;
rv = drbd_set_role(adm_ctx.resource,
new_role,
- parms.force, adm_ctx.reply_skb);
+ parms.force,
+ new_role == R_PRIMARY ? "primary" : "secondary",
+ adm_ctx.reply_skb);
if (new_role == R_PRIMARY) {
if (rv >= SS_SUCCESS)
@@ -3093,7 +3096,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
drbd_flush_workqueue(&resource->work);
rv = stable_state_change(resource,
- change_disk_state(device, D_ATTACHING, CS_VERBOSE | CS_SERIALIZE, NULL));
+ change_disk_state(device, D_ATTACHING, CS_VERBOSE | CS_SERIALIZE, "attach", NULL));
retcode = (enum drbd_ret_code)rv;
if (rv >= SS_SUCCESS)
update_resource_dagtag(resource, nbc);
@@ -3353,8 +3356,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
/* change_disk_state uses disk_state_from_md(device); in case D_NEGOTIATING not
necessary, and falls back to a local state change */
- rv = stable_state_change(resource,
- change_disk_state(device, D_NEGOTIATING, CS_VERBOSE | CS_SERIALIZE, NULL));
+ rv = stable_state_change(resource, change_disk_state(device,
+ D_NEGOTIATING, CS_VERBOSE | CS_SERIALIZE, "attach", NULL));
if (rv < SS_SUCCESS) {
if (rv == SS_CW_FAILED_BY_PEER)
@@ -3385,7 +3388,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
force_diskless_dec:
put_ldev(device);
force_diskless:
- change_disk_state(device, D_DISKLESS, CS_HARD, NULL);
+ change_disk_state(device, D_DISKLESS, CS_HARD, "attach", NULL);
fail:
mutex_unlock_cond(&resource->conf_update, &have_conf_update);
drbd_backing_dev_free(device, nbc);
@@ -3406,7 +3409,7 @@ static enum drbd_disk_state get_disk_state(struct drbd_device *device)
}
static int adm_detach(struct drbd_device *device, bool force, bool intentional_diskless,
- struct sk_buff *reply_skb)
+ const char *tag, struct sk_buff *reply_skb)
{
const char *err_str = NULL;
int ret, retcode;
@@ -3414,7 +3417,7 @@ static int adm_detach(struct drbd_device *device, bool force, bool intentional_d
device->device_conf.intentional_diskless = intentional_diskless;
if (force) {
set_bit(FORCE_DETACH, &device->flags);
- change_disk_state(device, D_DETACHING, CS_HARD, NULL);
+ change_disk_state(device, D_DETACHING, CS_HARD, tag, NULL);
retcode = SS_SUCCESS;
goto out;
}
@@ -3422,7 +3425,7 @@ static int adm_detach(struct drbd_device *device, bool force, bool intentional_d
drbd_suspend_io(device, READ_AND_WRITE); /* so no-one is stuck in drbd_al_begin_io */
retcode = stable_state_change(device->resource,
change_disk_state(device, D_DETACHING,
- CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE, &err_str));
+ CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE, tag, &err_str));
/* D_DETACHING will transition to DISKLESS. */
drbd_resume_io(device);
ret = wait_event_interruptible(device->misc_wait,
@@ -3473,7 +3476,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = (enum drbd_ret_code)adm_detach(adm_ctx.device, parms.force_detach,
- parms.intentional_diskless_detach, adm_ctx.reply_skb);
+ parms.intentional_diskless_detach, "detach", adm_ctx.reply_skb);
mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
@@ -4385,7 +4388,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
drbd_md_mark_dirty(device);
}
- rv = change_cstate(connection, C_UNCONNECTED, CS_VERBOSE);
+ rv = change_cstate_tag(connection, C_UNCONNECTED, CS_VERBOSE, "connect", NULL);
drbd_adm_finish(&adm_ctx, info, rv);
return 0;
out:
@@ -4535,7 +4538,7 @@ int drbd_open_ro_count(struct drbd_resource *resource)
}
static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force,
- struct sk_buff *reply_skb)
+ const char *tag, struct sk_buff *reply_skb)
{
struct drbd_resource *resource = connection->resource;
enum drbd_conn_state cstate;
@@ -4545,7 +4548,7 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
long t;
repeat:
- rv = change_cstate_es(connection, C_DISCONNECTING, flags, &err_str);
+ rv = change_cstate_tag(connection, C_DISCONNECTING, flags, tag, &err_str);
switch (rv) {
case SS_CW_FAILED_BY_PEER:
case SS_NEED_CONNECTION:
@@ -4571,7 +4574,7 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
break;
case SS_IS_DISKLESS:
case SS_LOWER_THAN_OUTDATED:
- rv = change_cstate(connection, C_DISCONNECTING, CS_HARD);
+ rv = change_cstate_tag(connection, C_DISCONNECTING, CS_HARD, tag, NULL);
break;
case SS_NO_QUORUM:
if (!(flags & CS_VERBOSE)) {
@@ -4597,7 +4600,7 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
/* this can only be called immediately after a successful
* peer_try_disconnect, within the same resource->adm_mutex */
-static void del_connection(struct drbd_connection *connection)
+static void del_connection(struct drbd_connection *connection, const char *tag)
{
struct drbd_resource *resource = connection->resource;
struct drbd_peer_device *peer_device;
@@ -4619,7 +4622,7 @@ static void del_connection(struct drbd_connection *connection)
* after drbd_receiver() returned. Typically, we should be
* C_STANDALONE already, now, and this becomes a no-op.
*/
- rv2 = change_cstate(connection, C_STANDALONE, CS_VERBOSE | CS_HARD);
+ rv2 = change_cstate_tag(connection, C_STANDALONE, CS_VERBOSE | CS_HARD, tag, NULL);
if (rv2 < SS_SUCCESS)
drbd_err(connection,
"unexpected rv2=%d in del_connection()\n",
@@ -4654,6 +4657,7 @@ static int adm_disconnect(struct sk_buff *skb, struct genl_info *info, bool dest
struct drbd_connection *connection;
enum drbd_state_rv rv;
enum drbd_ret_code retcode;
+ const char *tag = destroy ? "del-peer" : "disconnect";
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
if (!adm_ctx.reply_skb)
@@ -4671,10 +4675,10 @@ static int adm_disconnect(struct sk_buff *skb, struct genl_info *info, bool dest
connection = adm_ctx.connection;
mutex_lock(&adm_ctx.resource->adm_mutex);
- rv = conn_try_disconnect(connection, parms.force_disconnect, adm_ctx.reply_skb);
+ rv = conn_try_disconnect(connection, parms.force_disconnect, tag, adm_ctx.reply_skb);
if (rv >= SS_SUCCESS && destroy) {
mutex_lock(&connection->resource->conf_update);
- del_connection(connection);
+ del_connection(connection, tag);
mutex_unlock(&connection->resource->conf_update);
}
if (rv < SS_SUCCESS)
@@ -4721,10 +4725,10 @@ void resync_after_online_grow(struct drbd_peer_device *peer_device)
if (!sync_source && connection->agreed_pro_version < 110) {
stable_change_repl_state(peer_device, L_WF_SYNC_UUID,
- CS_VERBOSE | CS_SERIALIZE);
+ CS_VERBOSE | CS_SERIALIZE, "online-grow");
return;
}
- drbd_start_resync(peer_device, sync_source ? L_SYNC_SOURCE : L_SYNC_TARGET);
+ drbd_start_resync(peer_device, sync_source ? L_SYNC_SOURCE : L_SYNC_TARGET, "online-grow");
}
sector_t drbd_local_max_size(struct drbd_device *device) __must_hold(local)
@@ -4956,7 +4960,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
goto fail;
}
- err = set_resource_options(adm_ctx.resource, &res_opts);
+ err = set_resource_options(adm_ctx.resource, &res_opts, "resource-options");
if (err) {
retcode = ERR_INVALID_REQUEST;
if (err == -ENOMEM)
@@ -4976,11 +4980,11 @@ static enum drbd_state_rv invalidate_resync(struct drbd_peer_device *peer_device
drbd_flush_workqueue(&peer_device->connection->sender_work);
- rv = change_repl_state(peer_device, L_STARTING_SYNC_T, CS_SERIALIZE);
+ rv = change_repl_state(peer_device, L_STARTING_SYNC_T, CS_SERIALIZE, "invalidate");
if (rv < SS_SUCCESS && rv != SS_NEED_CONNECTION)
rv = stable_change_repl_state(peer_device, L_STARTING_SYNC_T,
- CS_VERBOSE | CS_SERIALIZE);
+ CS_VERBOSE | CS_SERIALIZE, "invalidate");
wait_event_interruptible(resource->state_wait,
peer_device->repl_state[NOW] != L_STARTING_SYNC_T);
@@ -5005,7 +5009,7 @@ static enum drbd_state_rv invalidate_no_resync(struct drbd_device *device) __mus
}
}
__change_disk_state(device, D_INCONSISTENT);
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, "invalidate");
if (rv >= SS_SUCCESS) {
drbd_bitmap_io(device, &drbd_bmio_set_all_n_write,
@@ -5080,7 +5084,8 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
retcode = invalidate_resync(sync_from_peer_device);
} else {
retcode = change_repl_state(sync_from_peer_device, L_WF_BITMAP_T,
- CS_VERBOSE | CS_CLUSTER_WIDE | CS_WAIT_COMPLETE | CS_SERIALIZE);
+ CS_VERBOSE | CS_CLUSTER_WIDE | CS_WAIT_COMPLETE |
+ CS_SERIALIZE, "invalidate");
}
} else {
int retry = 3;
@@ -5103,7 +5108,8 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
}
retcode = change_repl_state(peer_device, L_WF_BITMAP_T,
CS_VERBOSE | CS_CLUSTER_WIDE |
- CS_WAIT_COMPLETE | CS_SERIALIZE);
+ CS_WAIT_COMPLETE | CS_SERIALIZE,
+ "invalidate");
}
if (retcode >= SS_SUCCESS)
goto out;
@@ -5140,13 +5146,15 @@ static int full_sync_from_peer(struct drbd_peer_device *peer_device)
struct drbd_resource *resource = device->resource;
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
- retcode = stable_change_repl_state(peer_device, L_STARTING_SYNC_S, CS_SERIALIZE);
+ retcode = stable_change_repl_state(peer_device, L_STARTING_SYNC_S, CS_SERIALIZE,
+ "invalidate-remote");
if (retcode < SS_SUCCESS) {
if (retcode == SS_NEED_CONNECTION && resource->role[NOW] == R_PRIMARY) {
/* The peer will get a resync upon connect anyways.
* Just make that into a full resync. */
retcode = change_peer_disk_state(peer_device, D_INCONSISTENT,
- CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE);
+ CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE,
+ "invalidate-remote");
if (retcode >= SS_SUCCESS) {
if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
"set_n_write from invalidate_peer",
@@ -5155,7 +5163,7 @@ static int full_sync_from_peer(struct drbd_peer_device *peer_device)
}
} else {
retcode = stable_change_repl_state(peer_device, L_STARTING_SYNC_S,
- CS_VERBOSE | CS_SERIALIZE);
+ CS_VERBOSE | CS_SERIALIZE, "invalidate-remote");
}
}
@@ -5214,7 +5222,8 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
retcode = full_sync_from_peer(peer_device);
} else {
retcode = change_repl_state(peer_device, L_WF_BITMAP_S,
- CS_VERBOSE | CS_CLUSTER_WIDE | CS_WAIT_COMPLETE | CS_SERIALIZE);
+ CS_VERBOSE | CS_CLUSTER_WIDE | CS_WAIT_COMPLETE | CS_SERIALIZE,
+ "invalidate-remote");
}
drbd_resume_io(device);
@@ -5329,7 +5338,7 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
__change_io_susp_fencing(connection, false);
__change_io_susp_quorum(resource, false);
- retcode = end_state_change(resource, &irq_flags);
+ retcode = end_state_change(resource, &irq_flags, "resume-io");
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
@@ -5348,7 +5357,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
retcode = stable_state_change(adm_ctx.device->resource,
change_disk_state(adm_ctx.device, D_OUTDATED,
- CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE, NULL));
+ CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE, "outdate", NULL));
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
@@ -6010,7 +6019,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
drbd_suspend_io(device, READ_AND_WRITE);
wait_event(device->misc_wait, !atomic_read(&device->pending_bitmap_work.n));
rv = stable_change_repl_state(peer_device,
- L_VERIFY_S, CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE);
+ L_VERIFY_S, CS_VERBOSE | CS_WAIT_COMPLETE | CS_SERIALIZE, "verify");
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
@@ -6082,7 +6091,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
unsigned long irq_flags;
begin_state_change(device->resource, &irq_flags, CS_VERBOSE);
__change_disk_state(device, D_UP_TO_DATE);
- end_state_change(device->resource, &irq_flags);
+ end_state_change(device->resource, &irq_flags, "new-c-uuid");
for_each_peer_device(peer_device, device) {
if (NODE_MASK(peer_device->node_id) & nodes) {
@@ -6122,7 +6131,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
if (NODE_MASK(peer_device->node_id) & diskful)
__change_peer_disk_state(peer_device, D_UP_TO_DATE);
}
- end_state_change(device->resource, &irq_flags);
+ end_state_change(device->resource, &irq_flags, "new-c-uuid");
}
drbd_md_sync_if_dirty(device);
@@ -6353,7 +6362,7 @@ static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
for_each_peer_device_ref(peer_device, im, device)
stable_change_repl_state(peer_device, L_OFF,
- CS_VERBOSE | CS_WAIT_COMPLETE);
+ CS_VERBOSE | CS_WAIT_COMPLETE, "del-minor");
/* If drbd_ldev_destroy() is pending, wait for it to run before
* unregistering the device. */
@@ -6465,7 +6474,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&resource->adm_mutex);
set_bit(DOWN_IN_PROGRESS, &resource->flags);
/* demote */
- retcode = drbd_set_role(resource, R_SECONDARY, false, adm_ctx.reply_skb);
+ retcode = drbd_set_role(resource, R_SECONDARY, false, "down", adm_ctx.reply_skb);
if (retcode < SS_SUCCESS) {
opener_info(adm_ctx.resource, adm_ctx.reply_skb, retcode);
goto out;
@@ -6474,10 +6483,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
for_each_connection_ref(connection, im, resource) {
retcode = SS_SUCCESS;
if (connection->cstate[NOW] > C_STANDALONE)
- retcode = conn_try_disconnect(connection, 0, adm_ctx.reply_skb);
+ retcode = conn_try_disconnect(connection, 0, "down", adm_ctx.reply_skb);
if (retcode >= SS_SUCCESS) {
mutex_lock(&resource->conf_update);
- del_connection(connection);
+ del_connection(connection, "down");
mutex_unlock(&resource->conf_update);
} else {
kref_debug_put(&connection->kref_debug, 13);
@@ -6491,7 +6500,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
idr_for_each_entry(&resource->devices, device, i) {
kref_get(&device->kref);
rcu_read_unlock();
- retcode = adm_detach(device, 0, 0, adm_ctx.reply_skb);
+ retcode = adm_detach(device, 0, 0, "down", adm_ctx.reply_skb);
mutex_lock(&resource->conf_update);
ret = adm_del_minor(device);
mutex_unlock(&resource->conf_update);
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index 223353b1081c..0c3ab0fd486c 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -920,7 +920,7 @@ static void apply_local_state_change(struct drbd_connection *connection, enum ao
resource->role[NEW] = R_SECONDARY;
}
}
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "connect-failed");
}
static int connect_work(struct drbd_work *work, int cancel)
@@ -945,8 +945,9 @@ static int connect_work(struct drbd_work *work, int cancel)
rv = SS_CONCURRENT_ST_CHG;
break;
}
- rv = change_cstate(connection, C_CONNECTED, CS_SERIALIZE |
- CS_ALREADY_SERIALIZED | CS_VERBOSE | CS_DONT_RETRY);
+ rv = change_cstate_tag(connection, C_CONNECTED, CS_SERIALIZE |
+ CS_ALREADY_SERIALIZED | CS_VERBOSE | CS_DONT_RETRY,
+ "connected", NULL);
up(&resource->state_sem);
if (rv != SS_PRIMARY_READER)
break;
@@ -1011,7 +1012,8 @@ static bool conn_connect(struct drbd_connection *connection)
start:
have_mutex = false;
clear_bit(DISCONNECT_EXPECTED, &connection->flags);
- if (change_cstate(connection, C_CONNECTING, CS_VERBOSE) < SS_SUCCESS) {
+ if (change_cstate_tag(connection, C_CONNECTING, CS_VERBOSE, "connecting", NULL)
+ < SS_SUCCESS) {
/* We do not have a network config. */
return false;
}
@@ -3697,7 +3699,8 @@ static enum sync_strategy drbd_asb_recover_1p(struct drbd_peer_device *peer_devi
/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
* we might be here in L_OFF which is transient.
* we do not need to wait for the after state change work either. */
- rv2 = change_role(resource, R_SECONDARY, CS_VERBOSE, NULL);
+ rv2 = change_role(resource, R_SECONDARY, CS_VERBOSE,
+ "after-sb-1pri", NULL);
if (rv2 != SS_SUCCESS) {
drbd_maybe_khelper(device, connection, "pri-lost-after-sb");
} else {
@@ -3750,7 +3753,8 @@ static enum sync_strategy drbd_asb_recover_2p(struct drbd_peer_device *peer_devi
/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
* we might be here in L_OFF which is transient.
* we do not need to wait for the after state change work either. */
- rv2 = change_role(device->resource, R_SECONDARY, CS_VERBOSE, NULL);
+ rv2 = change_role(device->resource, R_SECONDARY, CS_VERBOSE,
+ "after-sb-2pri", NULL);
if (rv2 != SS_SUCCESS) {
drbd_maybe_khelper(device, connection, "pri-lost-after-sb");
} else {
@@ -5500,6 +5504,7 @@ static void drbd_resync(struct drbd_peer_device *peer_device,
enum sync_rule rule;
int peer_node_id;
enum drbd_state_rv rv;
+ const char *tag = reason == AFTER_UNSTABLE ? "after-unstable" : "diskless-primary";
strategy = drbd_handshake(peer_device, &rule, &peer_node_id, reason == DISKLESS_PRIMARY);
if (strategy == SPLIT_BRAIN_AUTO_RECOVER && reason == AFTER_UNSTABLE)
@@ -5529,11 +5534,11 @@ static void drbd_resync(struct drbd_peer_device *peer_device,
as well. */
drbd_info(peer_device, "Upgrading local disk to %s after unstable/weak (and no resync).\n",
drbd_disk_str(peer_disk_state));
- change_disk_state(peer_device->device, peer_disk_state, CS_VERBOSE, NULL);
+ change_disk_state(peer_device->device, peer_disk_state, CS_VERBOSE, tag, NULL);
return;
}
- rv = change_repl_state(peer_device, new_repl_state, CS_VERBOSE);
+ rv = change_repl_state(peer_device, new_repl_state, CS_VERBOSE, tag);
if ((rv == SS_NOTHING_TO_DO || rv == SS_RESYNC_RUNNING) &&
(new_repl_state == L_WF_BITMAP_S || new_repl_state == L_WF_BITMAP_T)) {
/* Those events might happen very quickly. In case we are still processing
@@ -5633,7 +5638,7 @@ static int __receive_uuids(struct drbd_peer_device *peer_device, u64 node_mask)
begin_state_change(device->resource, &irq_flags, CS_VERBOSE);
__change_disk_state(device, D_UP_TO_DATE);
__change_peer_disk_state(peer_device, D_UP_TO_DATE);
- end_state_change(device->resource, &irq_flags);
+ end_state_change(device->resource, &irq_flags, "skip-initial-sync");
updated_uuids = 1;
propagate_skip_initial_to_diskless(device);
}
@@ -5668,7 +5673,7 @@ static int __receive_uuids(struct drbd_peer_device *peer_device, u64 node_mask)
if (device->disk_state[NOW] == D_DISKLESS && uuid_match &&
peer_device->disk_state[NOW] == D_CONSISTENT) {
drbd_info(peer_device, "Peer is on same UUID now\n");
- change_peer_disk_state(peer_device, D_UP_TO_DATE, CS_VERBOSE);
+ change_peer_disk_state(peer_device, D_UP_TO_DATE, CS_VERBOSE, "receive-uuids");
}
if (updated_uuids)
@@ -5855,7 +5860,7 @@ static void check_resync_source(struct drbd_device *device, u64 weak_nodes)
return;
abort:
drbd_info(peer_device, "My sync source became a weak node, aborting resync!\n");
- change_repl_state(peer_device, L_ESTABLISHED, CS_VERBOSE);
+ change_repl_state(peer_device, L_ESTABLISHED, CS_VERBOSE, "abort-resync");
drbd_flush_workqueue(&device->resource->work);
wait_event_interruptible(device->misc_wait,
@@ -6091,7 +6096,7 @@ retry:
if (is_connect && connection->agreed_pro_version >= 117)
apply_connect(connection, (flags & CS_PREPARED) && !abort);
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, "remote");
out:
if ((rv == SS_NO_UP_TO_DATE_DISK && resource->role[NOW] != R_PRIMARY) ||
@@ -6144,7 +6149,7 @@ change_peer_device_state(struct drbd_peer_device *peer_device,
rv = __change_connection_state(connection, mask, val, flags);
if (rv < SS_SUCCESS)
goto fail;
- rv = end_state_change(connection->resource, &irq_flags);
+ rv = end_state_change(connection->resource, &irq_flags, "remote");
out:
return rv;
fail:
@@ -6375,7 +6380,7 @@ far_away_change(struct drbd_connection *connection,
}
/* even if no outdate happens, CS_FORCE_RECALC might be set here */
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, "far-away");
}
static void handle_neighbor_demotion(struct drbd_connection *connection,
@@ -6630,17 +6635,18 @@ cont:
/* peer is secondary */
resync = L_SYNC_SOURCE;
}
- drbd_start_resync(peer_device, resync);
+ drbd_start_resync(peer_device, resync, "resize");
} else {
if (tr->diskful_primary_nodes & NODE_MASK(peer_device->node_id))
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET,
+ "resize");
/* else no resync */
}
} else {
if (resource->twopc_parent_nodes & NODE_MASK(peer_device->node_id))
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "resize");
else if (nodes_to_reach & NODE_MASK(peer_device->node_id))
- drbd_start_resync(peer_device, L_SYNC_SOURCE);
+ drbd_start_resync(peer_device, L_SYNC_SOURCE, "resize");
/* else no resync */
}
}
@@ -7101,7 +7107,7 @@ void drbd_try_to_get_resynced(struct drbd_device *device)
peer_device = best_peer_device;
if (best_strategy == NO_SYNC) {
- change_disk_state(device, D_UP_TO_DATE, CS_VERBOSE, NULL);
+ change_disk_state(device, D_UP_TO_DATE, CS_VERBOSE, "get-resync", NULL);
} else if (peer_device) {
drbd_resync(peer_device, DISKLESS_PRIMARY);
drbd_send_uuids(peer_device, UUID_FLAG_RESYNC | UUID_FLAG_DISKLESS_PRIMARY, 0);
@@ -7225,7 +7231,7 @@ static void diskless_with_peers_different_current_uuids(struct drbd_peer_device
CS_VERBOSE | CS_HARD | CS_FS_IGN_OPENERS);
resource->role[NEW] = R_SECONDARY;
/* resource->fail_io[NEW] gets set via CS_FS_IGN_OPENERS */
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "peer-state");
}
set_bit(CONN_HANDSHAKE_RETRY, &connection->flags);
} else if (data_successor && resource->role[NOW] == R_SECONDARY) {
@@ -7278,7 +7284,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
if (peer_state.role == R_SECONDARY) {
begin_state_change(resource, &irq_flags, CS_HARD | CS_VERBOSE);
__change_peer_role(connection, R_SECONDARY);
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, "peer-state");
if (rv < SS_SUCCESS)
goto fail;
}
@@ -7374,7 +7380,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
/* Start resync after AHEAD/BEHIND */
if (connection->agreed_pro_version >= 110 &&
peer_state.conn == L_SYNC_SOURCE && old_peer_state.conn == L_BEHIND) {
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "resync-after-behind");
return 0;
}
@@ -7519,7 +7525,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
begin_state_change(resource, &irq_flags, CS_HARD);
__change_cstate(connection, C_PROTOCOL_ERROR);
__change_io_susp_user(resource, false);
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "abort-connect");
return -EIO;
}
@@ -7581,7 +7587,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
if (repl_state[OLD] < L_ESTABLISHED && repl_state[NEW] >= L_ESTABLISHED)
resource->state_change_flags |= CS_HARD;
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, "peer-state");
new_repl_state = peer_device->repl_state[NOW];
if (rv < SS_SUCCESS)
@@ -7635,7 +7641,7 @@ static int receive_sync_uuid(struct drbd_connection *connection, struct packet_i
_drbd_uuid_set_bitmap(peer_device, 0UL);
drbd_print_uuids(peer_device, "updated sync uuid");
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "peer-sync-uuid");
put_ldev(device);
} else
@@ -7935,17 +7941,18 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info
/* We have entered drbd_start_resync() since starting the bitmap exchange. */
drbd_warn(peer_device, "Received bitmap more than once; ignoring\n");
} else if (repl_state == L_WF_BITMAP_S) {
- drbd_start_resync(peer_device, L_SYNC_SOURCE);
+ drbd_start_resync(peer_device, L_SYNC_SOURCE, "receive-bitmap");
} else if (repl_state == L_WF_BITMAP_T) {
if (connection->agreed_pro_version < 110) {
enum drbd_state_rv rv;
/* Omit CS_WAIT_COMPLETE and CS_SERIALIZE with this state
* transition to avoid deadlocks. */
- rv = stable_change_repl_state(peer_device, L_WF_SYNC_UUID, CS_VERBOSE);
+ rv = stable_change_repl_state(peer_device, L_WF_SYNC_UUID, CS_VERBOSE,
+ "receive-bitmap");
D_ASSERT(device, rv == SS_SUCCESS);
} else {
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "receive-bitmap");
}
} else {
/* admin may have requested C_DISCONNECTING,
@@ -8139,7 +8146,7 @@ static int receive_peer_dagtag(struct drbd_connection *connection, struct packet
__change_repl_state(peer_device, new_repl_state);
set_bit(RECONCILIATION_RESYNC, &peer_device->flags);
}
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, "receive-peer-dagtag");
if (rv == SS_SUCCESS)
drbd_info(connection, "Reconciliation resync because \'%s\' disappeared. (o=%d)\n",
lost_peer->transport.net_conf->name, (int)dagtag_offset);
@@ -8248,7 +8255,8 @@ static int receive_current_uuid(struct drbd_connection *connection, struct packe
if (resource->remote_state_change)
set_bit(OUTDATE_ON_2PC_COMMIT, &device->flags);
else
- change_disk_state(device, D_OUTDATED, CS_VERBOSE, NULL);
+ change_disk_state(device, D_OUTDATED, CS_VERBOSE,
+ "receive-current-uuid", NULL);
}
put_ldev(device);
} else if (device->disk_state[NOW] == D_DISKLESS && resource->role[NOW] == R_PRIMARY) {
@@ -8328,7 +8336,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
static int receive_disconnect(struct drbd_connection *connection, struct packet_info *pi)
{
- change_cstate(connection, C_DISCONNECTING, CS_HARD);
+ change_cstate_tag(connection, C_DISCONNECTING, CS_HARD, "receive-disconnect", NULL);
return 0;
}
@@ -8594,7 +8602,7 @@ static void conn_disconnect(struct drbd_connection *connection)
* Usually we should be in some network failure state already,
* but just in case we are not, we fix it up here.
*/
- change_cstate(connection, C_NETWORK_FAILURE, CS_HARD);
+ change_cstate_tag(connection, C_NETWORK_FAILURE, CS_HARD, "disconnected", NULL);
del_connect_timer(connection);
@@ -8707,10 +8715,11 @@ static void conn_disconnect(struct drbd_connection *connection)
/* drbd_receiver() has to be restarted after it returns */
drbd_thread_restart_nowait(&connection->receiver);
}
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "disconnected");
if (oc == C_DISCONNECTING)
- change_cstate(connection, C_STANDALONE, CS_VERBOSE | CS_HARD | CS_LOCAL_ONLY);
+ change_cstate_tag(connection, C_STANDALONE, CS_VERBOSE | CS_HARD | CS_LOCAL_ONLY,
+ "disconnected", NULL);
}
/*
diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
index af2ae66f51b5..41950e46ec27 100644
--- a/drbd/drbd_req.c
+++ b/drbd/drbd_req.c
@@ -2688,7 +2688,7 @@ void request_timer_fn(struct timer_list *t)
continue;
begin_state_change(resource, &irq_flags, CS_VERBOSE | CS_HARD);
__change_cstate(connection, C_TIMEOUT);
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "timeout");
kref_put(&connection->kref, drbd_destroy_connection);
}
@@ -2723,7 +2723,7 @@ void drbd_handle_io_error_(struct drbd_device *device,
if (device->disk_state[NOW] > D_INCONSISTENT) {
begin_state_change_locked(device->resource, CS_HARD);
__change_disk_state(device, D_INCONSISTENT);
- end_state_change_locked(device->resource);
+ end_state_change_locked(device->resource, "local-io-error");
}
break;
}
@@ -2740,7 +2740,7 @@ void drbd_handle_io_error_(struct drbd_device *device,
if (device->disk_state[NOW] > D_FAILED) {
begin_state_change_locked(device->resource, CS_HARD);
__change_disk_state(device, D_FAILED);
- end_state_change_locked(device->resource);
+ end_state_change_locked(device->resource, "local-io-error");
drbd_err(device,
"Local IO failed in %s. Detaching...\n", where);
}
diff --git a/drbd/drbd_sender.c b/drbd/drbd_sender.c
index 93a460dae190..0beeb5f3cb54 100644
--- a/drbd/drbd_sender.c
+++ b/drbd/drbd_sender.c
@@ -472,7 +472,7 @@ int w_resync_timer(struct drbd_work *w, int cancel)
unsigned long irq_flags;
begin_state_change(resource, &irq_flags, 0);
peer_device->resync_active[NEW] = false;
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "resync-inactive");
}
break;
}
@@ -1155,7 +1155,7 @@ static void resync_again(struct drbd_device *device, u64 source_m, u64 target_m)
peer_device->resync_again--;
begin_state_change_locked(device->resource, CS_VERBOSE);
__change_repl_state(peer_device, new_repl_state);
- end_state_change_locked(device->resource);
+ end_state_change_locked(device->resource, "resync-again");
}
}
}
@@ -1225,7 +1225,7 @@ found:
}
drbd_send_uuids(peer_device, 0, 0);
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "resync-from-primary");
}
static void queue_resync_finished(struct drbd_peer_device *peer_device, enum drbd_disk_state new_peer_disk_state)
@@ -1428,7 +1428,7 @@ void drbd_resync_finished(struct drbd_peer_device *peer_device,
}
out_unlock:
- end_state_change_locked(device->resource);
+ end_state_change_locked(device->resource, "resync-finished");
put_ldev(device);
@@ -1920,7 +1920,8 @@ static bool drbd_pause_after(struct drbd_device *device)
if (!__drbd_may_sync_now(other_peer_device))
__change_resync_susp_dependency(other_peer_device, true);
}
- if (end_state_change_locked(other_device->resource) != SS_NOTHING_TO_DO)
+ if (end_state_change_locked(other_device->resource, "resync-after") !=
+ SS_NOTHING_TO_DO)
changed = true;
}
rcu_read_unlock();
@@ -1957,7 +1958,8 @@ static bool drbd_resume_next(struct drbd_device *device)
__drbd_may_sync_now(other_peer_device))
__change_resync_susp_dependency(other_peer_device, false);
}
- if (end_state_change_locked(other_device->resource) != SS_NOTHING_TO_DO)
+ if (end_state_change_locked(other_device->resource, "resync-after") !=
+ SS_NOTHING_TO_DO)
changed = true;
}
rcu_read_unlock();
@@ -2125,7 +2127,7 @@ static void do_start_resync(struct drbd_peer_device *peer_device)
return;
}
- drbd_start_resync(peer_device, peer_device->start_resync_side);
+ drbd_start_resync(peer_device, peer_device->start_resync_side, "postponed-resync");
clear_bit(AHEAD_TO_SYNC_SOURCE, &peer_device->flags);
}
@@ -2149,7 +2151,7 @@ static void handle_congestion(struct drbd_peer_device *peer_device)
else if (on_congestion == OC_DISCONNECT)
__change_cstate(peer_device->connection, C_DISCONNECTING);
}
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "congestion");
}
rcu_read_unlock();
@@ -2163,7 +2165,8 @@ static void handle_congestion(struct drbd_peer_device *peer_device)
* This function might bring you directly into one of the
* C_PAUSED_SYNC_* states.
*/
-void drbd_start_resync(struct drbd_peer_device *peer_device, enum drbd_repl_state side)
+void drbd_start_resync(struct drbd_peer_device *peer_device, enum drbd_repl_state side,
+ const char *tag)
{
struct drbd_device *device = peer_device->device;
struct drbd_connection *connection = peer_device->connection;
@@ -2258,7 +2261,7 @@ skip_helper:
init_resync_stable_bits(peer_device);
finished_resync_pdsk = peer_device->resync_finished_pdsk;
peer_device->resync_finished_pdsk = D_UNKNOWN;
- r = end_state_change_locked(device->resource);
+ r = end_state_change_locked(device->resource, tag);
repl_state = peer_device->repl_state[NOW];
if (repl_state < L_ESTABLISHED)
@@ -2339,7 +2342,7 @@ static void go_diskless(struct drbd_device *device)
}
drbd_md_sync_if_dirty(device);
- change_disk_state(device, D_DISKLESS, CS_HARD, NULL);
+ change_disk_state(device, D_DISKLESS, CS_HARD, "go-diskless", NULL);
}
static int do_md_sync(struct drbd_device *device)
diff --git a/drbd/drbd_state.c b/drbd/drbd_state.c
index 23eab7f867aa..22cd134be962 100644
--- a/drbd/drbd_state.c
+++ b/drbd/drbd_state.c
@@ -74,8 +74,8 @@ struct change_disk_state_context {
static bool lost_contact_to_peer_data(enum drbd_disk_state *peer_disk_state);
static bool peer_returns_diskless(struct drbd_peer_device *peer_device,
enum drbd_disk_state os, enum drbd_disk_state ns);
-static void print_state_change(struct drbd_resource *resource, const char *prefix);
-static void finish_state_change(struct drbd_resource *);
+static void print_state_change(struct drbd_resource *resource, const char *prefix, const char *tag);
+static void finish_state_change(struct drbd_resource *, const char *tag);
static int w_after_state_change(struct drbd_work *w, int unused);
static enum drbd_state_rv is_valid_soft_transition(struct drbd_resource *);
static enum drbd_state_rv is_valid_transition(struct drbd_resource *resource);
@@ -774,7 +774,7 @@ static void queue_after_state_change_work(struct drbd_resource *resource,
}
static enum drbd_state_rv ___end_state_change(struct drbd_resource *resource, struct completion *done,
- enum drbd_state_rv rv)
+ enum drbd_state_rv rv, const char *tag)
{
enum chg_state_flags flags = resource->state_change_flags;
struct drbd_connection *connection;
@@ -793,14 +793,14 @@ static enum drbd_state_rv ___end_state_change(struct drbd_resource *resource, st
if (flags & CS_VERBOSE) {
drbd_err(resource, "State change failed: %s (%d)\n",
drbd_set_st_err_str(rv), rv);
- print_state_change(resource, "Failed: ");
+ print_state_change(resource, "Failed: ", tag);
}
goto out;
}
if (flags & CS_PREPARE)
goto out;
- finish_state_change(resource);
+ finish_state_change(resource, tag);
update_members(resource);
/* Check whether we are establishing a connection before applying the change. */
@@ -939,9 +939,9 @@ void begin_state_change_locked(struct drbd_resource *resource, enum chg_state_fl
__begin_state_change(resource);
}
-enum drbd_state_rv end_state_change_locked(struct drbd_resource *resource)
+enum drbd_state_rv end_state_change_locked(struct drbd_resource *resource, const char *tag)
{
- return ___end_state_change(resource, NULL, SS_SUCCESS);
+ return ___end_state_change(resource, NULL, SS_SUCCESS, tag);
}
void begin_state_change(struct drbd_resource *resource, unsigned long *irq_flags, enum chg_state_flags flags)
@@ -952,7 +952,8 @@ void begin_state_change(struct drbd_resource *resource, unsigned long *irq_flags
static enum drbd_state_rv __end_state_change(struct drbd_resource *resource,
unsigned long *irq_flags,
- enum drbd_state_rv rv)
+ enum drbd_state_rv rv,
+ const char *tag)
{
enum chg_state_flags flags = resource->state_change_flags;
struct completion __done, *done = NULL;
@@ -961,26 +962,27 @@ static enum drbd_state_rv __end_state_change(struct drbd_resource *resource,
done = &__done;
init_completion(done);
}
- rv = ___end_state_change(resource, done, rv);
+ rv = ___end_state_change(resource, done, rv, tag);
__state_change_unlock(resource, irq_flags, rv >= SS_SUCCESS ? done : NULL);
return rv;
}
-enum drbd_state_rv end_state_change(struct drbd_resource *resource, unsigned long *irq_flags)
+enum drbd_state_rv end_state_change(struct drbd_resource *resource, unsigned long *irq_flags,
+ const char *tag)
{
- return __end_state_change(resource, irq_flags, SS_SUCCESS);
+ return __end_state_change(resource, irq_flags, SS_SUCCESS, tag);
}
void abort_state_change(struct drbd_resource *resource, unsigned long *irq_flags)
{
resource->state_change_flags &= ~CS_VERBOSE;
- __end_state_change(resource, irq_flags, SS_UNKNOWN_ERROR);
+ __end_state_change(resource, irq_flags, SS_UNKNOWN_ERROR, NULL);
}
void abort_state_change_locked(struct drbd_resource *resource)
{
resource->state_change_flags &= ~CS_VERBOSE;
- ___end_state_change(resource, NULL, SS_UNKNOWN_ERROR);
+ ___end_state_change(resource, NULL, SS_UNKNOWN_ERROR, NULL);
}
static void begin_remote_state_change(struct drbd_resource *resource, unsigned long *irq_flags)
@@ -1159,15 +1161,19 @@ static int scnprintf_io_suspend_flags(char *buffer, size_t size,
return b - buffer;
}
-static void print_state_change(struct drbd_resource *resource, const char *prefix)
+static void print_state_change(struct drbd_resource *resource, const char *prefix, const char *tag)
{
char buffer[150], *b, *end = buffer + sizeof(buffer);
+ char tag_buffer[30] = ""; /* " [tag]" log suffix; stays empty when tag is NULL */
struct drbd_connection *connection;
struct drbd_device *device;
enum drbd_role *role = resource->role;
bool *fail_io = resource->fail_io;
int vnr;
+ if (tag)
+ scnprintf(tag_buffer, sizeof(tag_buffer), " [%s]", tag);
+
b = buffer;
if (role[OLD] != role[NEW])
b += scnprintf(b, end - b, "role( %s -> %s ) ",
@@ -1186,7 +1192,7 @@ static void print_state_change(struct drbd_resource *resource, const char *prefi
fail_io[NEW] ? "yes" : "no");
if (b != buffer) {
*(b-1) = 0;
- drbd_info(resource, "%s%s\n", prefix, buffer);
+ drbd_info(resource, "%s%s%s\n", prefix, buffer, tag_buffer);
}
for_each_connection(connection, resource) {
@@ -1205,7 +1211,7 @@ static void print_state_change(struct drbd_resource *resource, const char *prefi
if (b != buffer) {
*(b-1) = 0;
- drbd_info(connection, "%s%s\n", prefix, buffer);
+ drbd_info(connection, "%s%s%s\n", prefix, buffer, tag_buffer);
}
}
@@ -1225,7 +1231,7 @@ static void print_state_change(struct drbd_resource *resource, const char *prefi
have_quorum[NEW] ? "yes" : "no");
if (b != buffer) {
*(b-1) = 0;
- drbd_info(device, "%s%s\n", prefix, buffer);
+ drbd_info(device, "%s%s%s\n", prefix, buffer, tag_buffer);
}
for_each_peer_device(peer_device, device) {
@@ -1253,7 +1259,7 @@ static void print_state_change(struct drbd_resource *resource, const char *prefi
if (b != buffer) {
*(b-1) = 0;
- drbd_info(peer_device, "%s%s\n", prefix, buffer);
+ drbd_info(peer_device, "%s%s%s\n", prefix, buffer, tag_buffer);
}
}
}
@@ -2543,7 +2549,7 @@ static bool should_try_become_up_to_date(struct drbd_device *device, enum drbd_d
/**
* finish_state_change - carry out actions triggered by a state change
*/
-static void finish_state_change(struct drbd_resource *resource)
+static void finish_state_change(struct drbd_resource *resource, const char *tag)
{
enum drbd_role *role = resource->role;
struct drbd_device *device;
@@ -2557,7 +2563,7 @@ static void finish_state_change(struct drbd_resource *resource)
bool unfreeze_io = false;
int vnr;
- print_state_change(resource, "");
+ print_state_change(resource, "", tag);
resource_suspended[OLD] = resource_is_suspended(resource, OLD);
resource_suspended[NEW] = resource_is_suspended(resource, NEW);
@@ -3025,7 +3031,7 @@ static void abw_start_sync(struct drbd_device *device,
if (rv) {
drbd_err(device, "Writing the bitmap failed not starting resync.\n");
- stable_change_repl_state(peer_device, L_ESTABLISHED, CS_VERBOSE);
+ stable_change_repl_state(peer_device, L_ESTABLISHED, CS_VERBOSE, "start-sync");
return;
}
@@ -3039,12 +3045,13 @@ static void abw_start_sync(struct drbd_device *device,
rcu_read_unlock();
if (peer_device->connection->agreed_pro_version < 110)
- stable_change_repl_state(peer_device, L_WF_SYNC_UUID, CS_VERBOSE);
+ stable_change_repl_state(peer_device, L_WF_SYNC_UUID, CS_VERBOSE,
+ "start-sync");
else
- drbd_start_resync(peer_device, L_SYNC_TARGET);
+ drbd_start_resync(peer_device, L_SYNC_TARGET, "start-sync");
break;
case L_STARTING_SYNC_S:
- drbd_start_resync(peer_device, L_SYNC_SOURCE);
+ drbd_start_resync(peer_device, L_SYNC_SOURCE, "start-sync");
break;
default:
break;
@@ -3531,7 +3538,7 @@ static void check_may_resume_io_after_fencing(struct drbd_state_change *state_ch
rcu_read_unlock();
begin_state_change(resource, &irq_flags, CS_VERBOSE);
__change_io_susp_fencing(connection, false);
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "after-fencing");
}
/* case2: The connection was established again: */
if (all_peer_disks_connected) {
@@ -3543,7 +3550,7 @@ static void check_may_resume_io_after_fencing(struct drbd_state_change *state_ch
rcu_read_unlock();
begin_state_change(resource, &irq_flags, CS_VERBOSE);
__change_io_susp_fencing(connection, false);
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "after-fencing");
}
}
@@ -4191,7 +4198,7 @@ static int w_after_state_change(struct drbd_work *w, int unused)
begin_state_change(resource, &irq_flags, CS_VERBOSE);
resource->susp_uuid[NEW] = false;
- end_state_change(resource, &irq_flags);
+ end_state_change(resource, &irq_flags, "susp-uuid");
}
if (try_become_up_to_date)
@@ -4673,7 +4680,7 @@ static void twopc_phase2(struct drbd_resource *resource,
*/
static enum drbd_state_rv
change_cluster_wide_state(bool (*change)(struct change_context *, enum change_phase),
- struct change_context *context)
+ struct change_context *context, const char *tag)
{
struct drbd_resource *resource = context->resource;
unsigned long irq_flags;
@@ -4692,11 +4699,11 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
if (local_state_change(context->flags)) {
/* Not a cluster-wide state change. */
change(context, PH_LOCAL_COMMIT);
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, tag);
} else {
if (!change(context, PH_PREPARE)) {
/* Not a cluster-wide state change. */
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, tag);
}
rv = try_state_change(resource);
if (rv != SS_SUCCESS) {
@@ -4704,7 +4711,7 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
/* abort_state_change(resource, &irq_flags); */
if (rv == SS_NOTHING_TO_DO)
resource->state_change_flags &= ~CS_VERBOSE;
- return __end_state_change(resource, &irq_flags, rv);
+ return __end_state_change(resource, &irq_flags, rv, tag);
}
/* Really a cluster-wide state change. */
}
@@ -4720,12 +4727,12 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
}
if (rv >= SS_SUCCESS)
change(context, PH_84_COMMIT);
- return __end_state_change(resource, &irq_flags, rv);
+ return __end_state_change(resource, &irq_flags, rv, tag);
}
if (!expect(resource, context->flags & CS_SERIALIZE)) {
rv = SS_CW_FAILED_BY_PEER;
- return __end_state_change(resource, &irq_flags, rv);
+ return __end_state_change(resource, &irq_flags, rv, tag);
}
rcu_read_lock();
@@ -4740,7 +4747,7 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
retry:
if (current == resource->worker.task && resource->remote_state_change)
- return __end_state_change(resource, &irq_flags, SS_CONCURRENT_ST_CHG);
+ return __end_state_change(resource, &irq_flags, SS_CONCURRENT_ST_CHG, tag);
complete_remote_state_change(resource, &irq_flags);
start_time = jiffies;
@@ -4756,7 +4763,7 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
connection = drbd_get_connection_by_node_id(resource, context->target_node_id);
if (!connection) {
rv = SS_NEED_CONNECTION;
- return __end_state_change(resource, &irq_flags, rv);
+ return __end_state_change(resource, &irq_flags, rv, tag);
}
kref_debug_get(&connection->kref_debug, 8);
@@ -4768,7 +4775,7 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
kref_debug_put(&connection->kref_debug, 8);
kref_put(&connection->kref, drbd_destroy_connection);
- return __end_state_change(resource, &irq_flags, rv);
+ return __end_state_change(resource, &irq_flags, rv, tag);
}
target_connection = connection;
@@ -4959,7 +4966,7 @@ change_cluster_wide_state(bool (*change)(struct change_context *, enum change_ph
clear_bit(TWOPC_STATE_CHANGE_PENDING, &resource->flags);
if (rv >= SS_SUCCESS) {
change(context, PH_COMMIT);
- rv = end_state_change(resource, &irq_flags);
+ rv = end_state_change(resource, &irq_flags, tag);
if (rv < SS_SUCCESS)
drbd_err(resource, "FATAL: Local commit of already committed %u failed! \n",
request.tid);
@@ -5276,6 +5283,7 @@ static bool do_change_role(struct change_context *context, enum change_phase pha
enum drbd_state_rv change_role(struct drbd_resource *resource,
enum drbd_role role,
enum chg_state_flags flags,
+ const char *tag,
const char **err_str)
{
struct change_context role_context = {
@@ -5310,7 +5318,7 @@ enum drbd_state_rv change_role(struct drbd_resource *resource,
}
role_context.change_local_state_last = true;
}
- rv = change_cluster_wide_state(do_change_role, &role_context);
+ rv = change_cluster_wide_state(do_change_role, &role_context, tag);
out:
if (got_state_sem)
up(&resource->state_sem);
@@ -5330,7 +5338,7 @@ enum drbd_state_rv change_io_susp_user(struct drbd_resource *resource,
begin_state_change(resource, &irq_flags, flags);
__change_io_susp_user(resource, value);
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, value ? "suspend-io" : "resume-io");
}
void __change_io_susp_no_data(struct drbd_resource *resource, bool value)
@@ -5465,7 +5473,7 @@ enum drbd_state_rv twopc_after_lost_peer(struct drbd_resource *resource,
/* The other nodes get the request for an empty state change. I.e. they
will agree to this change request. At commit time we know where to
go from the D_CONSISTENT, since we got the primary mask. */
- return change_cluster_wide_state(do_twopc_after_lost_peer, &context);
+ return change_cluster_wide_state(do_twopc_after_lost_peer, &context, "lost-peer");
}
static bool do_change_disk_state(struct change_context *context, enum change_phase phase)
@@ -5496,6 +5504,7 @@ static bool do_change_disk_state(struct change_context *context, enum change_pha
enum drbd_state_rv change_disk_state(struct drbd_device *device,
enum drbd_disk_state disk_state,
enum chg_state_flags flags,
+ const char *tag,
const char **err_str)
{
struct change_disk_state_context disk_state_context = {
@@ -5513,7 +5522,7 @@ enum drbd_state_rv change_disk_state(struct drbd_device *device,
};
return change_cluster_wide_state(do_change_disk_state,
- &disk_state_context.context);
+ &disk_state_context.context, tag);
}
void __change_cstate(struct drbd_connection *connection, enum drbd_conn_state cstate)
@@ -5683,11 +5692,11 @@ static bool do_change_cstate(struct change_context *context, enum change_phase p
* peer disks depending on the fencing policy. This cannot easily be split
* into two state changes.
*/
-enum drbd_state_rv change_cstate_es(struct drbd_connection *connection,
+enum drbd_state_rv change_cstate_tag(struct drbd_connection *connection,
enum drbd_conn_state cstate,
enum chg_state_flags flags,
- const char **err_str
- )
+ const char *tag,
+ const char **err_str)
{
struct change_cstate_context cstate_context = {
.context = {
@@ -5717,7 +5726,7 @@ enum drbd_state_rv change_cstate_es(struct drbd_connection *connection,
if (!(flags & CS_HARD))
cstate_context.context.flags |= CS_SERIALIZE;
- return change_cluster_wide_state(do_change_cstate, &cstate_context.context);
+ return change_cluster_wide_state(do_change_cstate, &cstate_context.context, tag);
}
void __change_peer_role(struct drbd_connection *connection, enum drbd_role peer_role)
@@ -5759,7 +5768,8 @@ static bool do_change_repl_state(struct change_context *context, enum change_pha
enum drbd_state_rv change_repl_state(struct drbd_peer_device *peer_device,
enum drbd_repl_state new_repl_state,
- enum chg_state_flags flags)
+ enum chg_state_flags flags,
+ const char *tag)
{
struct change_repl_context repl_context = {
.context = {
@@ -5776,15 +5786,16 @@ enum drbd_state_rv change_repl_state(struct drbd_peer_device *peer_device,
if (new_repl_state == L_WF_BITMAP_S || new_repl_state == L_VERIFY_S)
repl_context.context.change_local_state_last = true;
- return change_cluster_wide_state(do_change_repl_state, &repl_context.context);
+ return change_cluster_wide_state(do_change_repl_state, &repl_context.context, tag);
}
enum drbd_state_rv stable_change_repl_state(struct drbd_peer_device *peer_device,
enum drbd_repl_state repl_state,
- enum chg_state_flags flags)
+ enum chg_state_flags flags,
+ const char *tag)
{
return stable_state_change(peer_device->device->resource,
- change_repl_state(peer_device, repl_state, flags));
+ change_repl_state(peer_device, repl_state, flags, tag));
}
void __change_peer_disk_state(struct drbd_peer_device *peer_device, enum drbd_disk_state disk_state)
@@ -5807,14 +5818,15 @@ void __downgrade_peer_disk_states(struct drbd_connection *connection, enum drbd_
enum drbd_state_rv change_peer_disk_state(struct drbd_peer_device *peer_device,
enum drbd_disk_state disk_state,
- enum chg_state_flags flags)
+ enum chg_state_flags flags,
+ const char *tag)
{
struct drbd_resource *resource = peer_device->device->resource;
unsigned long irq_flags;
begin_state_change(resource, &irq_flags, flags);
__change_peer_disk_state(peer_device, disk_state);
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, tag);
}
void __change_resync_susp_user(struct drbd_peer_device *peer_device,
@@ -5832,7 +5844,7 @@ enum drbd_state_rv change_resync_susp_user(struct drbd_peer_device *peer_device,
begin_state_change(resource, &irq_flags, flags);
__change_resync_susp_user(peer_device, value);
- return end_state_change(resource, &irq_flags);
+ return end_state_change(resource, &irq_flags, value ? "pause-sync" : "resume-sync");
}
void __change_resync_susp_peer(struct drbd_peer_device *peer_device,
diff --git a/drbd/drbd_state.h b/drbd/drbd_state.h
index 6942f85123dd..952dea156697 100644
--- a/drbd/drbd_state.h
+++ b/drbd/drbd_state.h
@@ -56,12 +56,13 @@ extern void state_change_lock(struct drbd_resource *, unsigned long *, enum chg_
extern void state_change_unlock(struct drbd_resource *, unsigned long *);
extern void begin_state_change(struct drbd_resource *, unsigned long *, enum chg_state_flags);
-extern enum drbd_state_rv end_state_change(struct drbd_resource *, unsigned long *);
+extern enum drbd_state_rv end_state_change(struct drbd_resource *resource, unsigned long *irq_flags,
+ const char *tag);
extern void abort_state_change(struct drbd_resource *, unsigned long *);
extern void abort_state_change_locked(struct drbd_resource *resource);
extern void begin_state_change_locked(struct drbd_resource *, enum chg_state_flags);
-extern enum drbd_state_rv end_state_change_locked(struct drbd_resource *);
+extern enum drbd_state_rv end_state_change_locked(struct drbd_resource *resource, const char *tag);
extern void clear_remote_state_change(struct drbd_resource *resource);
extern void __clear_remote_state_change(struct drbd_resource *resource);
@@ -89,7 +90,11 @@ extern enum drbd_state_rv nested_twopc_request(struct drbd_resource *res, struct
extern bool drbd_twopc_between_peer_and_me(struct drbd_connection *connection);
extern bool cluster_wide_reply_ready(struct drbd_resource *);
-extern enum drbd_state_rv change_role(struct drbd_resource *, enum drbd_role, enum chg_state_flags, const char **);
+extern enum drbd_state_rv change_role(struct drbd_resource *resource,
+ enum drbd_role role,
+ enum chg_state_flags flags,
+ const char *tag,
+ const char **err_str);
extern void __change_io_susp_user(struct drbd_resource *, bool);
extern enum drbd_state_rv change_io_susp_user(struct drbd_resource *, bool, enum chg_state_flags);
@@ -99,27 +104,44 @@ extern void __change_io_susp_quorum(struct drbd_resource *, bool);
extern void __change_disk_state(struct drbd_device *, enum drbd_disk_state);
extern void __downgrade_disk_states(struct drbd_resource *, enum drbd_disk_state);
-extern enum drbd_state_rv change_disk_state(struct drbd_device *, enum drbd_disk_state, enum chg_state_flags, const char **);
+extern enum drbd_state_rv change_disk_state(struct drbd_device *device,
+ enum drbd_disk_state disk_state,
+ enum chg_state_flags flags,
+ const char *tag,
+ const char **err_str);
extern void __change_cstate(struct drbd_connection *, enum drbd_conn_state);
-extern enum drbd_state_rv change_cstate_es(struct drbd_connection *, enum drbd_conn_state, enum chg_state_flags, const char **);
+extern enum drbd_state_rv change_cstate_tag(struct drbd_connection *connection,
+ enum drbd_conn_state cstate,
+ enum chg_state_flags flags,
+ const char *tag,
+ const char **err_str);
static inline enum drbd_state_rv change_cstate(struct drbd_connection *connection,
enum drbd_conn_state cstate,
enum chg_state_flags flags)
{
- return change_cstate_es(connection, cstate, flags, NULL);
+ return change_cstate_tag(connection, cstate, flags, NULL, NULL);
}
extern void __change_peer_role(struct drbd_connection *, enum drbd_role);
extern void __change_repl_state(struct drbd_peer_device *, enum drbd_repl_state);
-extern enum drbd_state_rv change_repl_state(struct drbd_peer_device *, enum drbd_repl_state, enum chg_state_flags);
-extern enum drbd_state_rv stable_change_repl_state(struct drbd_peer_device *, enum drbd_repl_state, enum chg_state_flags);
+extern enum drbd_state_rv change_repl_state(struct drbd_peer_device *peer_device,
+ enum drbd_repl_state new_repl_state,
+ enum chg_state_flags flags,
+ const char *tag);
+extern enum drbd_state_rv stable_change_repl_state(struct drbd_peer_device *peer_device,
+ enum drbd_repl_state repl_state,
+ enum chg_state_flags flags,
+ const char *tag);
extern void __change_peer_disk_state(struct drbd_peer_device *, enum drbd_disk_state);
extern void __downgrade_peer_disk_states(struct drbd_connection *, enum drbd_disk_state);
extern void __outdate_myself(struct drbd_resource *resource);
-extern enum drbd_state_rv change_peer_disk_state(struct drbd_peer_device *, enum drbd_disk_state, enum chg_state_flags);
+extern enum drbd_state_rv change_peer_disk_state(struct drbd_peer_device *peer_device,
+ enum drbd_disk_state disk_state,
+ enum chg_state_flags flags,
+ const char *tag);
enum drbd_state_rv twopc_after_lost_peer(struct drbd_resource *resource, enum chg_state_flags);
--
2.35.3