From e15ba3799caf4cdcdf151078efe01e9a3f3ab206 Mon Sep 17 00:00:00 2001 From: Nick Wang Date: Fri, 10 Jul 2015 17:28:18 +0800 Subject: [PATCH] drbd: Support zeroout device instead of initial full sync Patch set for zeroing out the device on both sides instead of doing an initial full sync. Useful in high-latency network environments. Implements --zeroout-devices and --discard-devices for new-current-uuid. --- drbd/drbd_int.h | 15 +++++++ drbd/drbd_main.c | 60 +++++++++++++++++++++++++++- drbd/drbd_nl.c | 41 +++++++++++++++++-- drbd/drbd_protocol.h | 2 + drbd/drbd_receiver.c | 86 +++++++++++++++++++++++++++++++++++++++- drbd/drbd_worker.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ drbd/linux/drbd_genl.h | 2 + 7 files changed, 305 insertions(+), 6 deletions(-) diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h index d1e2bc0..555b24c 100644 --- a/drbd/drbd_int.h +++ b/drbd/drbd_int.h @@ -621,6 +621,13 @@ enum { RS_START, /* tell worker to start resync/OV */ RS_PROGRESS, /* tell worker that resync made significant progress */ RS_DONE, /* tell worker that resync is done */ + P_ZERO_START, /* tell worker to zero out device */ + S_ZERO_START, /* tell worker to zero out device as requested by peer */ + + /* used for zero out/discard device */ + DISCARD_DISK, /* flag to discard device */ + ZERO_DONE, /* succeeded in zeroing out a device */ + ZERO_FAIL, /* failed to zero out a device */ }; struct drbd_bitmap; /* opaque for drbd_device */ @@ -1204,6 +1211,11 @@ extern int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_pa extern int drbd_send_protocol(struct drbd_connection *connection); extern int drbd_send_uuids(struct drbd_peer_device *); extern int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *); +extern int drbd_send_zeroout_start(struct drbd_peer_device *); +extern int drbd_send_discard_start(struct drbd_peer_device *); +extern int drbd_send_zeroout_finish(struct drbd_peer_device *); +extern int drbd_send_zeroout_ok(struct drbd_peer_device *); +extern int 
drbd_send_zeroout_fail(struct drbd_peer_device *); extern void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *); extern int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags); extern int drbd_send_state(struct drbd_peer_device *, union drbd_state); @@ -1661,6 +1673,9 @@ extern void drbd_send_acks_wf(struct work_struct *ws); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, bool throttle_if_app_is_waiting); +extern int zeroout_local_device(struct drbd_device *device, bool discard); +extern void require_zeroout_local_device(struct drbd_device *device); +extern void receive_zeroout_local_device(struct drbd_device *device); extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, const int); diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c index 31bf43f..badc719 100644 --- a/drbd/drbd_main.c +++ b/drbd/drbd_main.c @@ -908,6 +908,62 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *peer_device) return _drbd_send_uuids(peer_device, 8); } + +/** + * drbd_send_zeroout_start() - Ask the peer to zero out its device + * @peer_device: DRBD peer device. + */ +int drbd_send_zeroout_start(struct drbd_peer_device *peer_device) +{ + return _drbd_send_uuids(peer_device, 16); /* receive_uuids() treats flag 16 as a zero-out request */ +} + +/** + * drbd_send_discard_start() - Ask the peer to zero out its device by discarding + * @peer_device: DRBD peer device. + */ +int drbd_send_discard_start(struct drbd_peer_device *peer_device) +{ + return _drbd_send_uuids(peer_device, 32); /* receive_uuids() treats flag 32 as a discard request */ +} + +/** + * drbd_send_zeroout_finish() - Tell the peer that both nodes finished zeroing out + * @peer_device: DRBD peer device. + */ +int drbd_send_zeroout_finish(struct drbd_peer_device *peer_device) +{ + return _drbd_send_uuids(peer_device, 64); /* receive_uuids() treats flag 64 as zero-out finished on both sides */ +} + +/** + * _drbd_send_zeroout_state() - Send the local zero-out result to the peer in a P_ZERO_OUT packet + * @peer_device: DRBD peer device. 
+ * @status: Zero-out result to report; 0 means success, nonzero means failure. + */ +static int _drbd_send_zeroout_state(struct drbd_peer_device *peer_device, unsigned int status) +{ + struct drbd_socket *sock; + struct p_state *p; + + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); + if (!p) + return -EIO; + p->state = cpu_to_be32(status); + return drbd_send_command(peer_device, sock, P_ZERO_OUT, sizeof(*p), NULL, 0); +} + +int drbd_send_zeroout_ok(struct drbd_peer_device *peer_device) +{ + return _drbd_send_zeroout_state(peer_device, 0); /* 0: local zero-out succeeded */ +} + +int drbd_send_zeroout_fail(struct drbd_peer_device *peer_device) +{ + return _drbd_send_zeroout_state(peer_device, 1); /* 1: local zero-out failed */ +} + void drbd_print_uuids(struct drbd_device *device, const char *text) { if (get_ldev_if_state(device, D_NEGOTIATING)) { @@ -3466,8 +3522,8 @@ void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local) { int i; - for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) - device->ldev->md.uuid[i+1] = device->ldev->md.uuid[i]; + for (i = UI_HISTORY_END; i > UI_HISTORY_START; i--) /* copy from the top down so no slot is overwritten before it is moved */ + device->ldev->md.uuid[i] = device->ldev->md.uuid[i-1]; } void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) diff --git a/drbd/drbd_nl.c b/drbd/drbd_nl.c index bb7e1b0..dc2bfec 100644 --- a/drbd/drbd_nl.c +++ b/drbd/drbd_nl.c @@ -4015,8 +4015,11 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; struct drbd_device *device; + struct drbd_peer_device *peer_device = NULL; enum drbd_ret_code retcode; int skip_initial_sync = 0; + int zeroout_devices = 0; + int discard_devices = 0; int err; struct new_c_uuid_parms args; @@ -4045,12 +4048,28 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) goto out; } + peer_device = first_peer_device(device); + /* this is "skip initial sync", assume to be clean */ if (device->state.conn == C_CONNECTED && - first_peer_device(device)->connection->agreed_pro_version >= 90 && + 
peer_device->connection->agreed_pro_version >= 90 && device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { drbd_info(device, "Preparing to skip initial sync\n"); skip_initial_sync = 1; + /* Zero out or discard both devices so that they are identical (all zero). + * The clear_bm branch above is tested first, so it takes precedence when both are requested. */ + } else if (device->state.conn == C_CONNECTED && + peer_device->connection->agreed_features & FF_DISCARD && + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + args.zeroout_devices) { + drbd_info(device, "Preparing to zero out devices, will take a long time\n"); + zeroout_devices = 1; + } else if (device->state.conn == C_CONNECTED && + peer_device->connection->agreed_features & FF_DISCARD && + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + args.discard_devices) { + drbd_info(device, "Preparing to discard devices, will take a long time\n"); + discard_devices = 1; } else if (device->state.conn != C_STANDALONE) { retcode = ERR_CONNECTED; goto out_dec; @@ -4059,7 +4078,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... 
*/ drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */ - if (args.clear_bm) { + if (args.clear_bm || args.zeroout_devices || args.discard_devices) { /* NOTE(review): tests the raw args, not the accepted zeroout_devices/discard_devices flags - confirm intended */ err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid", BM_LOCKED_MASK); if (err) { @@ -4067,7 +4086,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) retcode = ERR_IO_MD_DISK; } if (skip_initial_sync) { - drbd_send_uuids_skip_initial_sync(first_peer_device(device)); + drbd_send_uuids_skip_initial_sync(peer_device); _drbd_uuid_set(device, UI_BITMAP, 0); drbd_print_uuids(device, "cleared bitmap UUID"); spin_lock_irq(&device->resource->req_lock); @@ -4075,6 +4094,22 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) CS_VERBOSE, NULL); spin_unlock_irq(&device->resource->req_lock); } + if (zeroout_devices || discard_devices) { + if (discard_devices) { + drbd_send_discard_start(peer_device); + set_bit(DISCARD_DISK, &device->flags); + } else { + drbd_send_zeroout_start(peer_device); + clear_bit(DISCARD_DISK, &device->flags); + } + _drbd_uuid_set(device, UI_BITMAP, 0); + drbd_print_uuids(device, "cleared bitmap UUID for zeroing device"); + + /* Clear stale zero-out result flags before starting a new run */ + clear_bit(ZERO_DONE, &device->flags); + clear_bit(ZERO_FAIL, &device->flags); + drbd_device_post_work(device, P_ZERO_START); /* worker then runs require_zeroout_local_device() */ + } } drbd_md_sync(device); diff --git a/drbd/drbd_protocol.h b/drbd/drbd_protocol.h index 405b181..d9db0ce 100644 --- a/drbd/drbd_protocol.h +++ b/drbd/drbd_protocol.h @@ -59,6 +59,7 @@ enum drbd_packet { /* REQ_DISCARD. We used "discard" in different contexts before, * which is why I chose TRIM here, to disambiguate. */ P_TRIM = 0x31, + P_ZERO_OUT = 0x32, /* zero-out result report, carried in a struct p_state */ P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... 
*/ P_MAX_OPT_CMD = 0x101, @@ -161,6 +162,7 @@ struct p_block_req { */ #define FF_TRIM 1 +#define FF_DISCARD 4 /* feature bit required before new-current-uuid accepts zero-out/discard */ struct p_connection_features { u32 protocol_min; diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c index 06e5667..ec324ef 100644 --- a/drbd/drbd_receiver.c +++ b/drbd/drbd_receiver.c @@ -47,7 +47,7 @@ #include "drbd_vli.h" #include -#define PRO_FEATURES (FF_TRIM) +#define PRO_FEATURES (FF_TRIM | FF_DISCARD) struct flush_work { struct drbd_work w; @@ -4317,6 +4317,20 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info peer_device->connection->agreed_pro_version >= 90 && device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); + int zeroout_devices = + device->state.conn == C_CONNECTED && + peer_device->connection->agreed_pro_version >= 90 && /* NOTE(review): the sending side gates on agreed_features & FF_DISCARD instead - confirm */ + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + (p_uuid[UI_FLAGS] & 16); /* value passed by drbd_send_zeroout_start() */ + int discard_devices = + device->state.conn == C_CONNECTED && + peer_device->connection->agreed_pro_version >= 90 && + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + (p_uuid[UI_FLAGS] & 32); /* value passed by drbd_send_discard_start() */ + int zeroout_finish = + device->state.conn == C_CONNECTED && + peer_device->connection->agreed_pro_version >= 90 && + (p_uuid[UI_FLAGS] & 64); /* value passed by drbd_send_zeroout_finish() */ if (skip_initial_sync) { drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(device, &drbd_bmio_clear_n_write, @@ -4324,11 +4338,42 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info BM_LOCKED_TEST_ALLOWED); _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(device, UI_BITMAP, 0); + spin_lock_irq(&device->resource->req_lock); _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); + spin_unlock_irq(&device->resource->req_lock); drbd_md_sync(device); updated_uuids = 1; } + + if (zeroout_devices || discard_devices) { + if (discard_devices) { + drbd_info(device, "Accepted to discard devices, will take a 
long time\n"); + set_bit(DISCARD_DISK, &device->flags); + } else { + drbd_info(device, "Accepted to zeroout devices, will take a long time\n"); + clear_bit(DISCARD_DISK, &device->flags); + } + + drbd_bitmap_io(device, &drbd_bmio_clear_n_write, + "clear_n_write from receive_uuids", + BM_LOCKED_TEST_ALLOWED); + _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); + _drbd_uuid_set(device, UI_BITMAP, 0); + drbd_print_uuids(device, "cleared bitmap UUID for zeroing device"); + + drbd_device_post_work(device, S_ZERO_START); /* worker then runs receive_zeroout_local_device() */ + updated_uuids = 1; + } + + if (zeroout_finish) { + drbd_info(device, "Both side finished zero out devices.\n"); + spin_lock_irq(&device->resource->req_lock); + _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + CS_VERBOSE, NULL); + spin_unlock_irq(&device->resource->req_lock); + } + put_ldev(device); } else if (device->state.disk < D_INCONSISTENT && device->state.role == R_PRIMARY) { @@ -4383,6 +4428,41 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } +static int receive_zeroout_state(struct drbd_connection *connection, struct packet_info *pi) +{ + struct drbd_peer_device *peer_device; + struct drbd_device *device; + struct p_state *p = pi->data; + unsigned int isfail; + + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return -EIO; + device = peer_device->device; + + isfail = be32_to_cpu(p->state); /* 0: peer zero-out succeeded, nonzero: it failed */ + + if (isfail) { + drbd_info(device, "Failed to zero out peer device\n"); + set_bit(ZERO_FAIL, &device->flags); + } else { + drbd_info(device, "Finished zero out peer device\n"); + if (test_and_clear_bit(ZERO_DONE, &device->flags)) { + drbd_info(device, "Both side finished zeroing.\n"); + spin_lock_irq(&device->resource->req_lock); + _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, + pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); + spin_unlock_irq(&device->resource->req_lock); + drbd_send_zeroout_finish(peer_device); + } else { + /* peer finished first: remember it until the local zero-out completes. NOTE(review): test_and_clear_bit() followed by set_bit() is not atomic against the worker doing the same on ZERO_DONE - confirm no lost handshake */ + 
set_bit(ZERO_DONE, &device->flags); + } + } + + return 0; +} + static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) { struct drbd_peer_device *peer_device; @@ -5008,6 +5088,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, + [P_ZERO_OUT] = { 0, sizeof(struct p_state), receive_zeroout_state }, }; static void drbdd(struct drbd_connection *connection) @@ -5287,6 +5368,9 @@ static int drbd_do_features(struct drbd_connection *connection) drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n", connection->agreed_features & FF_TRIM ? " " : " not "); + drbd_info(connection, "Agreed to%ssupport DISCARD DEVICE on protocol level\n", + connection->agreed_features & FF_DISCARD ? " " : " not "); + return 1; incompat: diff --git a/drbd/drbd_worker.c b/drbd/drbd_worker.c index 2a15aeb..a8e231f 100644 --- a/drbd/drbd_worker.c +++ b/drbd/drbd_worker.c @@ -1653,6 +1653,105 @@ void drbd_rs_controller_reset(struct drbd_device *device) rcu_read_unlock(); } +/** + * zeroout_local_device() - zero the whole local backing device + * @device: DRBD device. + * @discard: passed on as the discard argument of blkdev_issue_zeroout(). + * + * Description: + * Zero the backing device from sector 0 up to its current capacity and return the blkdev_issue_zeroout() result. 
+ * +**/ +int zeroout_local_device(struct drbd_device *device, bool discard) +{ + struct block_device *bdev; + + bdev = device->ldev->backing_bdev; /* NOTE(review): no get_ldev()/put_ldev() is visible around this ldev access - confirm the caller holds a reference */ + if (device->ldev->known_size != drbd_get_capacity(bdev)) + device->ldev->known_size = drbd_get_capacity(bdev); + + if (discard){ + /* zero out the backing device by discarding blocks */ + return blkdev_issue_zeroout(bdev, 0, + device->ldev->known_size, GFP_NOIO, true); + } else { + /* zero out the backing device with write requests */ + return blkdev_issue_zeroout(bdev, 0, + device->ldev->known_size, GFP_NOIO, false); + } +} + +/** + * require_zeroout_local_device() - worker handler for P_ZERO_START + * @device: DRBD device. + * + * Description: + * Zero out the local device. If the peer already + * reported success (ZERO_DONE set), mark both disks + * D_UP_TO_DATE and send the finish notification. + * +**/ +void require_zeroout_local_device(struct drbd_device *device) +{ + int zeroout_err = 0; + + if (test_and_clear_bit(DISCARD_DISK, &device->flags)) { + zeroout_err = zeroout_local_device(device, true); + } else { + zeroout_err = zeroout_local_device(device, false); + } + + if (zeroout_err) { + drbd_err(device, "Failed to zero out local device\n"); + set_bit(ZERO_FAIL, &device->flags); + drbd_chk_io_error(device, 1, DRBD_WRITE_ERROR); + } else { + drbd_info(device, "Finished zero out local device.\n"); + + if (test_and_clear_bit(ZERO_DONE, &device->flags)) { + spin_lock_irq(&device->resource->req_lock); + _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, + pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); + spin_unlock_irq(&device->resource->req_lock); + drbd_send_zeroout_finish(first_peer_device(device)); + } else if (test_and_clear_bit(ZERO_FAIL, &device->flags)) { + drbd_info(device, "Peer device has already failed on zero out\n"); + } else { + /* local side finished first: remember it until the peer reports its result */ + set_bit(ZERO_DONE, &device->flags); + } + } +} + +/** + * receive_zeroout_local_device() - worker handler for S_ZERO_START + * @device: DRBD device. + * + * Description: + * Zero out the local device as requested by the peer. + * Report the result to the peer (ok or fail packet). 
+ * +**/ +void receive_zeroout_local_device(struct drbd_device *device) +{ + int zeroout_err = 0; + struct drbd_peer_device *const peer_device = first_peer_device(device); + + if (test_and_clear_bit(DISCARD_DISK, &device->flags)) { + zeroout_err = zeroout_local_device(device, true); + } else { + zeroout_err = zeroout_local_device(device, false); + } + if (zeroout_err) { + drbd_err(device, "Failed to zero out local device\n"); + drbd_send_zeroout_fail(peer_device); + drbd_chk_io_error(device, 1, DRBD_WRITE_ERROR); + } else { + drbd_info(device, "Finished zero out local device.\n"); + drbd_send_zeroout_ok(peer_device); + } +} + void start_resync_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; @@ -1986,6 +2085,10 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo) drbd_ldev_destroy(device); if (test_bit(RS_START, &todo)) do_start_resync(device); + if (test_bit(P_ZERO_START, &todo)) + require_zeroout_local_device(device); /* posted by drbd_adm_new_c_uuid() */ + if (test_bit(S_ZERO_START, &todo)) + receive_zeroout_local_device(device); /* posted by receive_uuids() on request of the peer */ } #define DRBD_DEVICE_WORK_MASK \ @@ -1995,6 +2098,8 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo) |(1UL << RS_START) \ |(1UL << RS_PROGRESS) \ |(1UL << RS_DONE) \ + |(1UL << P_ZERO_START) \ + |(1UL << S_ZERO_START) \ ) static unsigned long get_work_bits(unsigned long *flags) diff --git a/drbd/linux/drbd_genl.h b/drbd/linux/drbd_genl.h index 5db53f5..5a31a5c 100644 --- a/drbd/linux/drbd_genl.h +++ b/drbd/linux/drbd_genl.h @@ -240,6 +240,8 @@ GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) + __flg_field(2, 0, zeroout_devices) + __flg_field(3, 0, discard_devices) ) GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, -- 2.1.4