SHA256
1
0
forked from pool/drbd
drbd/0001-drbd-properly-rate-limit-resync-progress-reports.patch
heming zhao 6ee9ba5898 - Update DRBD version from 9.1.22 to 9.1.23 (boo#1234849)
* Changelog from Linbit:
    9.1.23 (api:genl2/proto:86-101,118-121/transport:18)
    --------
     * Fix a corner case that can happen when DRBD establishes multiple
       connections in parallel, which could lead one connection to end up in
       an inconsistent replication state of WFBitMapT/Established
     * Fix a corner case in which a reconciliation resync ends up in
       WFBitMapT/Established
     * Restrict protocol compatibility to the most recent 8.4 and 9.0 releases
     * Fix a corner case causing a module ref leak on drbd_transport_tcp;
       if it hits, you can not rmmod it
     * rate-limit resync progress while resync is paused
     * resync-target inherits history UUIDs when resync finishes,
       this can prevent unexpected "unrelated data" events later
     * Updated compatibility code for Linux 6.11 and 6.12
  * remove patches which are already included in the new version:
     0001-drbd-properly-rate-limit-resync-progress-reports.patch
     0002-drbd-inherit-history-UUIDs-from-sync-source-when-res.patch
     0003-build-compat-fix-line-offset-in-annotation-pragmas-p.patch
     0004-drbd-fix-exposed_uuid-going-backward.patch
     0005-drbd-Proper-locking-around-new_current_uuid-on-a-dis.patch
     0006-build-CycloneDX-fix-bom-ref-add-purl.patch
     0007-build-Another-update-to-the-spdx-files.patch
     0008-build-generate-spdx.json-not-tag-value-format.patch
     0009-compat-fix-gen_patch_names-for-bdev_file_open_by_pat.patch
     0010-compat-fix-nla_nest_start_noflag-test.patch
     0011-compat-fix-blk_alloc_disk-rule.patch
     0012-drbd-remove-const-from-function-return-type.patch
     0013-drbd-don-t-set-max_write_zeroes_sectors-in-decide_on.patch
     0014-drbd-split-out-a-drbd_discard_supported-helper.patch
     0015-drbd-atomically-update-queue-limits-in-drbd_reconsid.patch
     0016-compat-test-and-patch-for-queue_limits_start_update.patch
     0017-compat-specify-which-essential-change-was-not-made.patch
     0018-gen_patch_names-reorder-blk_mode_t.patch
     0019-compat-fix-blk_queue_update_readahead-patch.patch
     0020-compat-test-and-patch-for-que_limits-max_hw_discard_.patch
     0021-compat-fixup-write_zeroes__no_capable.patch
     0022-compat-fixup-queue_flag_discard__yes_present.patch
     0023-drbd-move-flags-to-queue_limits.patch
     0024-compat-test-and-patch-for-queue_limits.features.patch
     0025-drbd-Annotate-struct-fifo_buffer-with-__counted_by.patch
     0026-compat-test-and-patch-for-__counted_by.patch
     0027-drbd-fix-function-cast-warnings-in-state-machine.patch
     0028-Add-missing-documentation-of-peer_device-parameter-t.patch
     0030-drbd-kref_put-path-when-kernel_accept-fails.patch
     0031-build-fix-typo-in-Makefile.spatch.patch
     0032-drbd-open-do-not-delay-open-if-already-Primary.patch
  * remove patches which are no longer needed:
     boo1231290_fix_drbd_build_error_against_kernel_v6.11.0.patch
     boo1233222_fix_drbd_build_error_against_kernel_v6.11.6.patch
  * update:
     drbd_git_revision
     drbd.spec
  * add upstream patch to align with commit d64ebe7eb7df:
     0001-drbd-Fix-memory-leak.patch

OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/drbd?expand=0&rev=155
2024-12-27 03:43:25 +00:00

120 lines
4.8 KiB
Diff

From aab03bfc73a62f95011316545a5c0fbb4817741b Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 14 Aug 2024 11:49:42 +0200
Subject: [PATCH 01/32] drbd: properly rate-limit resync progress reports
A peer_device in "paused" sync would have flooded the "drbd events2"
generic netlink broadcast with "resync progress reports",
if it cleared significant out-of-sync bits,
as is the case with application writes,
or several peers syncing from the same sync source
and having a "paused sync" replication state between themselves.
If you have "many" such resources, this storm may even overflow receive buffers.
At most one progress report every three seconds should be enough,
and is what was intended.
Use a new "last progress report time stamp" to throttle
advancing resync progress marks and progress report broadcasts.
---
drbd/drbd_actlog.c | 35 +++++++++++++++++++++++------------
drbd/drbd_int.h | 1 +
drbd/drbd_receiver.c | 1 +
drbd/drbd_state.c | 2 ++
4 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/drbd/drbd_actlog.c b/drbd/drbd_actlog.c
index b96560843878..646dcb29e1d9 100644
--- a/drbd/drbd_actlog.c
+++ b/drbd/drbd_actlog.c
@@ -1020,19 +1020,30 @@ static bool update_rs_extent(struct drbd_peer_device *peer_device,
void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go)
{
- unsigned long now = jiffies;
- unsigned long last = peer_device->rs_mark_time[peer_device->rs_last_mark];
- int next = (peer_device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
- if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
- if (peer_device->rs_mark_left[peer_device->rs_last_mark] != still_to_go &&
- peer_device->repl_state[NOW] != L_PAUSED_SYNC_T &&
- peer_device->repl_state[NOW] != L_PAUSED_SYNC_S) {
- peer_device->rs_mark_time[next] = now;
- peer_device->rs_mark_left[next] = still_to_go;
- peer_device->rs_last_mark = next;
- }
- drbd_peer_device_post_work(peer_device, RS_PROGRESS);
+ unsigned long now;
+ int next;
+
+ /* report progress and advance marks only if we made progress */
+ if (peer_device->rs_mark_left[peer_device->rs_last_mark] == still_to_go)
+ return;
+
+ /* report progress and advance marks at most once every DRBD_SYNC_MARK_STEP (3 seconds) */
+ now = jiffies;
+ if (!time_after_eq(now, peer_device->rs_last_progress_report_ts + DRBD_SYNC_MARK_STEP))
+ return;
+
+ /* Do not advance marks if we are "paused" */
+ if (peer_device->repl_state[NOW] != L_PAUSED_SYNC_T &&
+ peer_device->repl_state[NOW] != L_PAUSED_SYNC_S) {
+ next = (peer_device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+ peer_device->rs_mark_time[next] = now;
+ peer_device->rs_mark_left[next] = still_to_go;
+ peer_device->rs_last_mark = next;
}
+
+ /* But still report progress even if paused. */
+ peer_device->rs_last_progress_report_ts = now;
+ drbd_peer_device_post_work(peer_device, RS_PROGRESS);
}
/* It is called lazy update, so don't do write-out too often. */
diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h
index 49bd7b0c407c..c18407899f59 100644
--- a/drbd/drbd_int.h
+++ b/drbd/drbd_int.h
@@ -1285,6 +1285,7 @@ struct drbd_peer_device {
unsigned long rs_paused;
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum;
+ unsigned long rs_last_progress_report_ts;
#define DRBD_SYNC_MARKS 8
#define DRBD_SYNC_MARK_STEP (3*HZ)
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index 19634f6423bd..ee54cf3ac116 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -3409,6 +3409,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
peer_device->ov_skipped = 0;
peer_device->rs_total = ov_left;
peer_device->rs_last_writeout = now;
+ peer_device->rs_last_progress_report_ts = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
peer_device->rs_mark_left[i] = ov_left;
peer_device->rs_mark_time[i] = now;
diff --git a/drbd/drbd_state.c b/drbd/drbd_state.c
index be1de8f0653b..44f55ee5c939 100644
--- a/drbd/drbd_state.c
+++ b/drbd/drbd_state.c
@@ -2483,6 +2483,7 @@ static void initialize_resync_progress_marks(struct drbd_peer_device *peer_devic
unsigned long now = jiffies;
int i;
+ peer_device->rs_last_progress_report_ts = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
peer_device->rs_mark_left[i] = tw;
peer_device->rs_mark_time[i] = now;
@@ -2730,6 +2731,7 @@ static void finish_state_change(struct drbd_resource *resource, const char *tag)
peer_device->ov_last_skipped_size = 0;
peer_device->ov_last_skipped_start = 0;
peer_device->rs_last_writeout = now;
+ peer_device->rs_last_progress_report_ts = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
peer_device->rs_mark_left[i] = peer_device->rs_total;
peer_device->rs_mark_time[i] = now;
--
2.35.3