From 6649b328a4ab7d0419a1ca594c3bc8247f0c075d0565302bfc2a47a41bec56dd Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 8 Jan 2018 02:59:01 +0000 Subject: [PATCH 1/2] Accepting request 562378 from home:BinLiu:branches:network:ha-clustering:Factory - issue with partial packets assembly when multiple nodes are sending big packets(bsc#1074929) Added: 0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch OBS-URL: https://build.opensuse.org/request/show/562378 OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/corosync?expand=0&rev=130 --- ...sue-with-partial-big-packet-assembly.patch | 179 ++++++++++++++++++ corosync-2.4.3.tar.gz | 4 +- corosync.changes | 6 + corosync.spec | 2 + 4 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch diff --git a/0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch b/0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch new file mode 100644 index 0000000..c4b2838 --- /dev/null +++ b/0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch @@ -0,0 +1,179 @@ +From 86579ff5f8f8d93e3173731bdf632827d4d1c711 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Rytis=20Karpu=C5=A1ka?= +Date: Thu, 28 Dec 2017 15:17:12 +0200 +Subject: [PATCH 1/2] libcpg: Fix issue with partial big packet assembly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Packet assembly is done separately for each nodeid, pid pair, +therefore multiple packets are not mixed into single buffer. 
+ +(backported from master c9dd11772cd6660d7651b6781df963efa914652e) + +Signed-off-by: Rytis Karpuška +Reviewed-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + lib/cpg.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 76 insertions(+), 20 deletions(-) + +diff --git a/lib/cpg.c b/lib/cpg.c +index c831390b..a0e662f0 100644 +--- a/lib/cpg.c ++++ b/lib/cpg.c +@@ -80,6 +80,15 @@ + */ + #define CPG_MEMORY_MAP_UMASK 077 + ++struct cpg_assembly_data ++{ ++ struct list_head list; ++ uint32_t nodeid; ++ uint32_t pid; ++ char *assembly_buf; ++ uint32_t assembly_buf_ptr; ++}; ++ + struct cpg_inst { + qb_ipcc_connection_t *c; + int finalize; +@@ -89,14 +98,8 @@ struct cpg_inst { + cpg_model_v1_data_t model_v1_data; + }; + struct list_head iteration_list_head; +- uint32_t max_msg_size; +- char *assembly_buf; +- uint32_t assembly_buf_ptr; +- int assembling; /* Flag that says we have started assembling a message. +- * It's here to catch the situation where a node joins +- * the cluster/group in the middle of a CPG message send +- * so we don't pass on a partial message to the client. 
+- */ ++ uint32_t max_msg_size; ++ struct list_head assembly_list_head; + }; + static void cpg_inst_free (void *inst); + +@@ -231,6 +234,8 @@ cs_error_t cpg_model_initialize ( + + list_init(&cpg_inst->iteration_list_head); + ++ list_init(&cpg_inst->assembly_list_head); ++ + hdb_handle_put (&cpg_handle_t_db, *handle); + + return (CS_OK); +@@ -382,6 +387,8 @@ cs_error_t cpg_dispatch ( + struct cpg_address left_list[CPG_MEMBERS_MAX]; + struct cpg_address joined_list[CPG_MEMBERS_MAX]; + struct cpg_name group_name; ++ struct cpg_assembly_data *assembly_data; ++ struct list_head *iter, *tmp_iter; + mar_cpg_address_t *left_list_start; + mar_cpg_address_t *joined_list_start; + unsigned int i; +@@ -471,32 +478,63 @@ cs_error_t cpg_dispatch ( + &group_name, + &res_cpg_partial_deliver_callback->group_name); + ++ /* ++ * Search for assembly data for current messages (nodeid, pid) pair in list of assemblies ++ */ ++ assembly_data = NULL; ++ for (iter = cpg_inst->assembly_list_head.next; iter != &cpg_inst->assembly_list_head; iter = iter->next) { ++ struct cpg_assembly_data *current_assembly_data = list_entry (iter, struct cpg_assembly_data, list); ++ if (current_assembly_data->nodeid == res_cpg_partial_deliver_callback->nodeid && current_assembly_data->pid == res_cpg_partial_deliver_callback->pid) { ++ assembly_data = current_assembly_data; ++ break; ++ } ++ } ++ + if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_FIRST) { ++ + /* +- * Allocate a buffer to contain a full message. 
++ * As this is LIBCPG_PARTIAL_FIRST packet, check that there is no ongoing assembly + */ +- cpg_inst->assembly_buf = malloc(res_cpg_partial_deliver_callback->msglen); +- if (!cpg_inst->assembly_buf) { ++ if (assembly_data) { ++ error = CS_ERR_MESSAGE_ERROR; ++ goto error_put; ++ } ++ ++ assembly_data = malloc(sizeof(struct cpg_assembly_data)); ++ if (!assembly_data) { + error = CS_ERR_NO_MEMORY; + goto error_put; + } +- cpg_inst->assembling = 1; +- cpg_inst->assembly_buf_ptr = 0; ++ ++ assembly_data->nodeid = res_cpg_partial_deliver_callback->nodeid; ++ assembly_data->pid = res_cpg_partial_deliver_callback->pid; ++ assembly_data->assembly_buf = malloc(res_cpg_partial_deliver_callback->msglen); ++ if (!assembly_data->assembly_buf) { ++ free(assembly_data); ++ error = CS_ERR_NO_MEMORY; ++ goto error_put; ++ } ++ assembly_data->assembly_buf_ptr = 0; ++ list_init (&assembly_data->list); ++ ++ list_add (&assembly_data->list, &cpg_inst->assembly_list_head); + } +- if (cpg_inst->assembling) { +- memcpy(cpg_inst->assembly_buf + cpg_inst->assembly_buf_ptr, +- res_cpg_partial_deliver_callback->message, res_cpg_partial_deliver_callback->fraglen); +- cpg_inst->assembly_buf_ptr += res_cpg_partial_deliver_callback->fraglen; ++ if (assembly_data) { ++ memcpy(assembly_data->assembly_buf + assembly_data->assembly_buf_ptr, ++ res_cpg_partial_deliver_callback->message, res_cpg_partial_deliver_callback->fraglen); ++ assembly_data->assembly_buf_ptr += res_cpg_partial_deliver_callback->fraglen; + + if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_LAST) { + cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, + &group_name, + res_cpg_partial_deliver_callback->nodeid, + res_cpg_partial_deliver_callback->pid, +- cpg_inst->assembly_buf, ++ assembly_data->assembly_buf, + res_cpg_partial_deliver_callback->msglen); +- free(cpg_inst->assembly_buf); +- cpg_inst->assembling = 0; ++ ++ list_del (&assembly_data->list); ++ free(assembly_data->assembly_buf); ++ free(assembly_data); + } + 
} + break; +@@ -538,6 +576,24 @@ cs_error_t cpg_dispatch ( + joined_list, + res_cpg_confchg_callback->joined_list_entries); + ++ /* ++ * If member left while his partial packet was being assembled, assembly data must be removed from list ++ */ ++ for (i = 0; i < res_cpg_confchg_callback->left_list_entries; i++) { ++ for (iter = cpg_inst->assembly_list_head.next; iter != &cpg_inst->assembly_list_head;iter = tmp_iter) { ++ struct cpg_assembly_data *current_assembly_data = list_entry (iter, struct cpg_assembly_data, list); ++ ++ tmp_iter = iter->next; ++ ++ if (current_assembly_data->nodeid != left_list[i].nodeid || current_assembly_data->pid != left_list[i].pid) ++ continue; ++ ++ list_del (&current_assembly_data->list); ++ free(current_assembly_data->assembly_buf); ++ free(current_assembly_data); ++ } ++ } ++ + break; + case MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK: + if (cpg_inst_copy.model_v1_data.cpg_totem_confchg_fn == NULL) { +-- +2.13.6 + diff --git a/corosync-2.4.3.tar.gz b/corosync-2.4.3.tar.gz index 0da37ad..531d0fe 100644 --- a/corosync-2.4.3.tar.gz +++ b/corosync-2.4.3.tar.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1e7e422e8d2f7af8e5315fa329b1dbd1175fff32efaa55e8054584c80a2c597 -size 1195419 +oid sha256:2752c8707f95e510fd0a4de47b55f0055cd3e16be8f5bfcef0ef8d2e85b84b82 +size 578510 diff --git a/corosync.changes b/corosync.changes index 9490db3..2e335bd 100644 --- a/corosync.changes +++ b/corosync.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Thu Jan 4 04:36:41 UTC 2018 - bliu@suse.com + +- issue with partial packets assembly when multiple nodes are sending big packets(bsc#1074929) + Added: 0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch + ------------------------------------------------------------------- Tue Jan 2 06:15:29 UTC 2018 - bliu@suse.com diff --git a/corosync.spec b/corosync.spec index 0378e64..b08e967 100644 --- a/corosync.spec +++ b/corosync.spec @@ -72,6 +72,7 
@@ Patch14: 0007-sync-Call-sync_init-of-all-services-at-once.patch Patch15: 0008-wd-fix-snprintf-warnings.patch Patch16: 0009-add-config-for-corosync-qnetd.patch Patch17: 0010-qdevice-mv-free-str-after-port-validation.patch +Patch18: 0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build # openais is indeed gone and should be uninstalled. Yes, we do not @@ -154,6 +155,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %patch15 -p1 %patch16 -p1 %patch17 -p1 +%patch18 -p1 %build %if %{with runautogen} From 1cb59d80950297bae580f56daf1391fbf91175dd6a86cdbf1da9fc93c241a6ce Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 10 Jan 2018 09:59:41 +0000 Subject: [PATCH 2/2] Accepting request 563223 from home:BinLiu:branches:network:ha-clustering:Factory - totemudp[u]: Drop truncated packets on receive(bsc#1075300) Added: 0012-totemudp-u-Drop-truncated-packets-on-receive.patch OBS-URL: https://build.opensuse.org/request/show/563223 OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/corosync?expand=0&rev=131 --- ...-u-Drop-truncated-packets-on-receive.patch | 109 ++++++++++++++++++ corosync.changes | 6 + corosync.spec | 2 + 3 files changed, 117 insertions(+) create mode 100644 0012-totemudp-u-Drop-truncated-packets-on-receive.patch diff --git a/0012-totemudp-u-Drop-truncated-packets-on-receive.patch b/0012-totemudp-u-Drop-truncated-packets-on-receive.patch new file mode 100644 index 0000000..5843f52 --- /dev/null +++ b/0012-totemudp-u-Drop-truncated-packets-on-receive.patch @@ -0,0 +1,109 @@ +From 89d36a80a4899cb9536ad22cb16615c9eff043de Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Fri, 5 Jan 2018 16:38:43 +0100 +Subject: [PATCH 1/2] totemudp[u]: Drop truncated packets on receive + +This is backport of part of "totemudpu: Scale receive buffer" patch in +master branch. + +We shouldn't need to enlarge buffer because maximum number of nodes +for needle is 2 so join message is smaller. 
+ +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemudp.c | 26 ++++++++++++++++++++++++++ + exec/totemudpu.c | 26 ++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+) + +diff --git a/exec/totemudp.c b/exec/totemudp.c +index 2f36b5d9..40e99f93 100644 +--- a/exec/totemudp.c ++++ b/exec/totemudp.c +@@ -452,6 +452,7 @@ static int net_deliver_fn ( + struct sockaddr_storage system_from; + int bytes_received; + int res = 0; ++ int truncated_packet; + + if (instance->flushing == 1) { + iovec = &instance->totemudp_iov_recv_flush; +@@ -489,6 +490,31 @@ static int net_deliver_fn ( + instance->stats_recv += bytes_received; + } + ++ truncated_packet = 0; ++ ++#ifdef HAVE_MSGHDR_FLAGS ++ if (msg_recv.msg_flags & MSG_TRUNC) { ++ truncated_packet = 1; ++ } ++#else ++ /* ++ * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that ++ * if bytes_received == FRAME_SIZE_MAX then packet is truncated ++ */ ++ if (bytes_received == FRAME_SIZE_MAX) { ++ truncated_packet = 1; ++ } ++#endif ++ ++ if (truncated_packet) { ++ log_printf(instance->totemudp_log_level_error, ++ "Received too big message. This may be because something bad is happening" ++ "on the network (attack?), or you tried join more nodes than corosync is" ++ "compiled with (%u) or bug in the code (bad estimation of " ++ "the FRAME_SIZE_MAX). 
Dropping packet.", PROCESSOR_COUNT_MAX); ++ return (0); ++ } ++ + /* + * Authenticate and if authenticated, decrypt datagram + */ +diff --git a/exec/totemudpu.c b/exec/totemudpu.c +index 9e076423..569e67a0 100644 +--- a/exec/totemudpu.c ++++ b/exec/totemudpu.c +@@ -446,6 +446,7 @@ static int net_deliver_fn ( + struct sockaddr_storage system_from; + int bytes_received; + int res = 0; ++ int truncated_packet; + + iovec = &instance->totemudpu_iov_recv; + +@@ -479,6 +480,31 @@ static int net_deliver_fn ( + instance->stats_recv += bytes_received; + } + ++ truncated_packet = 0; ++ ++#ifdef HAVE_MSGHDR_FLAGS ++ if (msg_recv.msg_flags & MSG_TRUNC) { ++ truncated_packet = 1; ++ } ++#else ++ /* ++ * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that ++ * if bytes_received == FRAME_SIZE_MAX then packet is truncated ++ */ ++ if (bytes_received == FRAME_SIZE_MAX) { ++ truncated_packet = 1; ++ } ++#endif ++ ++ if (truncated_packet) { ++ log_printf(instance->totemudpu_log_level_error, ++ "Received too big message. This may be because something bad is happening" ++ "on the network (attack?), or you tried join more nodes than corosync is" ++ "compiled with (%u) or bug in the code (bad estimation of " ++ "the FRAME_SIZE_MAX). 
Dropping packet.", PROCESSOR_COUNT_MAX); ++ return (0); ++ } ++ + /* + * Authenticate and if authenticated, decrypt datagram + */ +-- +2.13.6 + diff --git a/corosync.changes b/corosync.changes index 2e335bd..ef175a4 100644 --- a/corosync.changes +++ b/corosync.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Wed Jan 10 02:36:24 UTC 2018 - bliu@suse.com + +- totemudp[u]: Drop truncated packets on receive(bsc#1075300) + Added: 0012-totemudp-u-Drop-truncated-packets-on-receive.patch + ------------------------------------------------------------------- Thu Jan 4 04:36:41 UTC 2018 - bliu@suse.com diff --git a/corosync.spec b/corosync.spec index b08e967..1206049 100644 --- a/corosync.spec +++ b/corosync.spec @@ -73,6 +73,7 @@ Patch15: 0008-wd-fix-snprintf-warnings.patch Patch16: 0009-add-config-for-corosync-qnetd.patch Patch17: 0010-qdevice-mv-free-str-after-port-validation.patch Patch18: 0011-libcpg-Fix-issue-with-partial-big-packet-assembly.patch +Patch19: 0012-totemudp-u-Drop-truncated-packets-on-receive.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build # openais is indeed gone and should be uninstalled. Yes, we do not @@ -156,6 +157,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %patch16 -p1 %patch17 -p1 %patch18 -p1 +%patch19 -p1 %build %if %{with runautogen}