Accepting request 371334 from home:wanghaisu:branches:network:ha-clustering:Factory

bsc#955177: fence when pacemaker on another node is shut down. fate#320495: support the HA resource agent for DRBD 9.

OBS-URL: https://build.opensuse.org/request/show/371334
OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/drbd-utils?expand=0&rev=21
This commit is contained in:
nick wang 2016-03-14 08:13:37 +00:00 committed by Git OBS Bridge
parent cc9f1e0e59
commit 0d6d9ab1ea
6 changed files with 366 additions and 4 deletions

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ca852d250df15089cdfe63c1bfb0aa9203bdf82c3cfd54b86108cc7135aeccd
size 789613

3
drbd-utils-8.9.6.tar.gz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:297b77c9b3f88de2e7dae459234f2753ea4fc2805282b2d276e35cf51e292913
size 788183

View File

@ -1,3 +1,25 @@
-------------------------------------------------------------------
Mon Mar 14 08:01:22 UTC 2016 - nwang@suse.com
- fate#320495: Support the HA resource agent for DRBD 9.
Add patch support-drbd9-ra.patch
-------------------------------------------------------------------
Mon Mar 7 08:18:08 UTC 2016 - nwang@suse.com
- bsc#955177: Fence when pacemaker on another node is shut down.
add patch fence-after-pacemaker-down.patch
-------------------------------------------------------------------
Mon Mar 7 08:13:43 UTC 2016 - nwang@suse.com
- Update to v8.9.6
* Call "drbdsetup resize" only as often as necessary on
"drbdadm resize"
* Disconnect connection first on single path deletion from connection
* Add unfence-peer handler
* Fix "drbdadm adjust" for proxy configurations
-------------------------------------------------------------------
Tue Feb 23 05:19:07 UTC 2016 - nwang@suse.com

View File

@ -17,7 +17,7 @@
Name: drbd-utils
Version: 8.9.5
Version: 8.9.6
Release: 0
Summary: Distributed Replicated Block Device
License: GPL-2.0+
@ -30,6 +30,8 @@ Patch1: init-script-fixes.diff
Patch2: fix-libdir-in-Makefile.patch
Patch3: zeroout-discard-devices.patch
Patch4: service-mod.patch
Patch5: fence-after-pacemaker-down.patch
Patch6: support-drbd9-ra.patch
Provides: drbd-bash-completion = %{version}
Provides: drbd-pacemaker = %{version}
@ -75,6 +77,8 @@ raid 1. It is a building block for setting up clusters.
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%build
./autogen.sh

View File

@ -0,0 +1,29 @@
diff -Naur drbd-utils-8.9.6.orig/scripts/crm-fence-peer.sh drbd-utils-8.9.6/scripts/crm-fence-peer.sh
--- drbd-utils-8.9.6.orig/scripts/crm-fence-peer.sh 2016-03-14 15:54:38.701360775 +0800
+++ drbd-utils-8.9.6/scripts/crm-fence-peer.sh 2016-03-14 16:00:06.333338664 +0800
@@ -531,6 +531,25 @@
[[ $crmd = "banned" ]] && will_fence=true
if [[ ${expected-down} = "down" && $in_ccm = "false" && $crmd != "online" ]]; then
: "pacemaker considers this as clean down"
+ elif [[ $crmd/$join/$expected = "offline/down/down" ]] ; then
+ # Check if pacemaker is simply shutdown, but membership/quorum is
+ # possibly still established (corosync2/cman)
+ # 1.1.11 will set expected="down" on a clean shutdown too
+ # Look for "shutdown" transient node attribute
+ local node_attributes=$(set +x; echo "$cib_xml" |
+ awk "/<node_state [^\n]*uname=\"$DRBD_PEER\"/,/<\/instance_attributes>/"|
+ grep -F -e "<nvpair ")
+ if [ -n "${node_attributes}" ] ; then
+ local shut_down=$(set +x; echo "$node_attributes" |
+ awk '/ name="shutdown"/ {if (match($0, /value=\"([[:digit:]]+)\"/, values)) {print values[1]} }')
+ if [ -n "${shut_down}" ] ; then
+ : "pacemaker considers this as clean down"
+ else
+ will_fence=true
+ fi
+ else
+ will_fence=true
+ fi
elif [[ $in_ccm = false ]] || [[ $crmd != "online" ]]; then
will_fence=true
fi

307
support-drbd9-ra.patch Normal file
View File

@ -0,0 +1,307 @@
diff --git a/scripts/drbd.ocf b/scripts/drbd.ocf
index 632e16e..91990fc 100755
--- a/scripts/drbd.ocf
+++ b/scripts/drbd.ocf
@@ -328,6 +328,23 @@ remove_master_score() {
do_cmd ${HA_SBIN_DIR}/crm_master -l reboot -D
}
+_peer_node_process() {
+ # DRBD 9 supports multiple connections: record one peer connection's state in the DRBD_PER_* arrays, indexed by its peer node id.
+ : ${_peer_node_id:=0} # fall back to index 0 when no peer node id was set
+ DRBD_PER_NAME[$_peer_node_id]=$_conn_name
+ DRBD_PER_ID[$_peer_node_id]=$_peer_node_id
+ DRBD_PER_CSTATE[$_peer_node_id]=$_cstate
+ DRBD_PER_ROLE_REMOTE[$_peer_node_id]=${_peer:-Unknown} # an empty or unset _peer is stored as Unknown
+ DRBD_PER_DSTATE_REMOTE[$_peer_node_id]=${_pdsk:-DUnknown} # an empty or unset _pdsk is stored as DUnknown
+
+ : == DEBUG == _peer_node_id == ${_peer_node_id} ==
+ : == DEBUG == DRBD_PER_NAME[_peer_node_id] == ${DRBD_PER_NAME[${_peer_node_id}]} ==
+ : == DEBUG == DRBD_PER_ID[_peer_node_id] == ${DRBD_PER_ID[${_peer_node_id}]} ==
+ : == DEBUG == DRBD_PER_CSTATE[_peer_node_id] == ${DRBD_PER_CSTATE[${_peer_node_id}]} ==
+ : == DEBUG == DRBD_PER_ROLE_REMOTE[_peer_node_id] == ${DRBD_PER_ROLE_REMOTE[${_peer_node_id}]} ==
+ : == DEBUG == DRBD_PER_DSTATE_REMOTE[_peer_node_id] == ${DRBD_PER_DSTATE_REMOTE[${_peer_node_id}]} ==
+} # the ": == DEBUG ==" lines above are no-ops; presumably they only matter in a shell trace
+
_sh_status_process() {
# _volume not present should not happen,
# but may help make this agent work even if it talks to drbd 8.3.
@@ -335,11 +352,36 @@ _sh_status_process() {
# not-yet-created volumes are reported as -1
(( _volume >= 0 )) || _volume=$[1 << 16]
DRBD_ROLE_LOCAL[$_volume]=${_role:-Unconfigured}
- DRBD_ROLE_REMOTE[$_volume]=${_peer:-Unknown}
- DRBD_CSTATE[$_volume]=$_cstate
DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured}
- DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
+
+ if $DRBD_VERSION_9 ; then
+ #Get from _peer_node_process
+ DRBD_NAME[$_volume]=${DRBD_PER_NAME[@]}
+ DRBD_ID[$_volume]=${DRBD_PER_ID[@]}
+ DRBD_VOLUME[$_volume]=${_volume}
+ DRBD_CSTATE[$_volume]=${DRBD_PER_CSTATE[@]}
+ DRBD_ROLE_REMOTE[$_volume]=${DRBD_PER_ROLE_REMOTE[@]}
+ DRBD_DSTATE_REMOTE[$_volume]=${DRBD_PER_DSTATE_REMOTE[@]}
+
+ DRBD_PER_NAME=()
+ DRBD_PER_ID=()
+ DRBD_PER_CSTATE=()
+ DRBD_PER_ROLE_REMOTE=()
+ DRBD_PER_DSTATE_REMOTE=()
+
+ : == DEBUG == _volume == ${_volume} ==
+ : == DEBUG == DRBD_ROLE_LOCAL == ${DRBD_ROLE_LOCAL[${_volume}]} ==
+ : == DEBUG == DRBD_DSTATE_LOCAL == ${DRBD_DSTATE_LOCAL[${_volume}]} ==
+ : == DEBUG == DRBD_CSTATE == ${DRBD_CSTATE[${_volume}]} ==
+ : == DEBUG == DRBD_ROLE_REMOTE == ${DRBD_ROLE_REMOTE[${_volume}]} ==
+ : == DEBUG == DRBD_DSTATE_REMOTE == ${DRBD_DSTATE_REMOTE[${_volume}]} ==
+ else
+ DRBD_CSTATE[$_volume]=$_cstate
+ DRBD_ROLE_REMOTE[$_volume]=${_peer:-Unknown}
+ DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
+ fi
}
+
drbd_set_status_variables() {
# drbdsetup sh-status prints these values to stdout,
# and then prints _sh_status_process.
@@ -352,6 +394,15 @@ drbd_set_status_variables() {
local _resynced_percent
local out
+ if $DRBD_VERSION_9 ; then
+ local _peer_node_id _conn_name
+ DRBD_PER_NAME=()
+ DRBD_PER_ID=()
+ DRBD_PER_CSTATE=()
+ DRBD_PER_ROLE_REMOTE=()
+ DRBD_PER_DSTATE_REMOTE=()
+ fi
+
DRBD_ROLE_LOCAL=()
DRBD_ROLE_REMOTE=()
DRBD_CSTATE=()
@@ -369,16 +420,20 @@ drbd_set_status_variables() {
# if there was no output at all, or a weird output
# make sure the status arrays won't be empty.
[[ ${#DRBD_ROLE_LOCAL[@]} != 0 ]] || DRBD_ROLE_LOCAL=(Unconfigured)
- [[ ${#DRBD_ROLE_REMOTE[@]} != 0 ]] || DRBD_ROLE_REMOTE=(Unknown)
- [[ ${#DRBD_CSTATE[@]} != 0 ]] || DRBD_CSTATE=(Unconfigured)
[[ ${#DRBD_DSTATE_LOCAL[@]} != 0 ]] || DRBD_DSTATE_LOCAL=(Unconfigured)
+ [[ ${#DRBD_CSTATE[@]} != 0 ]] || DRBD_CSTATE=(Unconfigured)
+ [[ ${#DRBD_ROLE_REMOTE[@]} != 0 ]] || DRBD_ROLE_REMOTE=(Unknown)
[[ ${#DRBD_DSTATE_REMOTE[@]} != 0 ]] || DRBD_DSTATE_REMOTE=(DUnknown)
-
+ if $DRBD_VERSION_9 ; then
+ : == DEBUG == DRBD_NAME == ${DRBD_NAME[@]} ==
+ : == DEBUG == DRBD_ID == ${DRBD_ID[@]} ==
+ : == DEBUG == DRBD_VOLUME == ${DRBD_VOLUME[@]} ==
+ fi
: == DEBUG == DRBD_ROLE_LOCAL == ${DRBD_ROLE_LOCAL[@]} ==
- : == DEBUG == DRBD_ROLE_REMOTE == ${DRBD_ROLE_REMOTE[@]} ==
- : == DEBUG == DRBD_CSTATE == ${DRBD_CSTATE[@]} ==
: == DEBUG == DRBD_DSTATE_LOCAL == ${DRBD_DSTATE_LOCAL[@]} ==
+ : == DEBUG == DRBD_CSTATE == ${DRBD_CSTATE[@]} ==
+ : == DEBUG == DRBD_ROLE_REMOTE == ${DRBD_ROLE_REMOTE[@]} ==
: == DEBUG == DRBD_DSTATE_REMOTE == ${DRBD_DSTATE_REMOTE[@]} ==
}
@@ -414,6 +469,9 @@ maybe_outdate_self()
ocf_is_true $OCF_RESKEY_stop_outdates_secondary || return 1
local host stop_uname
+	if $DRBD_VERSION_9 ; then
+		local master temp_name temp_dstate temp_number outdate_self i # scratch variables for the DRBD 9 peer scan in this function
+	fi
# We ignore $OCF_RESKEY_CRM_meta_notify_promote_uname here
# because: if demote and promote for a _stacked_ resource
# (or a "floating" one, where DRBD sits on top of some SAN)
@@ -437,6 +495,29 @@ maybe_outdate_self()
return 1
done
+	if $DRBD_VERSION_9 ; then # DRBD 9: only go on to outdate if a promoted node is a peer with unknown disk state
+		temp_name=(${DRBD_NAME[@]}) # unquoted on purpose: split into one word per peer connection
+		temp_dstate=(${DRBD_DSTATE_REMOTE[@]}) # assumed to line up index-for-index with temp_name
+		temp_number=${#temp_name[@]}
+		outdate_self=false
+
+		for master in $OCF_RESKEY_CRM_meta_notify_master_uname; do
+			for ((i = 0; i < temp_number; i++)); do
+				if [[ ${temp_name[$i]} == "$master" ]] &&
+				   [[ ${temp_dstate[$i]} == "DUnknown" ]]; then
+					outdate_self=true
+					break
+				fi
+			done
+			if $outdate_self; then break; fi # one match is enough; skip the remaining masters
+		done
+
+		if ! $outdate_self; then
+			# None of the promoted nodes is a peer whose disk state is DUnknown.
+			return 1
+		fi
+	fi
+
# e.g. post/promote of some other peer.
# Should not happen, fencing constraints should take care of that.
# But in case it does, scream out loud.
@@ -993,6 +1074,7 @@ drbd_validate_all () {
DRBDADM="drbdadm"
DRBDSETUP="drbdsetup"
DRBD_HAS_MULTI_VOLUME=false
+ DRBD_VERSION_9=false
# these will _exit_ if they don't find the binaries
check_binary $DRBDADM
@@ -1015,18 +1097,23 @@ drbd_validate_all () {
modinfo -F version drbd |
sed -ne 's/^\([0-9]\+\)\.\([0-9]\+\)\.\([0-9]\+\).*$/\1 \2 \3/p'))
fi
- if (( $DRBD_KERNEL_VERSION_CODE >= 0x080400 )); then
+ if (( $DRBD_KERNEL_VERSION_CODE >= 0x090000 )); then
+ DRBD_HAS_MULTI_VOLUME=true
+ DRBD_VERSION_9=true
+		ocf_log warn "RA support for DRBD version 9 is experimental; do not use multiple primaries with DRBD 9.0" # DRBD 9 is accepted, but flagged as experimental
+ elif (( $DRBD_KERNEL_VERSION_CODE >= 0x080400 )); then
DRBD_HAS_MULTI_VOLUME=true
- elif (( $DRBD_KERNEL_VERSION_CODE >= 0x090000 )) ; then
- ocf_log err "This resource agent does (still) only support DRBD version 8.x"
- exit $OCF_ERR_INSTALLED
fi
check_crm_feature_set
# Check clone and M/S options.
- meta_expect clone-max -le 2
+ # DRBD 9 supports more than two nodes, so clone-max <= 2 is only enforced for older versions.
+ if ! $DRBD_VERSION_9 ; then
+ meta_expect clone-max -le 2
+ fi
meta_expect clone-node-max = 1
meta_expect master-node-max = 1
+ # The current DRBD 9.0 release does not support more than two primaries at the same time.
meta_expect master-max -le 2
# Rather than returning $OCF_ERR_CONFIGURED, we sometimes return
@@ -1080,7 +1167,8 @@ drbd_validate_all () {
# DRBD_DEVICES will be a shell array!
# FIXME we should double check that we explicitly restrict the set of
# valid characters in device names...
- if DRBD_DEVICES=($($DRBDADM --stacked sh-dev $DRBD_RESOURCE 2>/dev/null)); then
+ # With DRBD 9, "$DRBDADM --stacked sh-dev $DRBD_RESOURCE" succeeds whether or not the resource is stacked, so the stacked probe is skipped there.
+ if ! $DRBD_VERSION_9 && DRBD_DEVICES=($($DRBDADM --stacked sh-dev $DRBD_RESOURCE 2>/dev/null)); then
# apparently a "stacked" resource. Remember for future DRBDADM calls.
DRBDADM="$DRBDADM -S"
elif DRBD_DEVICES=($($DRBDADM sh-dev $DRBD_RESOURCE 2>/dev/null)); then
diff --git a/user/v9/drbdsetup.c b/user/v9/drbdsetup.c
index 053b9d3..fba72e1 100644
--- a/user/v9/drbdsetup.c
+++ b/user/v9/drbdsetup.c
@@ -251,6 +251,7 @@ static int del_resource_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int show_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int status_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int role_cmd(struct drbd_cmd *cm, int argc, char **argv);
+static int sh_status_9compat_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int cstate_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int dstate_cmd(struct drbd_cmd *cm, int argc, char **argv);
static int check_resize_cmd(struct drbd_cmd *cm, int argc, char **argv);
@@ -478,6 +479,9 @@ struct drbd_cmd commands[] = {
{"role", CTX_RESOURCE, 0, NO_PAYLOAD, role_cmd,
.lockless = true,
.summary = "Show the current role of a resource." },
+ {"sh-status", CTX_RESOURCE | CTX_ALL, 0, 0, sh_status_9compat_cmd,
+ .lockless = true,
+ .summary = "Show all status of resource." },
{"cstate", CTX_PEER_NODE, 0, NO_PAYLOAD, cstate_cmd,
.lockless = true,
.summary = "Show the current state of a connection." },
@@ -2576,6 +2580,87 @@ static int role_cmd(struct drbd_cmd *cm, int argc, char **argv)
return 0;
}
+
+static int sh_status_9compat_cmd(struct drbd_cmd *cm, int argc, char **argv)
+{
+	/* DRBD 9 "sh-status": print each volume and its peers as shell assignments; cm, argc and argv are not used. */
+	struct resources_list *resources_list, *resource;
+	char *old_objname = objname; /* requested name: "all" or a single resource */
+
+	resources_list = sort_resources(list_resources());
+
+	for (resource = resources_list; resource; resource = resource->next) {
+		struct devices_list *devices, *device;
+		struct connections_list *connections, *connection;
+		struct peer_devices_list *peer_devices = NULL, *peer_device;
+		struct nlattr *nla;
+
+		if (strcmp(old_objname, "all") && strcmp(old_objname, resource->name))
+			continue; /* not the resource that was asked for */
+
+		objname = resource->name; /* restored from old_objname before returning */
+		printI("_res_name=%s\n", objname);
+
+		nla = nla_find_nested(resource->res_opts, __nla_type(T_node_id));
+		if (nla)
+			printI("_node_id=%d\n\n", *(uint32_t *)nla_data(nla));
+		else
+			printI("_node_id=\n\n"); /* no node id attribute: emit an empty value */
+
+		devices = list_devices(resource->name);
+		connections = sort_connections(list_connections(resource->name));
+		if (devices && connections)
+			peer_devices = list_peer_devices(resource->name);
+
+		link_peer_devices_to_devices(peer_devices, devices);
+
+		for (device = devices; device; device = device->next) {
+			++indent;
+			printI("_minor=%d\n", device->minor);
+			printI("_volume=%d\n", device->ctx.ctx_volume);
+			/* The field names are modelled on the 8.4 sh-status output; */
+			/* its "_known" field is not emitted here. */
+			printI("_role=%s\n", drbd_role_str(resource->info.res_role));
+			printI("_disk=%s\n\n", drbd_disk_str(device->info.dev_disk_state));
+
+			for (connection = connections; connection; connection = connection->next) {
+				++indent;
+				for (peer_device = peer_devices; peer_device; peer_device = peer_device->next) {
+					if (connection->ctx.ctx_peer_node_id != peer_device->ctx.ctx_peer_node_id
+					    || device->ctx.ctx_volume != peer_device->ctx.ctx_volume)
+						continue; /* belongs to another connection or another volume */
+					printI("_conn_name=%s\n", connection->ctx.ctx_conn_name);
+					printI("_peer_node_id=%d\n", connection->ctx.ctx_peer_node_id);
+					printI("_cstate=%s\n", drbd_conn_str(connection->info.conn_connection_state));
+					if (connection->info.conn_connection_state == C_CONNECTED) {
+						printI("_peer=%s\n", drbd_role_str(connection->info.conn_role));
+						printI("_pdsk=%s\n\n", drbd_disk_str(peer_device->info.peer_disk_state));
+					} else {
+						printI("_peer=\n"); /* not connected: peer role and disk state are left empty */
+						printI("_pdsk=\n");
+					}
+					wrap_printf(0, "_peer_node_process\n\n");
+				}
+				/* Placeholder: */
+				/* the "_flags_*" values (e.g. _flags_susp) */
+				/* are not emitted. */
+				--indent;
+			}
+
+			wrap_printf(0, "_sh_status_process\n\n");
+			--indent;
+		}
+
+		free_connections(connections);
+		free_devices(devices);
+		free_peer_devices(peer_devices);
+	}
+
+	free_resources(resources_list); /* release the whole list, not only its head node */
+	objname = old_objname;
+	return 0; /* unconditionally; a resource name that matched nothing is not reported */
+}
+
static int cstate_cmd(struct drbd_cmd *cm, int argc, char **argv)
{
struct connections_list *connections, *connection;