1358 lines
36 KiB
Diff
1358 lines
36 KiB
Diff
|
From 7a8b7b4adde5c31c79aee8b0792cd8369652afc6 Mon Sep 17 00:00:00 2001
|
||
|
From: Leo Yan <leo.yan@linaro.org>
|
||
|
Date: Fri, 7 May 2021 10:25:12 +0800
|
||
|
Subject: [PATCH 01/33] lvmlockd: idm: Introduce new locking scheme
|
||
|
|
||
|
Alongside the existing locking schemes of DLM and sanlock, this patch is
|
||
|
to introduce new locking scheme: In-Drive-Mutex (IDM).
|
||
|
|
||
|
With the IDM support in the drive, the locks are resident in the drive,
|
||
|
thus, the locking lease is maintained in a central place: the drive
|
||
|
firmware. We can consider this is a typical client-server model,
|
||
|
every host (or node) in the server cluster launches the request for
|
||
|
leasing mutex to a drive firmware, the drive firmware works as an
|
||
|
arbitrator to grant the mutex to a requester and it can reject other
|
||
|
applicants if the mutex has been acquired. To satisfy the LVM
|
||
|
activation for different modes, IDM supports two locking modes:
|
||
|
exclusive and shareable.
|
||
|
|
||
|
Every IDM is identified with two IDs, one is the host ID and another is
|
||
|
the resource ID. The resource ID is a unique identifier for the
|
||
|
resource it protects; in the integration with lvmlockd, the resource
|
||
|
ID is combined with VG's UUID and LV's UUID; for the global locking,
|
||
|
the bytes in resource ID are all zeros, and for the VG locking, the
|
||
|
LV's UUID is set as zero. Every host can generate a random UUID and
|
||
|
use it as the host ID for the SCSI command, this ID is used to clarify
|
||
|
the ownership for mutex.
|
||
|
|
||
|
For easily invoking the IDM commands to drive, like other locking
|
||
|
scheme (e.g. sanlock), a daemon program named IDM lock manager is
|
||
|
created, so the detailed IDM SCSI commands are encapsulated in the
|
||
|
daemon, and lvmlockd uses the wrapper APIs to communicate with the
|
||
|
daemon program.
|
||
|
|
||
|
This patch introduces the IDM locking wrapper layer, it forwards the
|
||
|
locking requests from lvmlockd to the IDM lock manager, and returns the
|
||
|
result from the drive's response.
|
||
|
|
||
|
One thing should be mentioned is the IDM's LVB. IDM supports LVB to max
|
||
|
7 bytes when stores into the drive, the most significant byte of 8 bytes
|
||
|
is reserved for control bits. For this reason, the patch maps the
|
||
|
timestamp in microsecond units to its cached LVB; essentially, if any
|
||
|
timestamp was updated by other nodes, that means the local LVB is
|
||
|
invalid. When the timestamp is stored into drive's LVB, it's
|
||
|
possible to cause a time-going-backwards issue, which is introduced by the
|
||
|
time precision or missing synchronization across multiple nodes.
|
||
|
So the IDM wrapper fixes up the timestamp by adding 1 to the latest
|
||
|
value and write back into drive.
|
||
|
|
||
|
Currently LVB is used to track VG changes and its purpose is to notify
|
||
|
lvmetad cache invalidation when detects any metadata has been altered;
|
||
|
but lvmetad is not used anymore for caching metadata, LVB doesn't
|
||
|
really work. It's possible that the LVB functionality could be useful
|
||
|
again in the future, so let's enable it for IDM in the first place.
|
||
|
|
||
|
Signed-off-by: Leo Yan <leo.yan@linaro.org>
|
||
|
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
|
||
|
---
|
||
|
configure | 173 ++++++++
|
||
|
configure.ac | 20 +
|
||
|
daemons/lvmlockd/Makefile.in | 5 +
|
||
|
daemons/lvmlockd/lvmlockd-idm.c | 837 +++++++++++++++++++++++++++++++++++
|
||
|
daemons/lvmlockd/lvmlockd-internal.h | 108 +++++
|
||
|
5 files changed, 1143 insertions(+)
|
||
|
create mode 100644 daemons/lvmlockd/lvmlockd-idm.c
|
||
|
|
||
|
diff --git a/configure b/configure
|
||
|
index 7c6bd48d25cc..e2299ee9180e 100755
|
||
|
--- a/configure
|
||
|
+++ b/configure
|
||
|
@@ -747,6 +747,7 @@ BUILD_DMFILEMAPD
|
||
|
BUILD_LOCKDDLM_CONTROL
|
||
|
BUILD_LOCKDDLM
|
||
|
BUILD_LOCKDSANLOCK
|
||
|
+BUILD_LOCKDIDM
|
||
|
BUILD_LVMLOCKD
|
||
|
BUILD_LVMPOLLD
|
||
|
BUILD_LVMDBUSD
|
||
|
@@ -782,6 +783,8 @@ LOCKD_DLM_LIBS
|
||
|
LOCKD_DLM_CFLAGS
|
||
|
LOCKD_SANLOCK_LIBS
|
||
|
LOCKD_SANLOCK_CFLAGS
|
||
|
+LOCKD_IDM_LIBS
|
||
|
+LOCKD_IDM_CFLAGS
|
||
|
VALGRIND_LIBS
|
||
|
VALGRIND_CFLAGS
|
||
|
GENPNG
|
||
|
@@ -946,6 +949,7 @@ enable_lvmpolld
|
||
|
enable_lvmlockd_sanlock
|
||
|
enable_lvmlockd_dlm
|
||
|
enable_lvmlockd_dlmcontrol
|
||
|
+enable_lvmlockd_idm
|
||
|
enable_use_lvmlockd
|
||
|
with_lvmlockd_pidfile
|
||
|
enable_use_lvmpolld
|
||
|
@@ -1019,6 +1023,8 @@ LOCKD_DLM_CFLAGS
|
||
|
LOCKD_DLM_LIBS
|
||
|
LOCKD_DLM_CONTROL_CFLAGS
|
||
|
LOCKD_DLM_CONTROL_LIBS
|
||
|
+LOCKD_IDM_CFLAGS
|
||
|
+LOCKD_IDM_LIBS
|
||
|
NOTIFY_DBUS_CFLAGS
|
||
|
NOTIFY_DBUS_LIBS
|
||
|
BLKID_CFLAGS
|
||
|
@@ -1678,6 +1684,7 @@ Optional Features:
|
||
|
--enable-lvmlockd-dlm enable the LVM lock daemon using dlm
|
||
|
--enable-lvmlockd-dlmcontrol
|
||
|
enable lvmlockd remote refresh using libdlmcontrol
|
||
|
+ --enable-lvmlockd-idm enable the LVM lock daemon using idm
|
||
|
--disable-use-lvmlockd disable usage of LVM lock daemon
|
||
|
--disable-use-lvmpolld disable usage of LVM Poll Daemon
|
||
|
--enable-dmfilemapd enable the dmstats filemap daemon
|
||
|
@@ -1832,6 +1839,10 @@ Some influential environment variables:
|
||
|
C compiler flags for LOCKD_DLM_CONTROL, overriding pkg-config
|
||
|
LOCKD_DLM_CONTROL_LIBS
|
||
|
linker flags for LOCKD_DLM_CONTROL, overriding pkg-config
|
||
|
+ LOCKD_IDM_CFLAGS
|
||
|
+ C compiler flags for LOCKD_IDM, overriding pkg-config
|
||
|
+ LOCKD_IDM_LIBS
|
||
|
+ linker flags for LOCKD_IDM, overriding pkg-config
|
||
|
NOTIFY_DBUS_CFLAGS
|
||
|
C compiler flags for NOTIFY_DBUS, overriding pkg-config
|
||
|
NOTIFY_DBUS_LIBS
|
||
|
@@ -3124,6 +3135,7 @@ case "$host_os" in
|
||
|
LOCKDSANLOCK=no
|
||
|
LOCKDDLM=no
|
||
|
LOCKDDLM_CONTROL=no
|
||
|
+ LOCKDIDM=no
|
||
|
ODIRECT=yes
|
||
|
DM_IOCTLS=yes
|
||
|
SELINUX=yes
|
||
|
@@ -11192,6 +11204,167 @@ $as_echo "#define LOCKDDLM_CONTROL_SUPPORT 1" >>confdefs.h
|
||
|
fi
|
||
|
|
||
|
################################################################################
|
||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lvmlockdidm" >&5
|
||
|
+$as_echo_n "checking whether to build lvmlockdidm... " >&6; }
|
||
|
+# Check whether --enable-lvmlockd-idm was given.
|
||
|
+if test "${enable_lvmlockd_idm+set}" = set; then :
|
||
|
+ enableval=$enable_lvmlockd_idm; LOCKDIDM=$enableval
|
||
|
+fi
|
||
|
+
|
||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $LOCKDIDM" >&5
|
||
|
+$as_echo "$LOCKDIDM" >&6; }
|
||
|
+
|
||
|
+BUILD_LOCKDIDM=$LOCKDIDM
|
||
|
+
|
||
|
+if test "$BUILD_LOCKDIDM" = yes; then
|
||
|
+
|
||
|
+pkg_failed=no
|
||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LOCKD_IDM" >&5
|
||
|
+$as_echo_n "checking for LOCKD_IDM... " >&6; }
|
||
|
+
|
||
|
+if test -n "$LOCKD_IDM_CFLAGS"; then
|
||
|
+ pkg_cv_LOCKD_IDM_CFLAGS="$LOCKD_IDM_CFLAGS"
|
||
|
+ elif test -n "$PKG_CONFIG"; then
|
||
|
+ if test -n "$PKG_CONFIG" && \
|
||
|
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libseagate_ilm >= 0.1.0\""; } >&5
|
||
|
+ ($PKG_CONFIG --exists --print-errors "libseagate_ilm >= 0.1.0") 2>&5
|
||
|
+ ac_status=$?
|
||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||
|
+ test $ac_status = 0; }; then
|
||
|
+ pkg_cv_LOCKD_IDM_CFLAGS=`$PKG_CONFIG --cflags "libseagate_ilm >= 0.1.0" 2>/dev/null`
|
||
|
+ test "x$?" != "x0" && pkg_failed=yes
|
||
|
+else
|
||
|
+ pkg_failed=yes
|
||
|
+fi
|
||
|
+ else
|
||
|
+ pkg_failed=untried
|
||
|
+fi
|
||
|
+if test -n "$LOCKD_IDM_LIBS"; then
|
||
|
+ pkg_cv_LOCKD_IDM_LIBS="$LOCKD_IDM_LIBS"
|
||
|
+ elif test -n "$PKG_CONFIG"; then
|
||
|
+ if test -n "$PKG_CONFIG" && \
|
||
|
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libseagate_ilm >= 0.1.0\""; } >&5
|
||
|
+ ($PKG_CONFIG --exists --print-errors "libseagate_ilm >= 0.1.0") 2>&5
|
||
|
+ ac_status=$?
|
||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||
|
+ test $ac_status = 0; }; then
|
||
|
+ pkg_cv_LOCKD_IDM_LIBS=`$PKG_CONFIG --libs "libseagate_ilm >= 0.1.0" 2>/dev/null`
|
||
|
+ test "x$?" != "x0" && pkg_failed=yes
|
||
|
+else
|
||
|
+ pkg_failed=yes
|
||
|
+fi
|
||
|
+ else
|
||
|
+ pkg_failed=untried
|
||
|
+fi
|
||
|
+
|
||
|
+
|
||
|
+
|
||
|
+if test $pkg_failed = yes; then
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||
|
+$as_echo "no" >&6; }
|
||
|
+
|
||
|
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
|
||
|
+ _pkg_short_errors_supported=yes
|
||
|
+else
|
||
|
+ _pkg_short_errors_supported=no
|
||
|
+fi
|
||
|
+ if test $_pkg_short_errors_supported = yes; then
|
||
|
+ LOCKD_IDM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libseagate_ilm >= 0.1.0" 2>&1`
|
||
|
+ else
|
||
|
+ LOCKD_IDM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libseagate_ilm >= 0.1.0" 2>&1`
|
||
|
+ fi
|
||
|
+ # Put the nasty error message in config.log where it belongs
|
||
|
+ echo "$LOCKD_IDM_PKG_ERRORS" >&5
|
||
|
+
|
||
|
+ $bailout
|
||
|
+elif test $pkg_failed = untried; then
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||
|
+$as_echo "no" >&6; }
|
||
|
+ $bailout
|
||
|
+else
|
||
|
+ LOCKD_IDM_CFLAGS=$pkg_cv_LOCKD_IDM_CFLAGS
|
||
|
+ LOCKD_IDM_LIBS=$pkg_cv_LOCKD_IDM_LIBS
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
|
||
|
+$as_echo "yes" >&6; }
|
||
|
+fi
|
||
|
+
|
||
|
+pkg_failed=no
|
||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BLKID" >&5
|
||
|
+$as_echo_n "checking for BLKID... " >&6; }
|
||
|
+
|
||
|
+if test -n "$BLKID_CFLAGS"; then
|
||
|
+ pkg_cv_BLKID_CFLAGS="$BLKID_CFLAGS"
|
||
|
+ elif test -n "$PKG_CONFIG"; then
|
||
|
+ if test -n "$PKG_CONFIG" && \
|
||
|
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid >= 2.24\""; } >&5
|
||
|
+ ($PKG_CONFIG --exists --print-errors "blkid >= 2.24") 2>&5
|
||
|
+ ac_status=$?
|
||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||
|
+ test $ac_status = 0; }; then
|
||
|
+ pkg_cv_BLKID_CFLAGS=`$PKG_CONFIG --cflags "blkid >= 2.24" 2>/dev/null`
|
||
|
+ test "x$?" != "x0" && pkg_failed=yes
|
||
|
+else
|
||
|
+ pkg_failed=yes
|
||
|
+fi
|
||
|
+ else
|
||
|
+ pkg_failed=untried
|
||
|
+fi
|
||
|
+if test -n "$BLKID_LIBS"; then
|
||
|
+ pkg_cv_BLKID_LIBS="$BLKID_LIBS"
|
||
|
+ elif test -n "$PKG_CONFIG"; then
|
||
|
+ if test -n "$PKG_CONFIG" && \
|
||
|
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid >= 2.24\""; } >&5
|
||
|
+ ($PKG_CONFIG --exists --print-errors "blkid >= 2.24") 2>&5
|
||
|
+ ac_status=$?
|
||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||
|
+ test $ac_status = 0; }; then
|
||
|
+ pkg_cv_BLKID_LIBS=`$PKG_CONFIG --libs "blkid >= 2.24" 2>/dev/null`
|
||
|
+ test "x$?" != "x0" && pkg_failed=yes
|
||
|
+else
|
||
|
+ pkg_failed=yes
|
||
|
+fi
|
||
|
+ else
|
||
|
+ pkg_failed=untried
|
||
|
+fi
|
||
|
+
|
||
|
+
|
||
|
+
|
||
|
+if test $pkg_failed = yes; then
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||
|
+$as_echo "no" >&6; }
|
||
|
+
|
||
|
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
|
||
|
+ _pkg_short_errors_supported=yes
|
||
|
+else
|
||
|
+ _pkg_short_errors_supported=no
|
||
|
+fi
|
||
|
+ if test $_pkg_short_errors_supported = yes; then
|
||
|
+ BLKID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "blkid >= 2.24" 2>&1`
|
||
|
+ else
|
||
|
+ BLKID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "blkid >= 2.24" 2>&1`
|
||
|
+ fi
|
||
|
+ # Put the nasty error message in config.log where it belongs
|
||
|
+ echo "$BLKID_PKG_ERRORS" >&5
|
||
|
+
|
||
|
+ $bailout
|
||
|
+elif test $pkg_failed = untried; then
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||
|
+$as_echo "no" >&6; }
|
||
|
+ $bailout
|
||
|
+else
|
||
|
+ BLKID_CFLAGS=$pkg_cv_BLKID_CFLAGS
|
||
|
+ BLKID_LIBS=$pkg_cv_BLKID_LIBS
|
||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
|
||
|
+$as_echo "yes" >&6; }
|
||
|
+ HAVE_LOCKD_IDM=yes
|
||
|
+fi
|
||
|
+
|
||
|
+$as_echo "#define LOCKDIDM_SUPPORT 1" >>confdefs.h
|
||
|
+
|
||
|
+ BUILD_LVMLOCKD=yes
|
||
|
+fi
|
||
|
+
|
||
|
+################################################################################
|
||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lvmlockd" >&5
|
||
|
$as_echo_n "checking whether to build lvmlockd... " >&6; }
|
||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BUILD_LVMLOCKD" >&5
|
||
|
diff --git a/configure.ac b/configure.ac
|
||
|
index 1a49e7fe7e10..40acc49c231c 100644
|
||
|
--- a/configure.ac
|
||
|
+++ b/configure.ac
|
||
|
@@ -41,6 +41,7 @@ case "$host_os" in
|
||
|
LOCKDSANLOCK=no
|
||
|
LOCKDDLM=no
|
||
|
LOCKDDLM_CONTROL=no
|
||
|
+ LOCKDIDM=no
|
||
|
ODIRECT=yes
|
||
|
DM_IOCTLS=yes
|
||
|
SELINUX=yes
|
||
|
@@ -990,6 +991,25 @@ if test "$BUILD_LOCKDDLM_CONTROL" = yes; then
|
||
|
fi
|
||
|
|
||
|
################################################################################
|
||
|
+dnl -- Build lvmlockdidm
|
||
|
+AC_MSG_CHECKING(whether to build lvmlockdidm)
|
||
|
+AC_ARG_ENABLE(lvmlockd-idm,
|
||
|
+ AC_HELP_STRING([--enable-lvmlockd-idm],
|
||
|
+ [enable the LVM lock daemon using idm]),
|
||
|
+ LOCKDIDM=$enableval)
|
||
|
+AC_MSG_RESULT($LOCKDIDM)
|
||
|
+
|
||
|
+BUILD_LOCKDIDM=$LOCKDIDM
|
||
|
+
|
||
|
+dnl -- Look for Seagate IDM libraries
|
||
|
+if test "$BUILD_LOCKDIDM" = yes; then
|
||
|
+ PKG_CHECK_MODULES(LOCKD_IDM, libseagate_ilm >= 0.1.0, [HAVE_LOCKD_IDM=yes], $bailout)
|
||
|
+ PKG_CHECK_MODULES(BLKID, blkid >= 2.24, [HAVE_LOCKD_IDM=yes], $bailout)
|
||
|
+ AC_DEFINE([LOCKDIDM_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd IDM option.])
|
||
|
+ BUILD_LVMLOCKD=yes
|
||
|
+fi
|
||
|
+
|
||
|
+################################################################################
|
||
|
dnl -- Build lvmlockd
|
||
|
AC_MSG_CHECKING(whether to build lvmlockd)
|
||
|
AC_MSG_RESULT($BUILD_LVMLOCKD)
|
||
|
diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in
|
||
|
index e69ab91273b1..91beb1ad8ef3 100644
|
||
|
--- a/daemons/lvmlockd/Makefile.in
|
||
|
+++ b/daemons/lvmlockd/Makefile.in
|
||
|
@@ -30,6 +30,11 @@ ifeq ("@BUILD_LOCKDDLM@", "yes")
|
||
|
LOCK_LIBS += -ldlmcontrol
|
||
|
endif
|
||
|
|
||
|
+ifeq ("@BUILD_LOCKDIDM@", "yes")
|
||
|
+ SOURCES += lvmlockd-idm.c
|
||
|
+ LOCK_LIBS += -lseagate_ilm -lblkid
|
||
|
+endif
|
||
|
+
|
||
|
SOURCES2 = lvmlockctl.c
|
||
|
|
||
|
TARGETS = lvmlockd lvmlockctl
|
||
|
diff --git a/daemons/lvmlockd/lvmlockd-idm.c b/daemons/lvmlockd/lvmlockd-idm.c
|
||
|
new file mode 100644
|
||
|
index 000000000000..e9f50535c510
|
||
|
--- /dev/null
|
||
|
+++ b/daemons/lvmlockd/lvmlockd-idm.c
|
||
|
@@ -0,0 +1,837 @@
|
||
|
+/*
|
||
|
+ * Copyright (C) 2020-2021 Seagate Ltd.
|
||
|
+ *
|
||
|
+ * This file is part of LVM2.
|
||
|
+ *
|
||
|
+ * This copyrighted material is made available to anyone wishing to use,
|
||
|
+ * modify, copy, or redistribute it subject to the terms and conditions
|
||
|
+ * of the GNU Lesser General Public License v.2.1.
|
||
|
+ */
|
||
|
+
|
||
|
+#define _XOPEN_SOURCE 500 /* pthread */
|
||
|
+#define _ISOC99_SOURCE
|
||
|
+
|
||
|
+#include "tools/tool.h"
|
||
|
+
|
||
|
+#include "daemon-server.h"
|
||
|
+#include "lib/mm/xlate.h"
|
||
|
+
|
||
|
+#include "lvmlockd-internal.h"
|
||
|
+#include "daemons/lvmlockd/lvmlockd-client.h"
|
||
|
+
|
||
|
+#include "ilm.h"
|
||
|
+
|
||
|
+#include <blkid/blkid.h>
|
||
|
+#include <ctype.h>
|
||
|
+#include <dirent.h>
|
||
|
+#include <errno.h>
|
||
|
+#include <poll.h>
|
||
|
+#include <regex.h>
|
||
|
+#include <stddef.h>
|
||
|
+#include <syslog.h>
|
||
|
+#include <sys/sysmacros.h>
|
||
|
+#include <time.h>
|
||
|
+
|
||
|
+#define IDM_TIMEOUT 60000 /* unit: millisecond, 60 seconds */
|
||
|
+
|
||
|
+/*
|
||
|
+ * Each lockspace thread has its own In-Drive Mutex (IDM) lock manager's
|
||
|
+ * connection. After established socket connection, the lockspace has
|
||
|
+ * been created in IDM lock manager and afterwards use the socket file
|
||
|
+ * descriptor to send any requests for lock related operations.
|
||
|
+ */
|
||
|
+
|
||
|
/* Private data hung off a lockspace (ls->lm_data) by this backend. */
struct lm_idm {
	int sock;	/* socket connected to the IDM lock manager */
};
|
||
|
+
|
||
|
+struct rd_idm {
|
||
|
+ struct idm_lock_id id;
|
||
|
+ struct idm_lock_op op;
|
||
|
+ uint64_t vb_timestamp;
|
||
|
+ struct val_blk *vb;
|
||
|
+};
|
||
|
+
|
||
|
+int lm_data_size_idm(void)
|
||
|
+{
|
||
|
+ return sizeof(struct rd_idm);
|
||
|
+}
|
||
|
+
|
||
|
/*
 * Return the current CLOCK_REALTIME time in microseconds since the Epoch,
 * or 0 when the clock cannot be read.
 *
 * IDM reserves the most significant byte of its 8-byte value, so only the
 * low 56 bits are available for the timestamp.  56 bits hold roughly 2283
 * years' worth of microseconds, so overflow is not a practical concern.
 */
static uint64_t read_utc_us(void)
{
	struct timespec cur_time;

	/* Do not compute a timestamp from an uninitialised timespec. */
	if (clock_gettime(CLOCK_REALTIME, &cur_time) < 0)
		return 0;

	/*
	 * Widen before multiplying: the multiplication would otherwise be
	 * done in time_t, which overflows where that type is 32 bits wide.
	 */
	return (uint64_t)cur_time.tv_sec * 1000000ULL +
	       (uint64_t)cur_time.tv_nsec / 1000;
}
|
||
|
+
|
||
|
/*
 * Copy a uuid from 'buffer' into 'uuid_str' with every '-' removed.
 *
 * Exactly 32 non-dash characters must be present.  The output is not
 * NUL-terminated by this function; the caller supplies at least 32 bytes.
 *
 * Returns 0 on success, -1 when the character count is not 32.
 */
static int uuid_read_format(char *uuid_str, const char *buffer)
{
	const char *src;
	int copied = 0;

	for (src = buffer; *src != '\0'; src++) {
		/* dashes are only separators, drop them */
		if (*src == '-')
			continue;

		if (copied >= 32) {
			log_error("Too many characters to be uuid.");
			return -1;
		}

		uuid_str[copied++] = *src;
	}

	if (copied == 32)
		return 0;

	log_error("Couldn't read uuid: incorrect number of "
		  "characters.");
	return -1;
}
|
||
|
+
|
||
|
+#define SYSFS_ROOT "/sys"
|
||
|
+#define BUS_SCSI_DEVS "/bus/scsi/devices"
|
||
|
+
|
||
|
+static struct idm_lock_op glb_lock_op;
|
||
|
+
|
||
|
+static void lm_idm_free_dir_list(struct dirent **dir_list, int dir_num)
|
||
|
+{
|
||
|
+ int i;
|
||
|
+
|
||
|
+ for (i = 0; i < dir_num; ++i)
|
||
|
+ free(dir_list[i]);
|
||
|
+ free(dir_list);
|
||
|
+}
|
||
|
+
|
||
|
/*
 * scandir() filter: accept only entries whose name has the SCSI address
 * form "x:x:x:x" (four decimal numbers separated by colons).
 *
 * Returns 1 to keep the entry, 0 to drop it (also when the pattern cannot
 * be compiled).
 */
static int lm_idm_scsi_directory_select(const struct dirent *s)
{
	regex_t regex;
	int matched;

	/* REG_NOSUB: only a yes/no answer is needed, no sub-match offsets. */
	if (regcomp(&regex, "^[0-9]+:[0-9]+:[0-9]+:[0-9]+$",
		    REG_EXTENDED | REG_NOSUB))
		return 0;

	matched = !regexec(&regex, s->d_name, 0, NULL, 0);
	regfree(&regex);

	return matched;
}
|
||
|
+
|
||
|
/* Return 0 when block_path exists and is a directory, otherwise -1. */
static int lm_idm_scsi_find_block_dirctory(const char *block_path)
{
	struct stat st;

	if (stat(block_path, &st) < 0)
		return -1;

	return S_ISDIR(st.st_mode) ? 0 : -1;
}
|
||
|
+
|
||
|
/*
 * scandir() filter: keep symbolic links and directories, except for the
 * "." and ".." directory entries.  Returns 1 to keep, 0 to drop.
 */
static int lm_idm_scsi_block_node_select(const struct dirent *s)
{
	switch (s->d_type) {
	case DT_LNK:
		return 1;
	case DT_DIR:
		/* drop this directory and its parent */
		return strcmp(s->d_name, ".") && strcmp(s->d_name, "..");
	default:
		return 0;
	}
}
|
||
|
+
|
||
|
+static int lm_idm_scsi_find_block_node(const char *blk_path, char **blk_dev)
|
||
|
+{
|
||
|
+ struct dirent **dir_list;
|
||
|
+ int dir_num;
|
||
|
+
|
||
|
+ dir_num = scandir(blk_path, &dir_list, lm_idm_scsi_block_node_select, NULL);
|
||
|
+ if (dir_num < 0) {
|
||
|
+ log_error("Cannot find valid directory entry in %s", blk_path);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Should have only one block name under the path, if the dir_num is
|
||
|
+ * not 1 (e.g. 0 or any number bigger than 1), it must be wrong and
|
||
|
+ * should never happen.
|
||
|
+ */
|
||
|
+ if (dir_num == 1)
|
||
|
+ *blk_dev = strdup(dir_list[0]->d_name);
|
||
|
+ else
|
||
|
+ *blk_dev = NULL;
|
||
|
+
|
||
|
+ lm_idm_free_dir_list(dir_list, dir_num);
|
||
|
+
|
||
|
+ if (!*blk_dev)
|
||
|
+ return -1;
|
||
|
+
|
||
|
+ return dir_num;
|
||
|
+}
|
||
|
+
|
||
|
+static int lm_idm_scsi_search_propeller_partition(char *dev)
|
||
|
+{
|
||
|
+ int i, nparts;
|
||
|
+ blkid_probe pr;
|
||
|
+ blkid_partlist ls;
|
||
|
+ int found = -1;
|
||
|
+
|
||
|
+ pr = blkid_new_probe_from_filename(dev);
|
||
|
+ if (!pr) {
|
||
|
+ log_error("%s: failed to create a new libblkid probe", dev);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Binary interface */
|
||
|
+ ls = blkid_probe_get_partitions(pr);
|
||
|
+ if (!ls) {
|
||
|
+ log_error("%s: failed to read partitions", dev);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* List partitions */
|
||
|
+ nparts = blkid_partlist_numof_partitions(ls);
|
||
|
+ if (!nparts)
|
||
|
+ goto done;
|
||
|
+
|
||
|
+ for (i = 0; i < nparts; i++) {
|
||
|
+ const char *p;
|
||
|
+ blkid_partition par = blkid_partlist_get_partition(ls, i);
|
||
|
+
|
||
|
+ p = blkid_partition_get_name(par);
|
||
|
+ if (p) {
|
||
|
+ log_debug("partition name='%s'", p);
|
||
|
+
|
||
|
+ if (!strcmp(p, "propeller"))
|
||
|
+ found = blkid_partition_get_partno(par);
|
||
|
+ }
|
||
|
+
|
||
|
+ if (found >= 0)
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+done:
|
||
|
+ blkid_free_probe(pr);
|
||
|
+ return found;
|
||
|
+}
|
||
|
+
|
||
|
/*
 * Map a SCSI sysfs device directory to the device node of a drive that
 * carries a "propeller" partition.
 *
 * scsi_path is a directory such as /sys/bus/scsi/devices/1:0:0:0.
 *
 * Returns a newly allocated "/dev/<name>" string that the caller must
 * free, or NULL when the block device cannot be located, has no
 * "propeller" partition, or memory runs out.
 */
static char *lm_idm_scsi_get_block_device_node(const char *scsi_path)
{
	char *blk_path = NULL;
	char *blk_dev = NULL;
	char *dev_node = NULL;

	/*
	 * Locate the "block" directory, such like:
	 *   /sys/bus/scsi/devices/1:0:0:0/block
	 */
	if (asprintf(&blk_path, "%s/%s", scsi_path, "block") < 0) {
		/* asprintf leaves the pointer undefined on failure */
		blk_path = NULL;
		log_error("Fail to allocate block path for %s", scsi_path);
		goto fail;
	}

	if (lm_idm_scsi_find_block_dirctory(blk_path) < 0) {
		log_error("Fail to find block path %s", blk_path);
		goto fail;
	}

	/*
	 * Locate the block device name, such like:
	 *   /sys/bus/scsi/devices/1:0:0:0/block/sdb
	 *
	 * On success the local "blk_dev" points to an allocated copy of the
	 * block device name, in this example the string "sdb".
	 */
	if (lm_idm_scsi_find_block_node(blk_path, &blk_dev) < 0) {
		log_error("Fail to find block node");
		goto fail;
	}

	if (asprintf(&dev_node, "/dev/%s", blk_dev) < 0) {
		/* asprintf leaves the pointer undefined on failure */
		dev_node = NULL;
		log_error("Fail to allocate memory for blk node path");
		goto fail;
	}

	/* Only drives carrying a "propeller" partition are of interest. */
	if (lm_idm_scsi_search_propeller_partition(dev_node) < 0)
		goto fail;

	free(blk_path);
	free(blk_dev);
	return dev_node;

fail:
	free(blk_path);
	free(blk_dev);
	free(dev_node);
	return NULL;
}
|
||
|
+
|
||
|
+static int lm_idm_get_gl_lock_pv_list(void)
|
||
|
+{
|
||
|
+ struct dirent **dir_list;
|
||
|
+ char scsi_bus_path[PATH_MAX];
|
||
|
+ char *drive_path;
|
||
|
+ int i, dir_num, ret;
|
||
|
+
|
||
|
+ if (glb_lock_op.drive_num)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ snprintf(scsi_bus_path, sizeof(scsi_bus_path), "%s%s",
|
||
|
+ SYSFS_ROOT, BUS_SCSI_DEVS);
|
||
|
+
|
||
|
+ dir_num = scandir(scsi_bus_path, &dir_list,
|
||
|
+ lm_idm_scsi_directory_select, NULL);
|
||
|
+ if (dir_num < 0) { /* scsi mid level may not be loaded */
|
||
|
+ log_error("Attached devices: none");
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i = 0; i < dir_num; i++) {
|
||
|
+ char *scsi_path;
|
||
|
+
|
||
|
+ ret = asprintf(&scsi_path, "%s/%s", scsi_bus_path,
|
||
|
+ dir_list[i]->d_name);
|
||
|
+ if (ret < 0) {
|
||
|
+ log_error("Fail to allocate memory for scsi directory");
|
||
|
+ goto failed;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (glb_lock_op.drive_num >= ILM_DRIVE_MAX_NUM) {
|
||
|
+ log_error("Global lock: drive number %d exceeds limitation (%d) ?!",
|
||
|
+ glb_lock_op.drive_num, ILM_DRIVE_MAX_NUM);
|
||
|
+ free(scsi_path);
|
||
|
+ goto failed;
|
||
|
+ }
|
||
|
+
|
||
|
+ drive_path = lm_idm_scsi_get_block_device_node(scsi_path);
|
||
|
+ if (!drive_path) {
|
||
|
+ free(scsi_path);
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+
|
||
|
+ glb_lock_op.drives[glb_lock_op.drive_num] = drive_path;
|
||
|
+ glb_lock_op.drive_num++;
|
||
|
+
|
||
|
+ free(scsi_path);
|
||
|
+ }
|
||
|
+
|
||
|
+ lm_idm_free_dir_list(dir_list, dir_num);
|
||
|
+ return 0;
|
||
|
+
|
||
|
+failed:
|
||
|
+ lm_idm_free_dir_list(dir_list, dir_num);
|
||
|
+
|
||
|
+ for (i = 0; i < glb_lock_op.drive_num; i++) {
|
||
|
+ if (glb_lock_op.drives[i]) {
|
||
|
+ free(glb_lock_op.drives[i]);
|
||
|
+ glb_lock_op.drives[i] = NULL;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ return -1;
|
||
|
+}
|
||
|
+
|
||
|
/*
 * Advance *vb_timestamp so that it never moves backwards.
 *
 * Nodes may lack clock synchronization with microsecond precision, so the
 * local clock can be behind the stored value.  In that case the stored
 * value is simply incremented; otherwise it is replaced by the current
 * time.
 */
static void lm_idm_update_vb_timestamp(uint64_t *vb_timestamp)
{
	const uint64_t now_us = read_utc_us();

	if (now_us > *vb_timestamp) {
		*vb_timestamp = now_us;
		return;
	}

	*vb_timestamp += 1;
}
|
||
|
+
|
||
|
+int lm_prepare_lockspace_idm(struct lockspace *ls)
|
||
|
+{
|
||
|
+ struct lm_idm *lm = NULL;
|
||
|
+
|
||
|
+ lm = malloc(sizeof(struct lm_idm));
|
||
|
+ if (!lm) {
|
||
|
+ log_error("S %s prepare_lockspace_idm fail to allocate lm_idm for %s",
|
||
|
+ ls->name, ls->vg_name);
|
||
|
+ return -ENOMEM;
|
||
|
+ }
|
||
|
+ memset(lm, 0x0, sizeof(struct lm_idm));
|
||
|
+
|
||
|
+ ls->lm_data = lm;
|
||
|
+ log_debug("S %s prepare_lockspace_idm done", ls->name);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+int lm_add_lockspace_idm(struct lockspace *ls, int adopt)
|
||
|
+{
|
||
|
+ char killpath[IDM_FAILURE_PATH_LEN];
|
||
|
+ char killargs[IDM_FAILURE_ARGS_LEN];
|
||
|
+ struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
|
||
|
+ int rv;
|
||
|
+
|
||
|
+ if (daemon_test)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ if (!strcmp(ls->name, S_NAME_GL_IDM)) {
|
||
|
+ /*
|
||
|
+ * Prepare the pv list for global lock, if the drive contains
|
||
|
+ * "propeller" partition, then this drive will be considered
|
||
|
+ * as a member of pv list.
|
||
|
+ */
|
||
|
+ rv = lm_idm_get_gl_lock_pv_list();
|
||
|
+ if (rv < 0) {
|
||
|
+ log_error("S %s add_lockspace_idm fail to get pv list for glb lock",
|
||
|
+ ls->name);
|
||
|
+ return -EIO;
|
||
|
+ } else {
|
||
|
+ log_error("S %s add_lockspace_idm get pv list for glb lock",
|
||
|
+ ls->name);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Construct the execution path for command "lvmlockctl" by using the
|
||
|
+ * path to the lvm binary and appending "lockctl".
|
||
|
+ */
|
||
|
+ memset(killpath, 0, sizeof(killpath));
|
||
|
+ snprintf(killpath, IDM_FAILURE_PATH_LEN, "%slockctl", LVM_PATH);
|
||
|
+
|
||
|
+ /* Pass the argument "--kill vg_name" for killpath */
|
||
|
+ memset(killargs, 0, sizeof(killargs));
|
||
|
+ snprintf(killargs, IDM_FAILURE_ARGS_LEN, "--kill %s", ls->vg_name);
|
||
|
+
|
||
|
+ /* Connect with IDM lock manager per every lockspace. */
|
||
|
+ rv = ilm_connect(&lmi->sock);
|
||
|
+ if (rv < 0) {
|
||
|
+ log_error("S %s add_lockspace_idm fail to connect the lock manager %d",
|
||
|
+ ls->name, lmi->sock);
|
||
|
+ lmi->sock = 0;
|
||
|
+ rv = -EMANAGER;
|
||
|
+ goto fail;
|
||
|
+ }
|
||
|
+
|
||
|
+ rv = ilm_set_killpath(lmi->sock, killpath, killargs);
|
||
|
+ if (rv < 0) {
|
||
|
+ log_error("S %s add_lockspace_idm fail to set kill path %d",
|
||
|
+ ls->name, rv);
|
||
|
+ rv = -EMANAGER;
|
||
|
+ goto fail;
|
||
|
+ }
|
||
|
+
|
||
|
+ log_debug("S %s add_lockspace_idm kill path is: \"%s %s\"",
|
||
|
+ ls->name, killpath, killargs);
|
||
|
+
|
||
|
+ log_debug("S %s add_lockspace_idm done", ls->name);
|
||
|
+ return 0;
|
||
|
+
|
||
|
+fail:
|
||
|
+ if (lmi && lmi->sock)
|
||
|
+ close(lmi->sock);
|
||
|
+ if (lmi)
|
||
|
+ free(lmi);
|
||
|
+ return rv;
|
||
|
+}
|
||
|
+
|
||
|
+int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg)
|
||
|
+{
|
||
|
+ struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
|
||
|
+ int i, rv = 0;
|
||
|
+
|
||
|
+ if (daemon_test)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ rv = ilm_disconnect(lmi->sock);
|
||
|
+ if (rv < 0)
|
||
|
+ log_error("S %s rem_lockspace_idm error %d", ls->name, rv);
|
||
|
+
|
||
|
+ /* Release pv list for global lock */
|
||
|
+ if (!strcmp(ls->name, "lvm_global")) {
|
||
|
+ for (i = 0; i < glb_lock_op.drive_num; i++) {
|
||
|
+ if (glb_lock_op.drives[i]) {
|
||
|
+ free(glb_lock_op.drives[i]);
|
||
|
+ glb_lock_op.drives[i] = NULL;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+out:
|
||
|
+ free(lmi);
|
||
|
+ ls->lm_data = NULL;
|
||
|
+ return rv;
|
||
|
+}
|
||
|
+
|
||
|
+static int lm_add_resource_idm(struct lockspace *ls, struct resource *r)
|
||
|
+{
|
||
|
+ struct rd_idm *rdi = (struct rd_idm *)r->lm_data;
|
||
|
+
|
||
|
+ if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
|
||
|
+ rdi->vb = zalloc(sizeof(struct val_blk));
|
||
|
+ if (!rdi->vb)
|
||
|
+ return -ENOMEM;
|
||
|
+ }
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+int lm_rem_resource_idm(struct lockspace *ls, struct resource *r)
|
||
|
+{
|
||
|
+ struct rd_idm *rdi = (struct rd_idm *)r->lm_data;
|
||
|
+
|
||
|
+ if (rdi->vb)
|
||
|
+ free(rdi->vb);
|
||
|
+
|
||
|
+ memset(rdi, 0, sizeof(struct rd_idm));
|
||
|
+ r->lm_init = 0;
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int to_idm_mode(int ld_mode)
|
||
|
+{
|
||
|
+ switch (ld_mode) {
|
||
|
+ case LD_LK_EX:
|
||
|
+ return IDM_MODE_EXCLUSIVE;
|
||
|
+ case LD_LK_SH:
|
||
|
+ return IDM_MODE_SHAREABLE;
|
||
|
+ default:
|
||
|
+ break;
|
||
|
+ };
|
||
|
+
|
||
|
+ return -1;
|
||
|
+}
|
||
|
+
|
||
|
+/*
+ * Acquire the IDM lock for resource r in lockspace ls.
+ *
+ * ld_mode  - requested lvmlockd mode (LD_LK_EX or LD_LK_SH)
+ * vb_out   - receives the value block when the resource carries one
+ * lv_uuid  - LV UUID string, only read for LD_RT_LV resources
+ * pvs      - PV path list, only read for LD_RT_LV resources
+ * adopt    - not used by this function
+ *
+ * Returns a negative value on failure: the lm_add_resource_idm() error,
+ * -EINVAL for an invalid mode or drive list, -ELOCKIO when ilm_lock()
+ * fails.  In daemon_test mode returns 0 without contacting the lock
+ * manager.  Otherwise returns the non-negative result of the last IDM
+ * call, or 0 when the cached value block had to be invalidated.
+ */
+int lm_lock_idm(struct lockspace *ls, struct resource *r, int ld_mode,
+		struct val_blk *vb_out, char *lv_uuid, struct pvs *pvs,
+		int adopt)
+{
+	struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
+	struct rd_idm *rdi = (struct rd_idm *)r->lm_data;
+	char **drive_path = NULL;
+	uint64_t timestamp;
+	int reset_vb = 0;
+	int mode, rv, i;
+
+	if (!r->lm_init) {
+		rv = lm_add_resource_idm(ls, r);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	/*
+	 * Validate the converted mode itself.  Testing rv here would look at
+	 * a stale or uninitialized value and never reject a bad ld_mode.
+	 */
+	mode = to_idm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("lock_idm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+	rdi->op.mode = mode;
+
+	log_debug("S %s R %s lock_idm", ls->name, r->name);
+
+	if (daemon_test) {
+		if (rdi->vb) {
+			vb_out->version = le16_to_cpu(rdi->vb->version);
+			vb_out->flags = le16_to_cpu(rdi->vb->flags);
+			vb_out->r_version = le32_to_cpu(rdi->vb->r_version);
+		}
+		return 0;
+	}
+
+	rdi->op.timeout = IDM_TIMEOUT;
+
+	/*
+	 * Generate the UUID string, for RT_VG, it only needs to generate
+	 * UUID string for VG level, for RT_LV, it needs to generate
+	 * UUID strings for both VG and LV levels.  At the end, these IDs
+	 * are used as identifier for IDM in drive firmware.
+	 */
+	if (r->type == LD_RT_VG || r->type == LD_RT_LV)
+		log_debug("S %s R %s VG uuid %s", ls->name, r->name, ls->vg_uuid);
+	if (r->type == LD_RT_LV)
+		log_debug("S %s R %s LV uuid %s", ls->name, r->name, lv_uuid);
+
+	/* For any other resource type the lock ID stays all zeros */
+	memset(&rdi->id, 0x0, sizeof(struct idm_lock_id));
+	if (r->type == LD_RT_VG) {
+		uuid_read_format(rdi->id.vg_uuid, ls->vg_uuid);
+	} else if (r->type == LD_RT_LV) {
+		uuid_read_format(rdi->id.vg_uuid, ls->vg_uuid);
+		uuid_read_format(rdi->id.lv_uuid, lv_uuid);
+	}
+
+	/*
+	 * Establish the drive path list for lock, since different lock type
+	 * has different drive list; the GL lock uses the global pv list,
+	 * the VG lock uses the pv list spanned for the whole volume group,
+	 * the LV lock uses the pv list for the logical volume.
+	 */
+	switch (r->type) {
+	case LD_RT_GL:
+		drive_path = glb_lock_op.drives;
+		rdi->op.drive_num = glb_lock_op.drive_num;
+		break;
+	case LD_RT_VG:
+		drive_path = (char **)ls->pvs.path;
+		rdi->op.drive_num = ls->pvs.num;
+		break;
+	case LD_RT_LV:
+		drive_path = (char **)pvs->path;
+		rdi->op.drive_num = pvs->num;
+		break;
+	default:
+		break;
+	}
+
+	if (!drive_path) {
+		log_error("S %s R %s cannot find the valid drive path array",
+			  ls->name, r->name);
+		return -EINVAL;
+	}
+
+	if (rdi->op.drive_num >= ILM_DRIVE_MAX_NUM) {
+		log_error("S %s R %s exceeds limitation for drive path array",
+			  ls->name, r->name);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < rdi->op.drive_num; i++)
+		rdi->op.drives[i] = drive_path[i];
+
+	log_debug("S %s R %s mode %d drive_num %d timeout %d",
+		  ls->name, r->name, rdi->op.mode,
+		  rdi->op.drive_num, rdi->op.timeout);
+
+	for (i = 0; i < rdi->op.drive_num; i++)
+		log_debug("S %s R %s drive path[%d] %s",
+			  ls->name, r->name, i, rdi->op.drives[i]);
+
+	rv = ilm_lock(lmi->sock, &rdi->id, &rdi->op);
+	if (rv < 0) {
+		log_debug("S %s R %s lock_idm acquire mode %d rv %d",
+			  ls->name, r->name, ld_mode, rv);
+		return -ELOCKIO;
+	}
+
+	/* Without a value block there is nothing more to fetch */
+	if (!rdi->vb)
+		return rv;
+
+	rv = ilm_read_lvb(lmi->sock, &rdi->id, (char *)&timestamp,
+			  sizeof(uint64_t));
+
+	if (rv < 0) {
+		/*
+		 * If fail to read value block, which might be caused by drive
+		 * failure, notify up layer to invalidate metadata.
+		 */
+		log_error("S %s R %s lock_idm get_lvb error %d",
+			  ls->name, r->name, rv);
+		reset_vb = 1;
+
+		/* Reset timestamp */
+		rdi->vb_timestamp = 0;
+	} else if (rdi->vb_timestamp != timestamp) {
+		/*
+		 * If the cached timestamp mismatches with the stored value
+		 * in the IDM, this means another host has updated timestamp
+		 * for the new VB.  Let's reset VB and notify up layer to
+		 * invalidate metadata.
+		 */
+		log_debug("S %s R %s lock_idm get lvb timestamp %llu:%llu",
+			  ls->name, r->name,
+			  (unsigned long long)rdi->vb_timestamp,
+			  (unsigned long long)timestamp);
+
+		rdi->vb_timestamp = timestamp;
+		reset_vb = 1;
+	}
+
+	if (reset_vb) {
+		memset(rdi->vb, 0, sizeof(struct val_blk));
+		memset(vb_out, 0, sizeof(struct val_blk));
+
+		/*
+		 * The lock is still acquired, but the vb values has
+		 * been invalidated.
+		 */
+		return 0;
+	}
+
+	/* Otherwise, copy the cached VB to up layer */
+	memcpy(vb_out, rdi->vb, sizeof(struct val_blk));
+
+	return rv;
+}
|
||
|
+
|
||
|
+/*
+ * Convert the IDM lock held on resource r to the mode given by ld_mode.
+ *
+ * When the resource has a value block, r_version is non-zero and the
+ * resource's mode is LD_LK_EX, the cached value block is updated with
+ * r_version and a fresh timestamp is written to the IDM before converting.
+ *
+ * Returns -EINVAL for an invalid ld_mode, 0 in daemon_test mode, -ELMERR
+ * when writing the timestamp fails, otherwise the result of ilm_convert().
+ */
+int lm_convert_idm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
+	struct rd_idm *rdi = (struct rd_idm *)r->lm_data;
+	int update_vb = rdi->vb && r_version && (r->mode == LD_LK_EX);
+	int mode, rv;
+
+	if (update_vb) {
+		if (!rdi->vb->version) {
+			/* first time vb has been written */
+			rdi->vb->version = VAL_BLK_VERSION;
+		}
+		rdi->vb->r_version = r_version;
+
+		log_debug("S %s R %s convert_idm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		lm_idm_update_vb_timestamp(&rdi->vb_timestamp);
+		log_debug("S %s R %s convert_idm vb %x %x %u timestamp %llu",
+			  ls->name, r->name, rdi->vb->version, rdi->vb->flags,
+			  rdi->vb->r_version,
+			  (unsigned long long)rdi->vb_timestamp);
+	}
+
+	mode = to_idm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("S %s R %s convert_idm invalid mode %d",
+			  ls->name, r->name, ld_mode);
+		return -EINVAL;
+	}
+
+	log_debug("S %s R %s convert_idm", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	if (update_vb) {
+		/*
+		 * Pass the address of the timestamp: the buffer handed to
+		 * ilm_write_lvb() must point at the 8 bytes to store, not be
+		 * the timestamp value reinterpreted as a pointer.
+		 */
+		rv = ilm_write_lvb(lmi->sock, &rdi->id,
+				   (char *)&rdi->vb_timestamp, sizeof(uint64_t));
+		if (rv < 0) {
+			log_error("S %s R %s convert_idm write lvb error %d",
+				  ls->name, r->name, rv);
+			return -ELMERR;
+		}
+	}
+
+	rv = ilm_convert(lmi->sock, &rdi->id, mode);
+	if (rv < 0)
+		log_error("S %s R %s convert_idm convert error %d",
+			  ls->name, r->name, rv);
+
+	return rv;
+}
|
||
|
+
|
||
|
+/*
+ * Release the IDM lock held on resource r.
+ *
+ * When the resource has a value block, r_version is non-zero and the
+ * resource's mode is LD_LK_EX, the cached value block is updated with
+ * r_version and a fresh timestamp is written to the IDM before unlocking.
+ * lmu_flags is not used by this function.
+ *
+ * Returns 0 in daemon_test mode, -ELMERR when writing the timestamp fails,
+ * otherwise the result of ilm_unlock().
+ */
+int lm_unlock_idm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags)
+{
+	struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
+	struct rd_idm *rdi = (struct rd_idm *)r->lm_data;
+	int update_vb = rdi->vb && r_version && (r->mode == LD_LK_EX);
+	int rv;
+
+	if (update_vb) {
+		if (!rdi->vb->version) {
+			/* first time vb has been written */
+			rdi->vb->version = VAL_BLK_VERSION;
+		}
+		/* r_version is known to be non-zero here */
+		rdi->vb->r_version = r_version;
+
+		lm_idm_update_vb_timestamp(&rdi->vb_timestamp);
+		log_debug("S %s R %s unlock_idm vb %x %x %u timestamp %llu",
+			  ls->name, r->name, rdi->vb->version, rdi->vb->flags,
+			  rdi->vb->r_version,
+			  (unsigned long long)rdi->vb_timestamp);
+	}
+
+	log_debug("S %s R %s unlock_idm", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	if (update_vb) {
+		rv = ilm_write_lvb(lmi->sock, &rdi->id,
+				   (char *)&rdi->vb_timestamp, sizeof(uint64_t));
+		if (rv < 0) {
+			log_error("S %s R %s unlock_idm set_lvb error %d",
+				  ls->name, r->name, rv);
+			return -ELMERR;
+		}
+	}
+
+	rv = ilm_unlock(lmi->sock, &rdi->id);
+	if (rv < 0)
+		log_error("S %s R %s unlock_idm error %d", ls->name, r->name, rv);
+
+	return rv;
+}
|
||
|
+
|
||
|
+/*
+ * Query the host count of every initialized resource in the lockspace and
+ * return the largest count seen (0 when no resource is initialized).
+ *
+ * Returns the negative ilm_get_host_count() error as soon as one query
+ * fails.  The notify argument is not used by this function.
+ */
+int lm_hosts_idm(struct lockspace *ls, int notify)
+{
+	struct lm_idm *lmi = (struct lm_idm *)ls->lm_data;
+	struct resource *res;
+	struct rd_idm *rdi;
+	int host_count, self;
+	int max_count = 0;
+	int rv;
+
+	list_for_each_entry(res, &ls->resources, list) {
+		if (res->lm_init) {
+			rdi = (struct rd_idm *)res->lm_data;
+
+			rv = ilm_get_host_count(lmi->sock, &rdi->id, &rdi->op,
+						&host_count, &self);
+			if (rv < 0) {
+				log_error("S %s lm_hosts_idm error %d", ls->name, rv);
+				return rv;
+			}
+
+			/*
+			 * Fixup: need to reduce self count
+			 * NOTE(review): 'self' is fetched but not applied yet.
+			 */
+			if (max_count < host_count)
+				max_count = host_count;
+		}
+	}
+
+	return max_count;
+}
|
||
|
+
|
||
|
+/*
+ * Placeholder: always returns -1 and leaves ls_rejoin untouched.
+ */
+int lm_get_lockspaces_idm(struct list_head *ls_rejoin)
+{
+	/* TODO: Need to add support for adoption. */
+	return -1;
+}
|
||
|
+
|
||
|
+/*
+ * Probe whether the IDM lock manager can be reached.
+ *
+ * In daemon_test mode the value of gl_use_idm is returned instead.
+ * Otherwise returns 1 when ilm_connect() succeeds (the probe connection is
+ * closed again right away) and 0 when it fails.
+ */
+int lm_is_running_idm(void)
+{
+	int fd;
+	int err;
+
+	if (daemon_test)
+		return gl_use_idm;
+
+	err = ilm_connect(&fd);
+	if (err >= 0) {
+		ilm_disconnect(fd);
+		return 1;
+	}
+
+	log_error("Fail to connect seagate IDM lock manager %d", err);
+	return 0;
+}
|
||
|
diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h
|
||
|
index 14bdfeed04a0..06bf07eb59cf 100644
|
||
|
--- a/daemons/lvmlockd/lvmlockd-internal.h
|
||
|
+++ b/daemons/lvmlockd/lvmlockd-internal.h
|
||
|
@@ -20,6 +20,7 @@
|
||
|
#define R_NAME_GL "GLLK"
|
||
|
#define R_NAME_VG "VGLK"
|
||
|
#define S_NAME_GL_DLM "lvm_global"
|
||
|
+#define S_NAME_GL_IDM "lvm_global"
|
||
|
#define LVM_LS_PREFIX "lvm_" /* ls name is prefix + vg_name */
|
||
|
/* global lockspace name for sanlock is a vg name */
|
||
|
|
||
|
@@ -29,6 +30,7 @@ enum {
|
||
|
LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */
|
||
|
LD_LM_DLM = 2,
|
||
|
LD_LM_SANLOCK = 3,
|
||
|
+ LD_LM_IDM = 4,
|
||
|
};
|
||
|
|
||
|
/* operation types */
|
||
|
@@ -118,6 +120,11 @@ struct client {
|
||
|
*/
|
||
|
#define DEFAULT_MAX_RETRIES 4
|
||
|
|
||
|
+struct pvs {
+	const char **path;	/* array of PV path strings */
+	int num;		/* number of entries in 'path' */
+};
|
||
|
+
|
||
|
struct action {
|
||
|
struct list_head list;
|
||
|
uint32_t client_id;
|
||
|
@@ -140,6 +147,7 @@ struct action {
|
||
|
char vg_args[MAX_ARGS+1];
|
||
|
char lv_args[MAX_ARGS+1];
|
||
|
char vg_sysid[MAX_NAME+1];
|
||
|
+ struct pvs pvs; /* PV list for idm */
|
||
|
};
|
||
|
|
||
|
struct resource {
|
||
|
@@ -184,6 +192,7 @@ struct lockspace {
|
||
|
uint64_t free_lock_offset; /* for sanlock, start search for free lock here */
|
||
|
int free_lock_sector_size; /* for sanlock */
|
||
|
int free_lock_align_size; /* for sanlock */
|
||
|
+ struct pvs pvs; /* for idm: PV list */
|
||
|
|
||
|
uint32_t start_client_id; /* client_id that started the lockspace */
|
||
|
pthread_t thread; /* makes synchronous lock requests */
|
||
|
@@ -325,6 +334,7 @@ static inline int list_empty(const struct list_head *head)
|
||
|
EXTERN int gl_type_static;
|
||
|
EXTERN int gl_use_dlm;
|
||
|
EXTERN int gl_use_sanlock;
|
||
|
+EXTERN int gl_use_idm;
|
||
|
EXTERN int gl_vg_removed;
|
||
|
EXTERN char gl_lsname_dlm[MAX_NAME+1];
|
||
|
EXTERN char gl_lsname_sanlock[MAX_NAME+1];
|
||
|
@@ -619,4 +629,102 @@ static inline int lm_support_sanlock(void)
|
||
|
|
||
|
#endif /* sanlock support */
|
||
|
|
||
|
+#ifdef LOCKDIDM_SUPPORT
+
+/* IDM lock manager entry points, declared when IDM support is built in. */
+int lm_data_size_idm(void);
+int lm_init_vg_idm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_idm(struct lockspace *ls);
+int lm_add_lockspace_idm(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg);
+int lm_lock_idm(struct lockspace *ls, struct resource *r, int ld_mode,
+		struct val_blk *vb_out, char *lv_uuid, struct pvs *pvs,
+		int adopt);
+int lm_convert_idm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version);
+int lm_unlock_idm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags);
+int lm_hosts_idm(struct lockspace *ls, int notify);
+int lm_get_lockspaces_idm(struct list_head *ls_rejoin);
+int lm_is_running_idm(void);
+int lm_rem_resource_idm(struct lockspace *ls, struct resource *r);
+
+/* Reports that IDM support is compiled in. */
+static inline int lm_support_idm(void)
+{
+	return 1;
+}
+
+#else
+
+/*
+ * IDM support is not compiled in: each entry point below is an inline stub
+ * returning -1, except lm_is_running_idm() and lm_support_idm(), which
+ * return 0.
+ */
+
+static inline int lm_data_size_idm(void)
+{
+	return -1;
+}
+
+static inline int lm_init_vg_idm(char *ls_name, char *vg_name,
+				 uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_idm(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_idm(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_idm(struct lockspace *ls, struct resource *r,
+			      int ld_mode, struct val_blk *vb_out,
+			      char *lv_uuid, struct pvs *pvs, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_idm(struct lockspace *ls, struct resource *r,
+				 int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_idm(struct lockspace *ls, struct resource *r,
+				uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_hosts_idm(struct lockspace *ls, int notify)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_idm(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+/* Without IDM support the lock manager is never reported as running. */
+static inline int lm_is_running_idm(void)
+{
+	return 0;
+}
+
+static inline int lm_rem_resource_idm(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+/* Reports that IDM support is not compiled in. */
+static inline int lm_support_idm(void)
+{
+	return 0;
+}
+
+#endif /* Seagate IDM support */
|
||
|
+
|
||
|
#endif /* _LVM_LVMLOCKD_INTERNAL_H */
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|