SUBDIRS += po
endif
-SUBDIRS += lib tools
+SUBDIRS += lib tools daemons
ifeq ($(MAKECMDGOALS),distclean)
- SUBDIRS += lib/format1 \
+ SUBDIRS += daemons/clvmd \
+ lib/format1 \
lib/format_pool \
+ lib/locking \
lib/mirror \
lib/snapshot \
po \
include make.tmpl
+daemons: lib
lib: include
tools: lib
-po: lib tools
+po: tools daemons
ifeq ("@INTL@", "yes")
lib.pofile: include.pofile
tools.pofile: lib.pofile
-po.pofile: lib.pofile tools.pofile
+daemons.pofile: lib.pofile
+po.pofile: tools.pofile daemons.pofile
pofile: po.pofile
endif
-2.00.17-cvs (2004-06-20)
+2.00.18-cvs (2004-06-24)
+Version 2.00.18 - 24 June 2004
+==============================
+ Add cluster support.
+
Version 2.00.17 - 20 June 2004
==============================
configure --enable-fsadm to try out fsadm. fsadm is not tested yet.
#endif"
ac_default_prefix=/usr
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL FSADM LIBOBJS LTLIBOBJS'
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os MSGFMT JOBS STATIC_LINK LVM1 POOL SNAPSHOTS MIRRORS OWNER GROUP CLDFLAGS CLDWHOLEARCHIVE CLDNOWHOLEARCHIVE LD_DEPS LD_FLAGS SOFLAG LVM_VERSION LVM1_FALLBACK DEBUG DEVMAPPER HAVE_LIBDL HAVE_SELINUX CMDLIB LOCALEDIR CONFDIR STATICDIR INTL_PACKAGE INTL CLVMD CLUSTER FSADM LIBOBJS LTLIBOBJS'
ac_subst_files=''
# Initialize some variables set by options.
TYPE=internal
--with-pool=TYPE GFS pool read-only support: internal/shared/none
TYPE=internal
+ --with-cluster=TYPE Cluster LVM locking support: internal/shared/none
+ TYPE=internal
--with-snapshots=TYPE Snapshot support: internal/shared/none
TYPE=internal
--with-mirrors=TYPE Mirror support: internal/shared/none
TYPE=internal
+ --with-clvmd Build cluster LVM Daemon
--with-localedir=DIR Translation files in DIR PREFIX/share/locale
--with-confdir=DIR Configuration files in DIR /etc
--with-staticdir=DIR Static binary in DIR EXEC_PREFIX/sbin
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
+ CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
+ CLUSTER=none
FSADM=no ;;
esac
fi
+# Check whether --with-cluster or --without-cluster was given.
+if test "${with_cluster+set}" = set; then
+ withval="$with_cluster"
+ CLUSTER="$withval"
+fi;
+
+if [ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ];
+ then { { echo "$as_me:$LINENO: error: --with-cluster parameter invalid
+" >&5
+echo "$as_me: error: --with-cluster parameter invalid
+" >&2;}
+ { (exit 1); exit 1; }; }
+ exit
+fi;
+
+if test x$CLUSTER = xinternal; then
+ CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
+fi
+
# Check whether --enable-jobs or --disable-jobs was given.
if test "${enable_jobs+set}" = set; then
enableval="$enable_jobs"
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
+
+# Check whether --with-clvmd or --without-clvmd was given.
+if test "${with_clvmd+set}" = set; then
+ withval="$with_clvmd"
+ \
+CLVMD=$withval
+else
+ CLVMD=no
+fi;
+if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
+ CLUSTER=internal
+fi
+echo "$ac_t""$CLVMD" 1>&6
+
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
# Check whether --enable-debug or --disable-debug was given.
if test "${enable_debug+set}" = set; then
HAVE_LIBDL=no
fi
-if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
+if [ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ];
then { { echo "$as_me:$LINENO: error: Features cannot be 'shared' when building statically
- ac_config_files="$ac_config_files Makefile make.tmpl doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
+
+
+ ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile doc/Makefile include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/snapshot/Makefile man/Makefile po/Makefile tools/Makefile tools/version.h tools/fsadm/Makefile test/mm/Makefile test/device/Makefile test/format1/Makefile test/regex/Makefile test/filters/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
# Handling of arguments.
"Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"make.tmpl" ) CONFIG_FILES="$CONFIG_FILES make.tmpl" ;;
+ "daemons/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;;
+ "daemons/clvmd/Makefile" ) CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;;
"doc/Makefile" ) CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
"include/Makefile" ) CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
"lib/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/Makefile" ;;
"lib/format1/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format1/Makefile" ;;
"lib/format_pool/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/format_pool/Makefile" ;;
+ "lib/locking/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/locking/Makefile" ;;
"lib/mirror/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/mirror/Makefile" ;;
"lib/snapshot/Makefile" ) CONFIG_FILES="$CONFIG_FILES lib/snapshot/Makefile" ;;
"man/Makefile" ) CONFIG_FILES="$CONFIG_FILES man/Makefile" ;;
s,@STATICDIR@,$STATICDIR,;t t
s,@INTL_PACKAGE@,$INTL_PACKAGE,;t t
s,@INTL@,$INTL,;t t
+s,@CLVMD@,$CLVMD,;t t
+s,@CLUSTER@,$CLUSTER,;t t
s,@FSADM@,$FSADM,;t t
s,@LIBOBJS@,$LIBOBJS,;t t
s,@LTLIBOBJS@,$LTLIBOBJS,;t t
SOFLAG="-shared"
DEVMAPPER=yes
ODIRECT=yes
+ CLUSTER=internal
FSADM=no ;;
darwin*)
CFLAGS="-no-cpp-precomp -fno-common"
SOFLAG="-dynamiclib"
DEVMAPPER=no
ODIRECT=no
+ CLUSTER=none
FSADM=no ;;
esac
CFLAGS="$CFLAGS -DPOOL_INTERNAL"
fi
+dnl -- cluster_locking inclusion type
+AC_ARG_WITH(cluster,
+  [ --with-cluster=TYPE Cluster LVM locking support: internal/shared/none
+  [TYPE=internal] ],
+  [ CLUSTER="$withval" ])
+
+dnl Reject anything other than none/internal/shared
+if [[ "x$CLUSTER" != xnone -a "x$CLUSTER" != xinternal -a "x$CLUSTER" != xshared ]];
+  then AC_MSG_ERROR(
+--with-cluster parameter invalid
+)
+  exit
+fi;
+
+dnl Built-in cluster locking needs the internal define at compile time
+if test x$CLUSTER = xinternal; then
+  CFLAGS="$CFLAGS -DCLUSTER_LOCKING_INTERNAL"
+fi
AC_ARG_ENABLE(jobs, [ --enable-jobs=NUM Number of jobs to run simultaneously], JOBS=-j$enableval, JOBS=-j2)
CFLAGS="$CFLAGS -DREADLINE_SUPPORT"
fi
+dnl Build cluster LVM daemon
+AC_ARG_WITH(clvmd, [ --with-clvmd Build cluster LVM Daemon], \
+CLVMD=$withval, CLVMD=no)
+dnl If clvmd is enabled but no cluster locking was chosen, automatically include the internal locking.
+if test x$CLVMD = xyes && test x$CLUSTER = xnone; then
+  CLUSTER=internal
+fi
+echo "$ac_t""$CLVMD" 1>&6
+
echo $ac_n "checking whether to enable debugging""... $ac_c" 1>&6
dnl Enable Debugging
AC_ARG_ENABLE(debug, [ --enable-debug Enable debugging], \
fi
dnl Check for shared/static conflicts
-if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o \
+if [[ \( "x$LVM1" = xshared -o "x$POOL" = xshared -o "x$CLUSTER" = xshared -o \
"x$SNAPSHOTS" = xshared -o "x$MIRRORS" = xshared \
\) -a "x$STATIC_LINK" = xyes ]];
then AC_MSG_ERROR(
AC_SUBST(STATICDIR)
AC_SUBST(INTL_PACKAGE)
AC_SUBST(INTL)
+AC_SUBST(CLVMD)
+AC_SUBST(CLUSTER)
AC_SUBST(FSADM)
dnl First and last lines should not contain files to generate in order to
AC_OUTPUT( \
Makefile \
make.tmpl \
+daemons/Makefile \
+daemons/clvmd/Makefile \
doc/Makefile \
include/Makefile \
lib/Makefile \
lib/format1/Makefile \
lib/format_pool/Makefile \
+lib/locking/Makefile \
lib/mirror/Makefile \
lib/snapshot/Makefile \
man/Makefile \
--- /dev/null
+#
+# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+#
+# This file is part of the LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+
+# Only descend into clvmd/ when configure was run with --with-clvmd
+# (@CLVMD@ is substituted by configure; SUBDIRS is consumed by make.tmpl)
+ifeq ("@CLVMD@", "yes")
+  SUBDIRS = clvmd
+endif
+
+include $(top_srcdir)/make.tmpl
+
--- /dev/null
+#
+# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+#
+# This file is part of the LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+
+# Sources for the clvmd binary; OBJECTS and the compile rules are
+# presumably derived from SOURCES by make.tmpl — TODO confirm
+SOURCES = \
+	clvmd-cman.c \
+	clvmd-command.c \
+	clvmd.c \
+	libclvm.c \
+	lvm-functions.c \
+	system-lv.c
+
+TARGETS = \
+	clvmd
+
+include $(top_srcdir)/make.tmpl
+
+# -D_REENTRANT: clvmd is multi-threaded (linked against -lpthread below)
+CFLAGS += -D_REENTRANT -fno-strict-aliasing
+LIBS += -ldevmapper -ldlm -llvm -lpthread
+
+INSTALL_TARGETS = \
+	install_clvmd
+
+# NOTE(review): LVMLIBS and LD_FLAGS are expected from make.tmpl — verify
+clvmd: $(OBJECTS) $(top_srcdir)/lib/liblvm.a
+	$(CC) -o clvmd $(OBJECTS) $(LD_FLAGS) $(LVMLIBS) $(LIBS)
+
+.PHONY: install_clvmd
+
+install_clvmd: $(TARGETS)
+	$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) clvmd \
+	$(sbindir)/clvmd
+
+install: $(INSTALL_TARGETS)
+
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Definitions for CLVMD server and clients */
+
+/*
+ * The protocol spoken over the cluster and across the local socket.
+ */
+
+#ifndef _CLVM_H
+#define _CLVM_H
+
+/* NOTE(review): relies on <stdint.h> integer types already being in scope
+   at the point of inclusion — confirm all users include it first. */
+
+/* Message header used both across the cluster and over the local socket.
+   node[] and args[] overlap the same trailing storage: args really begins
+   after node's terminating NUL, so only one declaration is meaningful at
+   a time (see the member comments below). */
+struct clvm_header {
+	uint8_t cmd;		/* See below */
+	uint8_t flags;		/* See below */
+	uint16_t xid;		/* Transaction ID */
+	uint32_t clientid;	/* Only used in Daemon->Daemon comms */
+	int32_t status;		/* For replies, whether request succeeded */
+	uint32_t arglen;	/* Length of argument below.
+				   If >1500 then it will be passed
+				   around the cluster in the system LV */
+	char node[1];		/* Actually a NUL-terminated string, node name.
+				   If this is empty then the command is
+				   forwarded to all cluster nodes unless
+				   FLAG_LOCAL is also set. */
+	char args[1];		/* Arguments for the command follow the
+				   node name, This member is only
+				   valid if the node name is empty */
+} __attribute__ ((packed));
+
+/* Flags */
+#define CLVMD_FLAG_LOCAL 1	/* Only do this on the local node */
+#define CLVMD_FLAG_SYSTEMLV 2	/* Data in system LV under my node name */
+
+/* Name of the local socket to communicate between libclvm and clvmd */
+//static const char CLVMD_SOCKNAME[]="/var/run/clvmd";
+/* The leading NUL selects the Linux abstract socket namespace, so no
+   filesystem entry is created */
+static const char CLVMD_SOCKNAME[] = "\0clvmd";
+
+/* Internal commands & replies */
+#define CLVMD_CMD_REPLY 1
+#define CLVMD_CMD_VERSION 2	/* Send version around cluster when we start */
+#define CLVMD_CMD_GOAWAY 3	/* Die if received this - we are running
+				   an incompatible version */
+#define CLVMD_CMD_TEST 4	/* Just for mucking about */
+
+#define CLVMD_CMD_LOCK 30
+#define CLVMD_CMD_UNLOCK 31
+
+/* Lock/Unlock commands */
+#define CLVMD_CMD_LOCK_LV 50
+#define CLVMD_CMD_LOCK_VG 51
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * CMAN communication layer for clvmd.
+ */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "clvmd-comms.h"
+#include "clvm.h"
+#include "libdlm.h"
+#include "log.h"
+#include "clvmd.h"
+#include "lvm-functions.h"
+
+#define LOCKSPACE_NAME "clvmd"
+
+static int cluster_sock;
+static int num_nodes;
+static struct cl_cluster_node *nodes = NULL;
+static int count_nodes; /* size of allocated nodes array */
+static int max_updown_nodes = 50; /* Current size of the allocated array */
+/* Node up/down status, indexed by nodeid */
+static int *node_updown = NULL;
+static dlm_lshandle_t *lockspace;
+
+static void sigusr1_handler(int sig);
+static void count_clvmds_running(void);
+static void get_members(void);
+static int nodeid_from_csid(char *csid);
+static int name_from_nodeid(int nodeid, char *name);
+
+struct lock_wait {
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ struct dlm_lksb lksb;
+};
+
+/* Connect to the CMAN cluster socket, discover current membership and
+   create the DLM lockspace that VG/LV locks will live in.
+   Returns 0 on success, -1 on any failure (socket, bind, lockspace). */
+int init_cluster()
+{
+	struct sockaddr_cl saddr;
+	int port = CLUSTER_PORT_CLVMD;
+
+	/* Open the cluster communication socket */
+	cluster_sock = socket(AF_CLUSTER, SOCK_DGRAM, CLPROTO_CLIENT);
+	if (cluster_sock == -1) {
+		perror("Can't open cluster socket");
+		return -1;
+	}
+
+	/* Bind to our port number on the cluster.
+	   Writes to this will block if the cluster loses quorum */
+	saddr.scl_family = AF_CLUSTER;
+	saddr.scl_port = port;
+
+	/* NOTE(review): the socket is left open on the failure paths below;
+	   presumably callers treat -1 as fatal and exit — confirm */
+	if (bind
+	    (cluster_sock, (struct sockaddr *) &saddr,
+	     sizeof(struct sockaddr_cl))) {
+		log_error("Can't bind cluster socket: %m");
+		return -1;
+	}
+
+	/* Get the cluster members list */
+	get_members();
+	count_clvmds_running();
+
+	/* Create a lockspace for LV & VG locks to live in */
+	lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
+	if (!lockspace) {
+		log_error("Unable to create lockspace for CLVM\n");
+		return -1;
+	}
+	dlm_ls_pthread_init(lockspace);
+	return 0;
+}
+
+/* Return the fd the main loop should select() on for cluster traffic */
+int get_main_cluster_fd()
+{
+	return cluster_sock;
+}
+
+/* Number of nodes found by the most recent get_members() scan */
+int get_num_nodes()
+{
+	return num_nodes;
+}
+
+/* send_message with the fd check removed.
+   Send msglen bytes from buf to the node identified by csid, or to the
+   whole cluster when csid is NULL.  EAGAIN is retried indefinitely;
+   returns the sendmsg() result (bytes sent, or -1 on error). */
+int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext)
+{
+	struct iovec iov[2];
+	struct msghdr msg;
+	struct sockaddr_cl saddr;
+	int len = 0;
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = iov;
+	msg.msg_flags = 0;
+	iov[0].iov_len = msglen;
+	iov[0].iov_base = buf;
+
+	saddr.scl_family = AF_CLUSTER;
+	saddr.scl_port = CLUSTER_PORT_CLVMD;
+	if (csid) {
+		msg.msg_name = &saddr;
+		msg.msg_namelen = sizeof(saddr);
+		/* csid is a MAX_CSID_LEN (4) byte copy of the CMAN nodeid */
+		memcpy(&saddr.scl_nodeid, csid, MAX_CSID_LEN);
+	} else {		/* Cluster broadcast */
+
+		msg.msg_name = NULL;
+		msg.msg_namelen = 0;
+	}
+
+	do {
+		len = sendmsg(cluster_sock, &msg, 0);
+		if (len < 0 && errno != EAGAIN)
+			log_error(errtext);
+
+	} while (len == -1 && errno == EAGAIN);
+	return len;
+}
+
+/* Fill csid (MAX_CSID_LEN bytes) with our own node's id; left zeroed if
+   we do not appear in the member list */
+void get_our_csid(char *csid)
+{
+	int i;
+	memset(csid, 0, MAX_CSID_LEN);
+
+	for (i = 0; i < num_nodes; i++) {
+		if (nodes[i].us)
+			memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN);
+	}
+}
+
+/* Call a callback routine for each node we know about (down means not running a clvmd) */
+int cluster_do_node_callback(struct local_client *client,
+			     void (*callback) (struct local_client *, char *,
+					       int))
+{
+	int i;
+	int somedown = 0;
+
+	/* Invoke the callback once per cluster member with its clvmd
+	   up/down status; return -1 if any node is down, 0 otherwise */
+	for (i = 0; i < get_num_nodes(); i++) {
+		callback(client, (char *)&nodes[i].node_id, node_updown[nodes[i].node_id]);
+		if (!node_updown[nodes[i].node_id])
+			somedown = -1;
+	}
+	return somedown;
+}
+
+/* Process OOB message from the cluster socket,
+   this currently just means that a node has stopped listening on our port */
+static void process_oob_msg(char *buf, int len, int nodeid)
+{
+	/* NOTE(review): name_from_nodeid strcpy()s into this fixed buffer —
+	   assumes cluster node names are < 256 bytes, confirm */
+	char namebuf[256];
+	/* buf[0] is the OOB message type (CLUSTER_OOB_MSG_* constants) */
+	switch (buf[0]) {
+	case CLUSTER_OOB_MSG_PORTCLOSED:
+		name_from_nodeid(nodeid, namebuf);
+		log_notice("clvmd on node %s has died\n", namebuf);
+		DEBUGLOG("Got OOB message, removing node %s\n", namebuf);
+
+		node_updown[nodeid] = 0;
+		break;
+
+	case CLUSTER_OOB_MSG_STATECHANGE:
+		/* Membership changed — re-read the member list */
+		DEBUGLOG("Got OOB message, Cluster state change\n");
+		get_members();
+		break;
+	default:
+		/* ERROR */
+		DEBUGLOG("Got unknown OOB message: %d\n", buf[0]);
+	}
+}
+
+/* Called by the main loop when the cluster socket is readable.  Reads one
+   message into buf (at most len bytes), fills csid with the sender's
+   nodeid and returns the message length.  OOB messages (membership
+   changes) are consumed here and reported upward as -1/EAGAIN so the
+   caller ignores them.  We never hand back a new client. */
+int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid,
+			struct local_client **new_client)
+{
+	struct iovec iov[2];
+	struct msghdr msg;
+	struct sockaddr_cl saddr;
+
+	/* We never return a new client */
+	*new_client = NULL;
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = iov;
+	msg.msg_name = &saddr;
+	msg.msg_flags = 0;
+	msg.msg_namelen = sizeof(saddr);
+	iov[0].iov_len = len;
+	iov[0].iov_base = buf;
+
+	/* MSG_DONTWAIT is the per-call non-blocking flag for recvmsg();
+	   O_NONBLOCK is an fcntl() file-status flag and is not valid in
+	   the recvmsg() flags argument (on Linux its value collides with
+	   an unrelated MSG_* flag). */
+	len = recvmsg(cluster_sock, &msg, MSG_OOB | MSG_DONTWAIT);
+	if (len < 0 && errno == EAGAIN)
+		return len;
+
+	DEBUGLOG("Read on cluster socket, len = %d\n", len);
+
+	/* A real error */
+	if (len < 0) {
+		log_error("read error on cluster socket: %m");
+		return 0;
+	}
+
+	/* EOF - we have left the cluster */
+	if (len == 0)
+		return 0;
+
+	/* Is it OOB? probably a node gone down */
+	if (msg.msg_flags & MSG_OOB) {
+		process_oob_msg(iov[0].iov_base, len, saddr.scl_nodeid);
+
+		/* Tell the upper layer to ignore this message */
+		len = -1;
+		errno = EAGAIN;
+	}
+	memcpy(csid, &saddr.scl_nodeid, sizeof(saddr.scl_nodeid));
+	return len;
+}
+
+/* Mark the node identified by csid as running a clvmd, growing the
+   node_updown array when the nodeid is beyond its current capacity. */
+void add_up_node(char *csid)
+{
+	/* It's up ! */
+	int nodeid = nodeid_from_csid(csid);
+
+	if (nodeid >= max_updown_nodes) {
+		/* Grow far enough to actually cover this nodeid (a fixed
+		   +10 step could still leave it out of range), and size
+		   the allocation in ints — the old code passed a raw byte
+		   count of max_updown_nodes + 10 to realloc. */
+		int new_max = nodeid + 10;
+		int *new_updown =
+		    realloc(node_updown, sizeof(int) * new_max);
+
+		if (new_updown) {
+			/* realloc does not zero the newly added tail;
+			   unknown nodes must read as "down" */
+			memset(new_updown + max_updown_nodes, 0,
+			       sizeof(int) * (new_max - max_updown_nodes));
+			node_updown = new_updown;
+			max_updown_nodes = new_max;
+			DEBUGLOG("realloced more space for nodes. now %d\n",
+				 max_updown_nodes);
+		} else {
+			log_error
+			    ("Realloc failed. Node status for clvmd will be wrong\n");
+			return;
+		}
+	}
+	node_updown[nodeid] = 1;
+	DEBUGLOG("Added new node %d to updown list\n", nodeid);
+}
+
+/* Shut the cluster interface down: release all held DLM locks,
+   force-leave the lockspace (force flag = 1) and close the socket */
+void cluster_closedown()
+{
+	unlock_all();
+	dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
+	close(cluster_sock);
+}
+
+/* Ask the cluster manager whether `nodeid' has anything listening on the
+   clvmd port.  Returns the ioctl result; EBUSY is retried once a second
+   rather than busy-waiting. */
+static int is_listening(int nodeid)
+{
+	struct cl_listen_request rq;
+	int status;
+
+	rq.port = CLUSTER_PORT_CLVMD;
+	rq.nodeid = nodeid;
+
+	do {
+		status = ioctl(cluster_sock, SIOCCLUSTER_ISLISTENING, &rq);
+		if (status < 0 && errno == EBUSY) {	/* Don't busywait */
+			sleep(1);
+			errno = EBUSY;	/* In case sleep trashes it */
+		}
+	}
+	while (status < 0 && errno == EBUSY);
+
+	return status;
+}
+
+/* Populate the list of CLVMDs running.
+   called only at startup time */
+void count_clvmds_running(void)
+{
+	int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		node_updown[nodes[i].node_id] = is_listening(nodes[i].node_id);
+	}
+}
+
+/* Get a list of active cluster members.  Re-reads the membership into the
+   module-level `nodes' array (re-allocating if it grew) and lazily creates
+   the node_updown status array.  Exits the daemon on unrecoverable errors. */
+static void get_members()
+{
+	struct cl_cluster_nodelist nodelist;
+
+	/* First ioctl just returns the member count */
+	num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, 0);
+	if (num_nodes == -1) {
+		perror("get nodes");
+	} else {
+		/* Not enough room for new nodes list ? */
+		if (num_nodes > count_nodes && nodes) {
+			free(nodes);
+			nodes = NULL;
+		}
+
+		if (nodes == NULL) {
+			count_nodes = num_nodes + 10; /* Overallocate a little */
+			nodes = malloc(count_nodes * sizeof(struct cl_cluster_node));
+			if (!nodes) {
+				perror("Unable to allocate nodes array\n");
+				exit(5);
+			}
+		}
+		nodelist.max_members = count_nodes;
+		nodelist.nodes = nodes;
+
+		/* Second ioctl fills in the details */
+		num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, &nodelist);
+		if (num_nodes <= 0) {
+			perror("get node details");
+			exit(6);
+		}
+
+		/* Sanity check struct */
+		if (nodes[0].size != sizeof(struct cl_cluster_node)) {
+			log_error
+			    ("sizeof(cl_cluster_node) does not match size returned from the kernel: aborting\n");
+			exit(10);
+		}
+
+		if (node_updown == NULL) {
+			/* This malloc was previously unchecked: a failure
+			   would have made the memset below crash */
+			node_updown =
+			    (int *) malloc(sizeof(int) *
+					   max(num_nodes, max_updown_nodes));
+			if (!node_updown) {
+				perror("Unable to allocate node up/down array\n");
+				exit(7);
+			}
+			memset(node_updown, 0,
+			       sizeof(int) * max(num_nodes, max_updown_nodes));
+			/* Keep the recorded capacity in step with what was
+			   actually allocated so add_up_node's bound check
+			   is accurate */
+			if (num_nodes > max_updown_nodes)
+				max_updown_nodes = num_nodes;
+		}
+	}
+}
+
+/* Convert a node name to a CSID.  Returns 0 and fills csid
+   (MAX_CSID_LEN bytes) on success, -1 if the name is unknown */
+int csid_from_name(char *csid, char *name)
+{
+	int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		if (strcmp(name, nodes[i].name) == 0) {
+			memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN);
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/* Convert a CSID to a node name.  Returns 0 on success; -1 with name set
+   to "Unknown" if the csid is not in the member list */
+int name_from_csid(char *csid, char *name)
+{
+	int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		if (memcmp(csid, &nodes[i].node_id, MAX_CSID_LEN) == 0) {
+			strcpy(name, nodes[i].name);
+			return 0;
+		}
+	}
+	/* Who?? */
+	strcpy(name, "Unknown");
+	return -1;
+}
+
+/* Convert a node ID to a node name.  Same return convention as
+   name_from_csid() */
+int name_from_nodeid(int nodeid, char *name)
+{
+	int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		if (nodeid == nodes[i].node_id) {
+			strcpy(name, nodes[i].name);
+			return 0;
+		}
+	}
+	/* Who?? */
+	strcpy(name, "Unknown");
+	return -1;
+}
+
+/* Convert a CSID to a node ID.  A CMAN csid is just the raw nodeid bytes,
+   so this is a straight copy */
+static int nodeid_from_csid(char *csid)
+{
+	int nodeid;
+
+	memcpy(&nodeid, csid, MAX_CSID_LEN);
+
+	return nodeid;
+}
+
+/* Ask the cluster manager whether the cluster is quorate
+   (non-zero = quorate) */
+int is_quorate()
+{
+	return ioctl(cluster_sock, SIOCCLUSTER_ISQUORATE, 0);
+}
+
+/* DLM AST callback: wakes the thread blocked in sync_lock()/sync_unlock()
+   on the per-request condition variable */
+static void sync_ast_routine(void *arg)
+{
+	struct lock_wait *lwait = arg;
+
+	pthread_mutex_lock(&lwait->mutex);
+	pthread_cond_signal(&lwait->cond);
+	pthread_mutex_unlock(&lwait->mutex);
+}
+
+/* Take a DLM lock on `resource' in `mode', blocking this thread until the
+   AST fires.  On success *lockid receives the lock id (needed for later
+   conversion or sync_unlock) and 0 is returned; on failure returns -1 with
+   errno set from the completion status, or the raw dlm_ls_lock() error. */
+int sync_lock(const char *resource, int mode, int flags, int *lockid)
+{
+	int status;
+	struct lock_wait lwait;
+
+	if (!lockid) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* Conversions need the lockid in the LKSB */
+	if (flags & LKF_CONVERT)
+		lwait.lksb.sb_lkid = *lockid;
+
+	pthread_cond_init(&lwait.cond, NULL);
+	pthread_mutex_init(&lwait.mutex, NULL);
+	pthread_mutex_lock(&lwait.mutex);
+
+	status = dlm_ls_lock(lockspace,
+			     mode,
+			     &lwait.lksb,
+			     flags,
+			     resource,
+			     strlen(resource),
+			     0, sync_ast_routine, &lwait, NULL, NULL);
+	/* lwait is stack-local, so nothing leaks on this early return */
+	if (status)
+		return status;
+
+	/* Wait for it to complete */
+	pthread_cond_wait(&lwait.cond, &lwait.mutex);
+	pthread_mutex_unlock(&lwait.mutex);
+
+	*lockid = lwait.lksb.sb_lkid;
+
+	errno = lwait.lksb.sb_status;
+	if (lwait.lksb.sb_status)
+		return -1;
+	else
+		return 0;
+}
+
+/* Release a lock taken via sync_lock(), waiting for the unlock AST.
+   A completion status of EUNLOCK means success; anything else returns -1
+   with errno set to that status. */
+int sync_unlock(const char *resource /* UNUSED */, int lockid)
+{
+	int status;
+	struct lock_wait lwait;
+
+	pthread_cond_init(&lwait.cond, NULL);
+	pthread_mutex_init(&lwait.mutex, NULL);
+	pthread_mutex_lock(&lwait.mutex);
+
+	status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);
+
+	if (status)
+		return status;
+
+	/* Wait for it to complete */
+	pthread_cond_wait(&lwait.cond, &lwait.mutex);
+	pthread_mutex_unlock(&lwait.mutex);
+
+	errno = lwait.lksb.sb_status;
+	if (lwait.lksb.sb_status != EUNLOCK)
+		return -1;
+	else
+		return 0;
+
+}
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+
+ CLVMD Cluster LVM daemon command processor.
+
+ To add commands to the daemon simply add a processor in do_command and return
+ and messages back in buf and the length in *retlen. The initial value of
+ buflen is the maximum size of the buffer. if buf is not large enough then it
+ may be reallocated by the functions in here to a suitable size bearing in
+ mind that anything larger than the passed-in size will have to be returned
+ using the system LV and so performance will suffer.
+
+ The status return will be negated and passed back to the originating node.
+
+ pre- and post- command routines are called only on the local node. The
+ purpose is primarily to get and release locks, though the pre- routine should
+ also do any other local setups required by the command (if any) and can
+ return a failure code that prevents the command from being distributed around
+ the cluster
+
+  The pre- and post- routines are run in their own thread so can block for as
+  long as they like; do_command is run in the main clvmd thread so should not
+  block for too long. If the pre-command returns an error code (!=0) then the
+  command will not be propagated around the cluster but the post-command WILL be called
+
+ Also note that the pre and post routine are *always* called on the local
+ node, even if the command to be executed was only requested to run on a
+ remote node. It may peek inside the client structure to check the status of
+ the command.
+
+ The clients of the daemon must, naturally, understand the return messages and
+ codes.
+
+ Routines in here may only READ the values in the client structure passed in
+ apart from client->private which they are free to do what they like with.
+
+*/
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "list.h"
+#include "locking.h"
+#include "log.h"
+#include "lvm-functions.h"
+#include "clvmd-comms.h"
+#include "clvm.h"
+#include "clvmd.h"
+#include "libdlm.h"
+
+/* This is where all the real work happens:
+   NOTE: client will be NULL when this is executed on a remote node.
+   Replies go into *buf (possibly reallocated, buflen is its capacity),
+   *retlen gets the reply length; returns 0 or a positive errno. */
+int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
+	       char **buf, int buflen, int *retlen)
+{
+	char *args = msg->node + strlen(msg->node) + 1;
+	int arglen = msglen - sizeof(struct clvm_header) - strlen(msg->node);
+	int status = 0;
+	char *lockname;
+	struct utsname nodeinfo;
+	unsigned char lock_cmd;
+	unsigned char lock_flags;
+
+	/* Do the command */
+	switch (msg->cmd) {
+		/* Just a test message */
+	case CLVMD_CMD_TEST:
+		if (arglen > buflen) {
+			/* Grow the reply buffer.  On realloc failure keep
+			   the old buffer AND its old size — the previous
+			   code updated buflen first, so a failed realloc
+			   lied to snprintf about the capacity. */
+			char *new_buf = realloc(*buf, arglen + 200);
+			if (new_buf) {
+				*buf = new_buf;
+				buflen = arglen + 200;
+			}
+		}
+		uname(&nodeinfo);
+		*retlen = 1 + snprintf(*buf, buflen, "TEST from %s: %s v%s",
+				       nodeinfo.nodename, args,
+				       nodeinfo.release);
+		break;
+
+	case CLVMD_CMD_LOCK_VG:
+		/* Check to see if the VG is in use by LVM1 */
+		status = do_check_lvm1(&args[2]);
+		break;
+
+	case CLVMD_CMD_LOCK_LV:
+		/* This is the biggie */
+		lock_cmd = args[0];
+		lock_flags = args[1];
+		lockname = &args[2];
+		status = do_lock_lv(lock_cmd, lock_flags, lockname);
+		/* Replace EIO with something less scary */
+		if (status == EIO) {
+			*retlen =
+			    1 + snprintf(*buf, buflen,
+					 "Internal lvm error, check syslog");
+			return EIO;
+		}
+		break;
+
+	default:
+		/* Won't get here because command is validated in pre_command */
+		break;
+	}
+
+	/* Check the status of the command and return the error text.
+	   Use a "%s" format: strerror() output must never be interpreted
+	   as a format string (it may contain '%' sequences). */
+	if (status) {
+		*retlen = 1 + snprintf(*buf, buflen, "%s", strerror(status));
+	}
+
+	return status;
+
+}
+
+/* Pre-command is a good place to get locks that are needed only for the duration
+   of the commands around the cluster (don't forget to free them in post-command),
+   and to sanity check the command arguments */
+int do_pre_command(struct local_client *client)
+{
+	/* The marshalled clvm_header sits at the start of the client's
+	   local-socket command buffer */
+	struct clvm_header *header =
+	    (struct clvm_header *) client->bits.localsock.cmd;
+	unsigned char lock_cmd;
+	unsigned char lock_flags;
+	char *args = header->node + strlen(header->node) + 1;
+	int lockid;
+	int status = 0;
+	char *lockname;
+
+	switch (header->cmd) {
+	case CLVMD_CMD_TEST:
+		status = sync_lock("CLVMD_TEST", LKM_EXMODE, 0, &lockid);
+		/* Smuggle the lock id to do_post_command via the private
+		   pointer (int stored in a void *) */
+		client->bits.localsock.private = (void *) lockid;
+		break;
+
+	case CLVMD_CMD_LOCK_VG:
+		/* args layout: [0]=lock cmd, [1]=flags, [2..]=NUL-terminated name */
+		lock_cmd = args[0];
+		lock_flags = args[1];
+		lockname = &args[2];
+		DEBUGLOG("doing PRE command LOCK_VG %s at %x\n", lockname,
+			 lock_cmd);
+		if (lock_cmd == LCK_UNLOCK) {
+			hold_unlock(lockname);
+		} else {
+			status =
+			    hold_lock(lockname, (int) lock_cmd,
+				      (int) lock_flags);
+			if (status)
+				status = errno;
+		}
+		break;
+
+	case CLVMD_CMD_LOCK_LV:
+		lock_cmd = args[0];
+		lock_flags = args[1];
+		lockname = &args[2];
+		status = pre_lock_lv(lock_cmd, lock_flags, lockname);
+		break;
+
+	default:
+		log_error("Unknown command %d received\n", header->cmd);
+		status = EINVAL;
+	}
+	return status;
+}
+
+/* Note that the post-command routine is called even if the pre-command or the real command
+   failed */
+int do_post_command(struct local_client *client)
+{
+	struct clvm_header *header =
+	    (struct clvm_header *) client->bits.localsock.cmd;
+	int status = 0;
+	unsigned char lock_cmd;
+	unsigned char lock_flags;
+	char *args = header->node + strlen(header->node) + 1;
+	char *lockname;
+
+	switch (header->cmd) {
+	case CLVMD_CMD_TEST:
+		/* Release the lock whose id do_pre_command stashed in the
+		   private pointer */
+		status =
+		    sync_unlock("CLVMD_TEST", (int) (long) client->bits.localsock.private);
+		break;
+
+	case CLVMD_CMD_LOCK_VG:
+		/* Nothing to do here */
+		break;
+
+	case CLVMD_CMD_LOCK_LV:
+		lock_cmd = args[0];
+		lock_flags = args[1];
+		lockname = &args[2];
+		status = post_lock_lv(lock_cmd, lock_flags, lockname);
+		break;
+	}
+	return status;
+}
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Abstraction layer for clvmd cluster communications
+ */
+
+#ifndef _CLVMD_COMMS_H
+#define _CLVMD_COMMS_H
+
+struct local_client;
+
+/* Messaging and node/csid lookup provided by the cluster backend */
+extern int cluster_send_message(void *buf, int msglen, char *csid,
+ const char *errtext);
+extern int name_from_csid(char *csid, char *name);
+extern int csid_from_name(char *csid, char *name);
+extern int get_num_nodes(void);
+extern int cluster_fd_callback(struct local_client *fd, char *buf, int len,
+ char *csid, struct local_client **new_client);
+extern int init_cluster(void);
+extern int get_main_cluster_fd(void); /* gets accept FD or cman cluster socket */
+extern int cluster_do_node_callback(struct local_client *client,
+ void (*callback) (struct local_client *,
+ char *csid, int node_up));
+extern int is_quorate(void);
+
+extern void get_our_csid(char *csid);
+extern void add_up_node(char *csid);
+extern void cluster_closedown(void);
+
+/* Cluster-wide locking primitives */
+extern int sync_lock(const char *resource, int mode, int flags, int *lockid);
+extern int sync_unlock(const char *resource, int lockid);
+
+/* Backend selection: gulm defines MAX_CSID_LEN in tcp-comms.h;
+   cman csids are 4-byte node addresses */
+#ifdef USE_GULM
+#include "tcp-comms.h"
+#else
+/* cman */
+#include "cnxman-socket.h"
+#define MAX_CSID_LEN 4
+#endif
+
+
+#endif
--- /dev/null
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
+**
+*******************************************************************************
+******************************************************************************/
+
+/* This provides the interface between clvmd and gulm as the cluster
+ * and lock manager.
+ *
+ * It also provides the "liblm" functions too as it's hard (and pointless)
+ * to separate them out when using gulm.
+ *
+ * What it does /not/ provide is the communications between clvmd daemons
+ * on the cluster nodes. That is done in tcp-comms.c
+ */
+
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <utmpx.h>
+#include <syslog.h>
+#include <assert.h>
+
+#include "ccs.h"
+#include "list.h"
+#include "locking.h"
+#include "log.h"
+#include "clvm.h"
+#include "clvmd-comms.h"
+#include "clvmd.h"
+#include "hash.h"
+#include "clvmd-gulm.h"
+#include "libgulm.h"
+#include "hash.h"
+
+/* Hash list of nodes in the cluster */
+static struct hash_table *node_hash;
+
+/* hash list of outstanding lock requests */
+static struct hash_table *lock_hash;
+
+/* Copy of the current core state */
+static uint8_t current_corestate;
+
+/* Number of active nodes */
+static int num_nodes;
+
+static char *cluster_name;
+
+static pthread_mutex_t lock_start_mutex;
+static volatile int lock_start_flag;
+
+struct node_info
+{
+ enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
+ char name[MAX_CLUSTER_MEMBER_NAME_LEN];
+};
+
+struct lock_wait
+{
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ int status;
+};
+
+/* Forward */
+static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
+ struct local_client **new_client);
+static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
+ struct local_client **new_client);
+static int get_all_cluster_nodes(void);
+
+/* In tcp-comms.c */
+extern struct hash_table *sock_hash;
+
+/* Wrap an internal (GULM library) file descriptor in a local_client of
+   type CLUSTER_INTERNAL and add it to the main loop's client list.
+   Returns 0 on success, -1 if allocation fails. */
+static int add_internal_client(int fd, fd_callback_t callback)
+{
+ struct local_client *client;
+
+ DEBUGLOG("Add_internal_client, fd = %d\n", fd);
+
+ /* Add a GULM file descriptor it to the main loop */
+ client = malloc(sizeof(struct local_client));
+ if (!client)
+ {
+ DEBUGLOG("malloc failed\n");
+ return -1;
+ }
+
+ memset(client, 0, sizeof(struct local_client));
+ client->fd = fd;
+ client->type = CLUSTER_INTERNAL;
+ client->callback = callback;
+ add_client(client);
+
+ return 0;
+}
+
+/* Gulm library handle */
+static gulm_interface_p gulm_if;
+static lg_core_callbacks_t core_callbacks;
+static lg_lockspace_callbacks_t lock_callbacks;
+
+/* SIGINT/SIGTERM: log out of GULM cleanly before exiting so we don't
+   take the cluster infrastructure down with us */
+static void badsig_handler(int sig)
+{
+ DEBUGLOG("got sig %d\n", sig);
+ cluster_closedown();
+ exit(0);
+}
+
+/* SIGHUP: re-read the CCS node list so nodes added to the config after
+   startup become known to us */
+static void sighup_handler(int sig)
+{
+ DEBUGLOG("got SIGHUP\n");
+
+ /* Re-read CCS node list */
+ get_all_cluster_nodes();
+}
+
+/* Initialise the GULM cluster interface: fetch the cluster name and node
+   list from CCS, log in to the GULM core and lock services, register
+   their FDs with the main loop and install signal handlers.
+   Returns 0 on success, non-zero GULM status on failure.
+   NOTE(review): error handling is inconsistent - some failures return,
+   lock-login failures exit(); confirm this asymmetry is intended. */
+int init_cluster()
+{
+ int status;
+ int ccs_h;
+
+ /* Get cluster name from CCS */
+ /* TODO: is this right? */
+ ccs_h = ccs_connect();
+ ccs_get(ccs_h, "//cluster/@name", &cluster_name);
+ ccs_disconnect(ccs_h);
+
+ /* Block locking until we are logged in: _lock_resource waits on this
+    mutex until lock_login_reply releases it */
+ pthread_mutex_init(&lock_start_mutex, NULL);
+ pthread_mutex_lock(&lock_start_mutex);
+ lock_start_flag = 1;
+
+ node_hash = hash_create(100);
+ lock_hash = hash_create(10);
+
+ /* Get all nodes from CCS */
+ get_all_cluster_nodes();
+
+ /* Initialise GULM library */
+ status = lg_initialize(&gulm_if, cluster_name, "clvmd");
+ if (status)
+ {
+ DEBUGLOG("lg_initialize failed: %d\n", status);
+ return status;
+ }
+
+ /* Connect to core - we are not "important" :-) */
+ status = lg_core_login(gulm_if, 0);
+ if (status)
+ {
+ DEBUGLOG("lg_core_login failed: %d\n", status);
+ return status;
+ }
+
+ /* Initialise the inter-node comms */
+ status = init_comms();
+ if (status)
+ return status;
+
+ /* Add core FD to the list */
+ status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
+ if (status)
+ {
+ DEBUGLOG("can't allocate client space\n");
+ return status;
+ }
+
+ /* Connect to the lock server */
+ if (lg_lock_login(gulm_if, "CLVM"))
+ {
+ syslog(LOG_ERR, "Cannot login in to LOCK server\n");
+ DEBUGLOG("Cannot login in to LOCK server\n");
+ exit(88);
+ }
+
+ /* Add lockspace FD to the list */
+ status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
+ if (status)
+ {
+ DEBUGLOG("can't allocate client space\n");
+ exit(status);
+ }
+
+ /* Request a list of nodes, we can't really do anything until
+ this comes back */
+ status = lg_core_nodelist(gulm_if);
+ if (status)
+ {
+ DEBUGLOG("lg_core_nodelist failed: %d\n", status);
+ return status;
+ }
+
+ /* So I can kill it without taking GULM down too */
+ signal(SIGINT, badsig_handler);
+ signal(SIGTERM, badsig_handler);
+
+ /* Re-read the node list on SIGHUP */
+ signal(SIGHUP, sighup_handler);
+
+ return 0;
+}
+
+/* Orderly logout from the GULM lock and core services and release of
+   the library handle. Called from the bad-signal handler and shutdown. */
+void cluster_closedown()
+{
+ DEBUGLOG("cluster_closedown\n");
+ lg_lock_logout(gulm_if);
+ lg_core_logout(gulm_if);
+ lg_core_shutdown(gulm_if);
+ lg_release(gulm_if);
+}
+
+/* Expire locks for a named node, or us */
+#define GIO_KEY_SIZE 46
+static void drop_expired_locks(char *nodename)
+{
+ struct utsname nodeinfo;
+ uint8_t mask[GIO_KEY_SIZE];
+
+ /* All-ones mask: match every lock key held by that node */
+ memset(mask, 0xff, GIO_KEY_SIZE);
+
+ /* NULL nodename means "expire our own locks" */
+ if (!nodename)
+ {
+ uname(&nodeinfo);
+ nodename = nodeinfo.nodename;
+ }
+
+ if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
+ {
+ DEBUGLOG("Error calling lg_lock_drop_exp()\n");
+ }
+}
+
+
+/* Main-loop callback for the GULM core FD: hand the data to libgulm,
+   which dispatches into core_callbacks. Returns 1 (keep client) or a
+   negative error from the library. */
+static int read_from_core_sock(struct local_client *client, char *buf, int len, char *csid,
+ struct local_client **new_client)
+{
+ int status;
+
+ *new_client = NULL;
+ status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
+ return status<0 ? status : 1;
+}
+
+/* Main-loop callback for the GULM lockspace FD: dispatches into
+   lock_callbacks via libgulm. Same return convention as above. */
+static int read_from_lock_sock(struct local_client *client, char *buf, int len, char *csid,
+ struct local_client **new_client)
+{
+ int status;
+
+ *new_client = NULL;
+ status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
+ return status<0 ? status : 1;
+}
+
+
+/* CORE callback routines */
+static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
+{
+ DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
+ gen, error, rank, corestate);
+
+ if (error)
+ exit(error);
+
+ current_corestate = corestate;
+ return 0;
+}
+
+/* Apply a GULM core node state to our node_info record and keep the
+   num_nodes count of up nodes in step. Going down also drops the
+   node's TCP connection. */
+static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
+{
+ if (nodestate == lg_core_Logged_in)
+ {
+ /* Don't clobber NODE_CLVMD state */
+ if (ninfo->state != NODE_CLVMD)
+ {
+ if (ninfo->state == NODE_UNKNOWN ||
+ ninfo->state == NODE_DOWN)
+ num_nodes++;
+
+ ninfo->state = NODE_UP;
+ }
+ }
+ else
+ {
+ if (nodestate == lg_core_Expired ||
+ nodestate == lg_core_Fenced ||
+ nodestate == lg_core_Logged_out)
+ {
+ /* NOTE(review): a node going UNKNOWN->DOWN decrements num_nodes
+    even though it was never counted up - confirm intended */
+ if (ninfo->state != NODE_DOWN)
+ num_nodes--;
+ ninfo->state = NODE_DOWN;
+ tcp_remove_client(csid);
+ }
+ }
+ DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n",
+ ninfo->name, ninfo->state, num_nodes);
+}
+
+/* Look up a node by IP (used as its csid), re-reading the CCS config
+   once if unknown, then apply the given state. Returns the node record
+   or NULL if the node is not part of our cluster config. */
+static struct node_info *add_or_set_node(char *name, uint32_t ip, uint8_t state)
+{
+ struct node_info *ninfo;
+
+ ninfo = hash_lookup_binary(node_hash, (char *)&ip, MAX_CSID_LEN);
+ if (!ninfo)
+ {
+ /* If we can't find that node then re-read the config file in case it
+ was added after we were started */
+ DEBUGLOG("Node %s not found, re-reading config file\n", name);
+ get_all_cluster_nodes();
+
+ /* Now try again */
+ ninfo = hash_lookup_binary(node_hash, (char *)&ip, MAX_CSID_LEN);
+ if (!ninfo)
+ {
+ DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
+ return NULL;
+ }
+ }
+
+ set_node_state(ninfo, (char *)&ip, state);
+
+ return ninfo;
+}
+
+/* CORE nodelist callback: called once per node between lglcb_start and
+   lglcb_stop. On stop, cluster init is complete and we mark ourselves up. */
+static int core_nodelist(void *misc, lglcb_t type, char *name, uint32_t ip, uint8_t state)
+{
+ DEBUGLOG("CORE nodelist\n");
+
+ if (type == lglcb_start)
+ {
+ DEBUGLOG("Got Nodelist, start\n");
+ }
+ else
+ {
+ if (type == lglcb_item)
+ {
+ DEBUGLOG("Got nodelist, item: %s, %#x, %#x\n", name, ip, state);
+
+ add_or_set_node(name, ip, state);
+ }
+ else
+ {
+ if (type == lglcb_stop)
+ {
+ char ourcsid[MAX_CSID_LEN];
+
+ DEBUGLOG("Got Nodelist, stop\n");
+ /* List complete: tell clvmd.c so it can send the version message */
+ clvmd_cluster_init_completed();
+
+ /* Mark ourself as up */
+ get_our_csid(ourcsid);
+ add_up_node(ourcsid);
+ }
+ else
+ {
+ DEBUGLOG("Unknown lglcb_t %#x\n", type);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* CORE state-change callback: just track the new core state, which
+   is_quorate() consults */
+static int core_statechange(void *misc, uint8_t corestate, uint32_t masterip, char *mastername)
+{
+ DEBUGLOG("CORE Got statechange corestate:%#x masterip:%#x mastername:%s\n",
+ corestate, masterip, mastername);
+
+ current_corestate = corestate;
+ return 0;
+}
+
+/* CORE node-change callback: update our record of the node and, if it
+   went down, ask GULM to drop its expired locks */
+static int core_nodechange(void *misc, char *nodename, uint32_t nodeip, uint8_t nodestate)
+{
+ struct node_info *ninfo;
+
+ DEBUGLOG("CORE node change, name=%s, ip=%x, state = %d\n", nodename, nodeip, nodestate);
+
+ /* If we don't get nodeip here, try a lookup by name */
+ if (!nodeip)
+ csid_from_name((char *)&nodeip, nodename);
+ if (!nodeip)
+ return 0;
+
+ ninfo = add_or_set_node(nodename, nodeip, nodestate);
+ if (!ninfo)
+ return 0;
+
+ /* Check if we need to drop any expired locks */
+ if (ninfo->state == NODE_DOWN)
+ {
+ drop_expired_locks(nodename);
+ }
+
+ return 0;
+}
+/* CORE error callback - currently just logged */
+static int core_error(void *misc, uint32_t err)
+{
+ DEBUGLOG("CORE error: %d\n", err);
+ // Not sure what happens here
+ return 0;
+}
+
+/* LOCK callback routines */
+static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
+{
+ DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
+ error, which);
+
+ if (error)
+ exit(error);
+
+ /* Drop any expired locks for us that might be hanging around */
+ drop_expired_locks(NULL);
+
+ /* Enable locking operations in other threads */
+ if (lock_start_flag)
+ {
+ lock_start_flag = 0;
+ pthread_mutex_unlock(&lock_start_mutex);
+ }
+
+ return 0;
+}
+
+/* Lock-state callback: find the waiting _lock_resource/_unlock_resource
+   call via lock_hash (keyed by resource name) and wake it with the result.
+   NOTE(review): lwait->status is written before taking the mutex; safe
+   only because the waiter holds the mutex until pthread_cond_wait - confirm */
+static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen, uint8_t state, uint32_t flags, uint32_t error,
+ uint8_t *LVB, uint16_t LVBlen)
+{
+ struct lock_wait *lwait;
+
+ DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);
+
+ lwait = hash_lookup(lock_hash, key);
+ if (!lwait)
+ {
+ DEBUGLOG("Can't find hash entry for resource %s\n", key);
+ return 0;
+ }
+ lwait->status = error;
+ pthread_mutex_lock(&lwait->mutex);
+ pthread_cond_signal(&lwait->cond);
+ pthread_mutex_unlock(&lwait->mutex);
+
+ return 0;
+}
+/* LOCK error callback - currently just logged */
+static int lock_error(void *misc, uint32_t err)
+{
+ DEBUGLOG("LOCK error: %d\n", err);
+ // Not sure what happens here
+ return 0;
+}
+
+
+/* CORE callbacks - dispatched by lg_core_handle_messages() in
+   read_from_core_sock() */
+static lg_core_callbacks_t core_callbacks = {
+ .login_reply = core_login_reply,
+ .nodelist = core_nodelist,
+ .statechange = core_statechange,
+ .nodechange = core_nodechange,
+ .error = core_error,
+};
+
+/* LOCK callbacks - dispatched by lg_lock_handle_messages() in
+   read_from_lock_sock() */
+static lg_lockspace_callbacks_t lock_callbacks = {
+ .login_reply = lock_login_reply,
+ .lock_state = lock_lock_state,
+ .error = lock_error,
+};
+
+/* Allow tcp-comms to loop round the list of active nodes */
+int get_next_node_csid(void **context, char *csid)
+{
+ struct node_info *ninfo = NULL;
+
+ /* First node */
+ if (!*context)
+ {
+ *context = hash_get_first(node_hash);
+ }
+ else
+ {
+ *context = hash_get_next(node_hash, *context);
+ }
+ if (*context)
+ ninfo = hash_get_data(node_hash, *context);
+
+ /* Find a node that is UP */
+ while (*context && ninfo->state == NODE_DOWN)
+ {
+ *context = hash_get_next(node_hash, *context);
+ if (*context)
+ {
+ ninfo = hash_get_data(node_hash, *context);
+ }
+ }
+
+ if (!*context || ninfo->state == NODE_DOWN)
+ {
+ return 0;
+ }
+
+ memcpy(csid, hash_get_key(node_hash, *context), MAX_CSID_LEN);
+ return 1;
+}
+
+/* Translate a csid into a node name; on unknown csid, formats a
+   placeholder name from the raw bytes and returns -1.
+   NOTE(review): csid is plain char so negative byte values print
+   negative - cast to unsigned char if that matters. */
+int name_from_csid(char *csid, char *name)
+{
+ struct node_info *ninfo;
+
+ ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);
+ if (!ninfo)
+ {
+ sprintf(name, "UNKNOWN [%d.%d.%d.%d]",
+ csid[0], csid[1], csid[2], csid[3]);
+ return -1;
+ }
+
+ strcpy(name, ninfo->name);
+ return 0;
+}
+
+
+/* Reverse lookup: linear scan of node_hash for a matching node name.
+   Returns 0 with csid filled in, or -1 if not found. */
+int csid_from_name(char *csid, char *name)
+{
+ struct hash_node *hn;
+ struct node_info *ninfo;
+
+ hash_iterate(hn, node_hash)
+ {
+ ninfo = hash_get_data(node_hash, hn);
+ if (strcmp(ninfo->name, name) == 0)
+ {
+ memcpy(csid, hash_get_key(node_hash, hn), MAX_CSID_LEN);
+ return 0;
+ }
+ }
+ return -1;
+}
+
+/* Number of nodes currently counted as up (maintained by set_node_state) */
+int get_num_nodes()
+{
+ DEBUGLOG("num_nodes = %d\n", num_nodes);
+ return num_nodes;
+}
+
+/* Node is now known to be running a clvmd */
+void add_up_node(char *csid)
+{
+ struct node_info *ninfo;
+
+ ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);
+ if (!ninfo)
+ return;
+
+ ninfo->state = NODE_CLVMD;
+ return;
+
+}
+/* Node is now known to be NOT running a clvmd */
+void add_down_node(char *csid)
+{
+ struct node_info *ninfo;
+
+ ninfo = hash_lookup_binary(node_hash, csid, MAX_CSID_LEN);
+ if (!ninfo)
+ return;
+
+ /* Only set it to UP if it was previously known to be
+ running clvmd - gulm may set it DOWN quite soon */
+ if (ninfo->state == NODE_CLVMD)
+ ninfo->state = NODE_UP;
+ return;
+
+}
+
+/* Call a callback for each node, so the caller knows whether it's up or down */
+int cluster_do_node_callback(struct local_client *master_client,
+ void (*callback)(struct local_client *, char *csid, int node_up))
+{
+ struct hash_node *hn;
+ struct node_info *ninfo;
+
+ hash_iterate(hn, node_hash)
+ {
+ char csid[MAX_CSID_LEN];
+ struct local_client *client;
+
+ ninfo = hash_get_data(node_hash, hn);
+ memcpy(csid, hash_get_key(node_hash, hn), MAX_CSID_LEN);
+
+ DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);
+
+ client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN);
+ if (client)
+ callback(master_client, csid, ninfo->state == NODE_CLVMD);
+ }
+ return 0;
+}
+
+/* Convert gulm error codes to unix errno numbers */
+static int gulm_to_errno(int gulm_ret)
+{
+ switch (gulm_ret)
+ {
+ case lg_err_TryFailed:
+ errno = EAGAIN;
+ break;
+
+ case lg_err_AlreadyPend:
+ errno = EBUSY;
+
+ /* More?? */
+ default:
+ errno = EINVAL;
+ }
+
+ return gulm_ret ? -1 : 0;
+}
+
+/* Real locking */
+static int _lock_resource(char *resource, int mode, int flags, int *lockid)
+{
+ int status;
+ struct lock_wait lwait;
+
+ /* Wait until the lock module is ready */
+ if (lock_start_flag)
+ {
+ pthread_mutex_lock(&lock_start_mutex);
+ pthread_mutex_unlock(&lock_start_mutex);
+ }
+
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ /* This needs to be converted from DLM/LVM2 value for GULM */
+ if (flags == LCK_NONBLOCK) flags = lg_lock_flag_Try;
+
+ hash_insert(lock_hash, resource, &lwait);
+ DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);
+
+ status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
+ mode, flags, NULL, 0);
+ if (status)
+ {
+ DEBUGLOG("lg_lock_state returned %d\n", status);
+ return status;
+ }
+
+ /* Wait for it to complete */
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+
+ hash_remove(lock_hash, resource);
+ DEBUGLOG("lock-resource returning %d\n", lwait.status);
+
+ return gulm_to_errno(lwait.status);
+}
+
+
+/* Release a GULM lock on 'resource'; blocks until the lock_state
+   callback reports completion, like _lock_resource. Returns 0 or -1
+   with errno set, or the raw request-submission error. */
+static int _unlock_resource(char *resource, int lockid)
+{
+ int status;
+ struct lock_wait lwait;
+
+ pthread_cond_init(&lwait.cond, NULL);
+ pthread_mutex_init(&lwait.mutex, NULL);
+ pthread_mutex_lock(&lwait.mutex);
+
+ /* The lock_state callback finds this stack frame via lock_hash */
+ hash_insert(lock_hash, resource, &lwait);
+
+ DEBUGLOG("unlock_resource %s\n", resource);
+ status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
+ lg_lock_state_Unlock, 0, NULL, 0);
+
+ if (status)
+ {
+ DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
+ /* Fix: remove the hash entry before this stack frame dies, or a
+    late callback would dereference freed stack memory */
+ hash_remove(lock_hash, resource);
+ pthread_mutex_unlock(&lwait.mutex);
+ return status;
+ }
+
+ /* Wait for it to complete */
+
+ pthread_cond_wait(&lwait.cond, &lwait.mutex);
+ pthread_mutex_unlock(&lwait.mutex);
+
+ hash_remove(lock_hash, resource);
+
+ return gulm_to_errno(lwait.status);
+}
+
+
+/* These two locking functions MUST be called in a seperate thread from
+ the clvmd main loop because they expect to be woken up by it.
+
+ These are abstractions around the real locking functions (above)
+ as we need to emulate the DLM's EX/PW/CW interaction with GULM using
+ two locks.
+ To aid unlocking, we store the lock mode in the lockid (as GULM
+ doesn't use this).
+*/
+/* Take a cluster lock, emulating DLM EX/READ/WRITE semantics with two
+   GULM locks ("<res>-1" and "<res>-2"):
+     EX    = exclusive on both (second is a Try; EAGAIN if busy)
+     READ  = shared on lock1
+     WRITE = exclusive on lock2
+   The held mode is stored in *lockid for sync_unlock (GULM doesn't use
+   lockids). NOTE(review): *lockid is written even on failure. */
+int sync_lock(const char *resource, int mode, int flags, int *lockid)
+{
+ int status;
+ char lock1[strlen(resource)+3];
+ char lock2[strlen(resource)+3];
+
+ snprintf(lock1, sizeof(lock1), "%s-1", resource);
+ snprintf(lock2, sizeof(lock2), "%s-2", resource);
+
+ switch (mode)
+ {
+ case LCK_EXCL:
+ status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
+ if (status)
+ goto out;
+
+ /* If we can't get this lock then bail out */
+ status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
+ if (status == lg_err_TryFailed)
+ {
+ _unlock_resource(lock1, *lockid);
+ status = -1;
+ errno = EAGAIN;
+ }
+ break;
+
+ case LCK_READ:
+ status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
+ break;
+
+ case LCK_WRITE:
+ status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
+ break;
+
+ default:
+ status = -1;
+ errno = EINVAL;
+ break;
+ }
+ out:
+ *lockid = mode;
+ return status;
+}
+
+/* Release a cluster lock taken by sync_lock. lockid carries the mode
+   that was granted, telling us which of the two underlying GULM locks
+   to drop. */
+int sync_unlock(const char *resource, int lockid)
+{
+ int status = 0;
+ char lock1[strlen(resource)+3];
+ char lock2[strlen(resource)+3];
+
+ snprintf(lock1, sizeof(lock1), "%s-1", resource);
+ snprintf(lock2, sizeof(lock2), "%s-2", resource);
+
+ /* The held lock mode is in the lock id */
+ assert(lockid == LCK_EXCL ||
+ lockid == LCK_READ ||
+ lockid == LCK_WRITE);
+
+ switch (lockid)
+ {
+ case LCK_EXCL:
+ status = _unlock_resource(lock1, lockid);
+ if (status)
+ goto out;
+ status = _unlock_resource(lock2, lockid);
+ break;
+
+ case LCK_READ:
+ status = _unlock_resource(lock1, lockid);
+ break;
+
+ case LCK_WRITE:
+ status = _unlock_resource(lock2, lockid);
+ break;
+ }
+
+ out:
+ return status;
+}
+
+/* Quorate iff the GULM core state (tracked by the core callbacks) is
+   Slave, Master or Client */
+int is_quorate()
+{
+ if (current_corestate == lg_core_Slave ||
+ current_corestate == lg_core_Master ||
+ current_corestate == lg_core_Client)
+ return 1;
+ else
+ return 0;
+}
+
+/* Get all the cluster node names & IPs from CCS and
+ add them to our node list so we know who to talk to.
+ Called when we start up and if we get sent SIGHUP.
+*/
+/* Get all the cluster node names & IPs from CCS and
+   add them to our node list so we know who to talk to.
+   Called when we start up and if we get sent SIGHUP.
+   Returns 0 on success, -1 on CCS or allocation failure.
+*/
+static int get_all_cluster_nodes()
+{
+ int ctree;
+ char *nodename;
+ int error;
+
+ /* Open the config file */
+ ctree = ccs_connect();
+ if (ctree <= 0)
+ {
+ log_error("Error connecting to CCS");
+ return -1;
+ }
+
+ error = ccs_get(ctree, "//nodes/node/@name", &nodename);
+ while (nodename)
+ {
+ char nodeip[MAX_CSID_LEN];
+ char *clvmflag = NULL;
+ char key[256];
+
+ /* Only add nodes whose config enables clvm */
+ sprintf(key, "//nodes/node[@name=\"%s\"]/clvm", nodename);
+ ccs_get(ctree, key, &clvmflag);
+
+ /* Fix: ccs_get can leave clvmflag NULL (the free() below already
+    guarded for that) - don't hand NULL to atoi() */
+ if ((get_ip_address(nodename, nodeip) == 0) && clvmflag && atoi(clvmflag))
+ {
+ struct node_info *ninfo;
+
+ /* If it's not in the list, then add it */
+ ninfo = hash_lookup_binary(node_hash, nodeip, MAX_CSID_LEN);
+ if (!ninfo)
+ {
+ ninfo = malloc(sizeof(struct node_info));
+ if (!ninfo)
+ {
+ syslog(LOG_ERR, "Cannot alloc memory for node info\n");
+ ccs_disconnect(ctree);
+ return -1;
+ }
+ strcpy(ninfo->name, nodename);
+
+ ninfo->state = NODE_DOWN;
+ hash_insert_binary(node_hash, nodeip, MAX_CSID_LEN, ninfo);
+ }
+ }
+ else
+ {
+ DEBUGLOG("node %s has clvm disabled\n", nodename);
+ }
+ if (clvmflag) free(clvmflag);
+ free(nodename);
+ error = ccs_get(ctree, "//nodes/node/@name", &nodename);
+ }
+
+ /* Finished with config file */
+ ccs_disconnect(ctree);
+
+ return 0;
+}
+
+/* Expose the GULM core selector FD (used as the main cluster FD) */
+int gulm_fd(void)
+{
+ return lg_core_selector(gulm_if);
+}
--- /dev/null
+
+/* Interface between clvmd-gulm.c and tcp-comms.c */
+
+extern int get_next_node_csid(void **context, char *csid);
+extern void add_down_node(char *csid);
+extern int gulm_fd(void);
+extern int get_ip_address(char *node, char *addr);
+extern void tcp_remove_client(char *csid);
+extern int alloc_client(int fd, char *csid, struct local_client **new_client);
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * CLVMD: Cluster LVM daemon
+ */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "clvmd-comms.h"
+#include "lvm-functions.h"
+#include "clvm.h"
+#include "clvmd.h"
+#include "libdlm.h"
+#include "system-lv.h"
+#include "list.h"
+#include "log.h"
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* The maximum size of a message that will fit into a packet. Anything bigger
+ than this is sent via the system LV */
+#define MAX_INLINE_MESSAGE (MAX_CLUSTER_MESSAGE-sizeof(struct clvm_header))
+
+#define ISLOCAL_CSID(c) (memcmp(c, our_csid, MAX_CSID_LEN) == 0)
+
+/* Head of the fd list. Also contains
+ the cluster_socket details */
+static struct local_client local_client_head;
+
+static unsigned short global_xid = 0; /* Last transaction ID issued */
+
+static char our_csid[MAX_CSID_LEN];
+
+/* Structure of items on the LVM thread list */
+struct lvm_thread_cmd {
+ struct list list;
+
+ struct local_client *client;
+ struct clvm_header *msg;
+ char csid[MAX_CSID_LEN];
+ int remote; /* Flag */
+ int msglen;
+ unsigned short xid;
+};
+static pthread_t lvm_thread;
+static pthread_mutex_t lvm_thread_mutex;
+static pthread_cond_t lvm_thread_cond;
+static struct list lvm_cmd_head;
+static int quit = 0;
+
+/* Prototypes for code further down */
+static void sigusr2_handler(int sig);
+static void sigterm_handler(int sig);
+static void send_local_reply(struct local_client *client, int status,
+ int clientid);
+static void free_reply(struct local_client *client);
+static void send_version_message(void);
+static void *pre_and_post_thread(void *arg);
+static int send_message(void *buf, int msglen, char *csid, int fd,
+ const char *errtext);
+static int read_from_local_sock(struct local_client *thisfd);
+static int process_local_command(struct clvm_header *msg, int msglen,
+ struct local_client *client,
+ unsigned short xid);
+static void process_remote_command(struct clvm_header *msg, int msglen, int fd,
+ char *csid);
+static int process_reply(struct clvm_header *msg, int msglen, char *csid);
+static int open_local_sock(void);
+static struct local_client *find_client(int clientid);
+static void main_loop(int local_sock, int cmd_timeout);
+static void be_daemon(void);
+static int check_all_clvmds_running(struct local_client *client);
+static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
+ int len, char *csid,
+ struct local_client **new_client);
+static void *lvm_thread_fn(void *);
+static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg,
+ int msglen, char *csid);
+static int distribute_command(struct local_client *thisfd);
+static void hton_clvm(struct clvm_header *hdr);
+static void ntoh_clvm(struct clvm_header *hdr);
+static void add_reply_to_list(struct local_client *client, int status,
+ char *csid, const char *buf, int len);
+
+/* Print command-line usage to the given stream (stdout for -h,
+   stderr for errors) */
+static void usage(char *prog, FILE *file)
+{
+ fprintf(file, "Usage:\n");
+ fprintf(file, "%s [Vhd]\n", prog);
+ fprintf(file, "\n");
+ fprintf(file, " -V Show version of clvmd\n");
+ fprintf(file, " -h Show this help information\n");
+ fprintf(file, " -d Don't fork, run in the foreground\n");
+ fprintf(file, " -t<secs> Command timeout (default 60 seconds)\n");
+ fprintf(file, "\n");
+}
+
+/* clvmd entry point: parse options, daemonise, open the local command
+   socket, initialise the cluster interface and LVM thread, then run the
+   main select loop. */
+int main(int argc, char *argv[])
+{
+ int local_sock;
+ struct local_client *newfd;
+ struct utsname nodeinfo;
+ signed char opt;
+ int debug = 0;
+ int cmd_timeout = DEFAULT_CMD_TIMEOUT;
+ sigset_t ss;
+
+ /* Deal with command-line arguments */
+ opterr = 0;
+ optind = 0;
+ while ((opt = getopt(argc, argv, "?vVhdt:")) != EOF) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0], stdout);
+ exit(0);
+
+ case '?':
+ usage(argv[0], stderr);
+ exit(0);
+
+ case 'd':
+ debug++;
+ break;
+
+ case 't':
+ cmd_timeout = atoi(optarg);
+ if (!cmd_timeout) {
+ fprintf(stderr, "command timeout is invalid\n");
+ usage(argv[0], stderr);
+ exit(1);
+ }
+ break;
+
+ case 'V':
+ printf("\nCluster LVM Daemon version %d.%d.%d\n\n",
+ CLVMD_MAJOR_VERSION, CLVMD_MINOR_VERSION,
+ CLVMD_PATCH_VERSION);
+ exit(1);
+ break;
+
+ }
+ }
+
+ /* Fork into the background (unless requested not to) */
+ if (!debug) {
+ be_daemon();
+ }
+
+ DEBUGLOG("CLVMD started\n");
+
+ /* Open the Unix socket we listen for commands on.
+ We do this before opening the cluster socket so that
+ potential clients will block rather than error if we are running
+ but the cluster is not ready yet */
+ local_sock = open_local_sock();
+ if (local_sock < 0)
+ exit(2);
+
+ /* Set up signal handlers, USR1 is for cluster change notifications (in cman)
+ USR2 causes child threads to exit.
+ PIPE should be ignored */
+ signal(SIGUSR2, sigusr2_handler);
+ signal(SIGTERM, sigterm_handler);
+ signal(SIGINT, sigterm_handler);
+ signal(SIGPIPE, SIG_IGN);
+
+ /* Block SIGUSR2 in the main process */
+ sigemptyset(&ss);
+ sigaddset(&ss, SIGUSR2);
+ sigprocmask(SIG_BLOCK, &ss, NULL);
+
+ /* Initialise the LVM thread variables */
+ list_init(&lvm_cmd_head);
+ pthread_mutex_init(&lvm_thread_mutex, NULL);
+ pthread_cond_init(&lvm_thread_cond, NULL);
+ init_lvhash();
+
+ /* Start the cluster interface */
+ if (init_cluster()) {
+ DEBUGLOG("Can't initialise cluster interface\n");
+ log_error("Can't initialise cluster interface\n");
+ exit(5);
+ }
+ DEBUGLOG("Cluster ready, doing some more initialisation\n");
+
+ /* Save our CSID */
+ uname(&nodeinfo);
+ get_our_csid(our_csid);
+
+ /* Initialise the FD list head */
+ local_client_head.fd = get_main_cluster_fd();
+ local_client_head.type = CLUSTER_MAIN_SOCK;
+ local_client_head.callback = cluster_fd_callback;
+
+ /* Add the local socket to the list */
+ newfd = malloc(sizeof(struct local_client));
+ if (!newfd)
+ exit(2);
+
+ newfd->fd = local_sock;
+ newfd->type = LOCAL_RENDEZVOUS;
+ newfd->callback = local_rendezvous_callback;
+ newfd->next = local_client_head.next;
+ local_client_head.next = newfd;
+
+ /* This needs to be started after cluster initialisation
+ as it may need to take out locks */
+ DEBUGLOG("starting LVM thread\n");
+ pthread_create(&lvm_thread, NULL, lvm_thread_fn, nodeinfo.nodename);
+
+#ifndef USE_GULM
+ /* Tell the rest of the cluster our version number */
+ /* CMAN can do this immediately, gulm needs to wait until
+ the core initialisation has finished and the node list
+ has been gathered */
+ send_version_message();
+#endif
+
+ DEBUGLOG("clvmd ready for work\n");
+
+ /* Do some work */
+ main_loop(local_sock, cmd_timeout);
+
+ return 0;
+}
+
+/* Called when the GuLM cluster layer has completed initialisation.
+ We send the version message */
+void clvmd_cluster_init_completed()
+{
+ send_version_message();
+}
+
+/* Data on a connected socket */
+/* Callback for an accepted local-socket client: hand off to
+   read_from_local_sock; never creates a new client itself */
+static int local_sock_callback(struct local_client *thisfd, char *buf, int len,
+ char *csid, struct local_client **new_client)
+{
+ *new_client = NULL;
+ return read_from_local_sock(thisfd);
+}
+
+/* Data on a connected socket */
+/* Callback for the listening Unix socket: accept the connection, build
+   a LOCAL_SOCK client for it and hand it back via *new_client (the
+   main loop links it into the client list). Always returns 1. */
+static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
+ int len, char *csid,
+ struct local_client **new_client)
+{
+ /* Someone connected to our local socket, accept it. */
+
+ struct sockaddr_un socka;
+ struct local_client *newfd;
+ socklen_t sl = sizeof(socka);
+ int client_fd = accept(thisfd->fd, (struct sockaddr *) &socka, &sl);
+
+ if (client_fd >= 0) {
+ newfd = malloc(sizeof(struct local_client));
+ if (!newfd) {
+ close(client_fd);
+ return 1;
+ }
+ newfd->fd = client_fd;
+ newfd->type = LOCAL_SOCK;
+ newfd->xid = 0;
+ newfd->callback = local_sock_callback;
+ newfd->bits.localsock.replies = NULL;
+ newfd->bits.localsock.expected_replies = 0;
+ newfd->bits.localsock.cmd = NULL;
+ newfd->bits.localsock.in_progress = FALSE;
+ newfd->bits.localsock.sent_out = FALSE;
+ newfd->bits.localsock.threadid = 0;
+ newfd->bits.localsock.finished = 0;
+ newfd->bits.localsock.pipe_client = NULL;
+ newfd->bits.localsock.all_success = 1;
+ DEBUGLOG("Got new connection on fd %d\n", newfd->fd);
+ *new_client = newfd;
+ }
+ return 1;
+}
+
+/* Callback for the pipe that the pre/post worker thread writes status
+   ints down. EOF/error means the worker went away: clean up and reap
+   the thread (return -1 so the main loop drops this client). A status
+   of 0 after PRE_COMMAND triggers distribute_command(); otherwise the
+   status is relayed to the local client. */
+static int local_pipe_callback(struct local_client *thisfd, char *buf,
+ int maxlen, char *csid,
+ struct local_client **new_client)
+{
+ int len;
+ char buffer[PIPE_BUF];
+ struct local_client *sock_client = thisfd->bits.pipe.client;
+ int status = -1; /* in error by default */
+
+ len = read(thisfd->fd, buffer, sizeof(int));
+
+ DEBUGLOG("read on PIPE %d: %d bytes: status: %d\n",
+ thisfd->fd, len, *(int *) buffer);
+
+ if (len == sizeof(int)) {
+ status = *(int *) buffer;
+ }
+
+ /* EOF on pipe or an error, close it */
+ if (len <= 0) {
+ int jstat;
+ close(thisfd->fd);
+
+ /* Clear out the cross-link */
+ if (thisfd->bits.pipe.client != NULL)
+ thisfd->bits.pipe.client->bits.localsock.pipe_client =
+ NULL;
+
+ /* Reap child thread */
+ if (thisfd->bits.pipe.threadid) {
+ jstat =
+ pthread_join(thisfd->bits.pipe.threadid,
+ (void **) &status);
+ thisfd->bits.pipe.threadid = 0;
+ if (thisfd->bits.pipe.client != NULL)
+ thisfd->bits.pipe.client->bits.localsock.
+ threadid = 0;
+ }
+ return -1;
+ } else {
+ DEBUGLOG("background routine status was %d, sock_client=%p\n",
+ status, sock_client);
+ /* But has the client gone away ?? */
+ if (sock_client == NULL) {
+ DEBUGLOG
+ ("Got PIPE response for dead client, ignoring it\n");
+ } else {
+ /* If error then just return that code */
+ if (status)
+ send_local_reply(sock_client, status,
+ sock_client->fd);
+ else {
+ if (sock_client->bits.localsock.state ==
+ POST_COMMAND) {
+ send_local_reply(sock_client, 0,
+ sock_client->fd);
+ } else // PRE_COMMAND finished.
+ {
+ if (
+ (status =
+ distribute_command(sock_client)) !=
+ 0) send_local_reply(sock_client,
+ EFBIG,
+ sock_client->
+ fd);
+ }
+ }
+ }
+ }
+ return len;
+}
+
+/* If a node is up, look for it in the reply array, if it's not there then
+ add one with "ETIMEDOUT".
+ NOTE: This won't race with real replies because they happen in the same thread.
+*/
+static void timedout_callback(struct local_client *client, char *csid,
+ int node_up)
+{
+ if (node_up) {
+ struct node_reply *reply;
+ char nodename[MAX_CLUSTER_MEMBER_NAME_LEN];
+
+ name_from_csid(csid, nodename);
+ DEBUGLOG("PJC: checking for a reply from %s\n", nodename);
+ pthread_mutex_lock(&client->bits.localsock.reply_mutex);
+
+ /* Scan the reply list for this node's entry */
+ reply = client->bits.localsock.replies;
+ while (reply && strcmp(reply->node, nodename) != 0) {
+ reply = reply->next;
+ }
+
+ pthread_mutex_unlock(&client->bits.localsock.reply_mutex);
+
+ if (!reply) {
+ DEBUGLOG("PJC: node %s timed-out\n", nodename);
+ add_reply_to_list(client, ETIMEDOUT, csid,
+ "Command timed out", 18);
+ }
+ }
+}
+
+/* Called when the request has timed out on at least one node. We fill in
+ the remaining node entries with ETIMEDOUT and return.
+
+ By the time we get here the node that caused
+ the timeout could have gone down, in which case we will never get the expected
+ number of replies that triggers the post command so we need to do it here
+*/
+static void request_timed_out(struct local_client *client)
+{
+ DEBUGLOG("Request timed-out. padding\n");
+ cluster_do_node_callback(client, timedout_callback);
+
+ if (client->bits.localsock.num_replies !=
+ client->bits.localsock.expected_replies) {
+ /* Post-process the command */
+ if (client->bits.localsock.threadid) {
+ pthread_mutex_lock(&client->bits.localsock.mutex);
+ client->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&client->bits.localsock.cond);
+ pthread_mutex_unlock(&client->bits.localsock.mutex);
+ }
+ }
+}
+
+/* This is where the real work happens */
+static void main_loop(int local_sock, int cmd_timeout)
+{
+ DEBUGLOG("Using timeout of %d seconds\n", cmd_timeout);
+
+ /* Main loop */
+ while (!quit) {
+ fd_set in;
+ int select_status;
+ struct local_client *thisfd;
+ struct timeval tv = { cmd_timeout, 0 };
+ int quorate = is_quorate();
+
+ /* Wait on the cluster FD and all local sockets/pipes */
+ FD_ZERO(&in);
+ for (thisfd = &local_client_head; thisfd != NULL;
+ thisfd = thisfd->next) {
+ /* if the cluster is not quorate then don't listen for new requests */
+ if ((thisfd->type != LOCAL_RENDEZVOUS &&
+ thisfd->type != LOCAL_SOCK) || quorate)
+ FD_SET(thisfd->fd, &in);
+ }
+
+ if ((select_status = select(FD_SETSIZE, &in, NULL, NULL, &tv)) > 0) {
+ struct local_client *lastfd = NULL;
+ struct clvm_header *inheader;
+ char csid[MAX_CSID_LEN];
+ char buf[MAX_CLUSTER_MESSAGE];
+
+ for (thisfd = &local_client_head; thisfd != NULL;
+ thisfd = thisfd->next) {
+ if (FD_ISSET(thisfd->fd, &in)) {
+ struct local_client *newfd;
+ int ret;
+
+ /* Do callback */
+ ret =
+ thisfd->callback(thisfd, buf,
+ sizeof(buf), csid,
+ &newfd);
+ /* Ignore EAGAIN */
+ if (ret < 0 && (errno == EAGAIN ||
+ errno == EINTR)) continue;
+
+ /* Got error or EOF: Remove it from the list safely */
+ if (ret <= 0) {
+ struct local_client *free_fd;
+ int type = thisfd->type;
+
+ /* If the cluster socket shuts down, so do we */
+ if (type == CLUSTER_MAIN_SOCK ||
+ type == CLUSTER_INTERNAL)
+ goto closedown;
+
+ DEBUGLOG
+ ("ret == %d, errno = %d. removing client\n",
+ ret, errno);
+ lastfd->next = thisfd->next;
+ free_fd = thisfd;
+ thisfd = lastfd;
+ free(free_fd);
+ break;
+ }
+
+ /* New client...simply add it to the list */
+ if (newfd) {
+ newfd->next = thisfd->next;
+ thisfd->next = newfd;
+ break;
+ }
+
+ switch (thisfd->type) {
+ case CLUSTER_MAIN_SOCK:
+ case CLUSTER_DATA_SOCK:
+ inheader =
+ (struct clvm_header *) buf;
+ ntoh_clvm(inheader); /* Byteswap fields */
+ if (inheader->cmd ==
+ CLVMD_CMD_REPLY)
+ process_reply
+ (inheader, ret,
+ csid);
+ else
+ add_to_lvmqueue(thisfd,
+ inheader,
+ ret,
+ csid);
+ break;
+
+ /* All the work for these is done in the callback
+ rightly or wrongly... */
+ case LOCAL_RENDEZVOUS:
+ case LOCAL_SOCK:
+ case THREAD_PIPE:
+ case CLUSTER_INTERNAL:
+ break;
+ }
+ }
+ lastfd = thisfd;
+ }
+ }
+
+ /* Select timed out. Check for clients that have been waiting too long for a response */
+ if (select_status == 0) {
+ time_t the_time = time(NULL);
+
+ for (thisfd = &local_client_head; thisfd != NULL;
+ thisfd = thisfd->next) {
+ if (thisfd->type == LOCAL_SOCK
+ && thisfd->bits.localsock.sent_out
+ && thisfd->bits.localsock.sent_time +
+ cmd_timeout < the_time
+ && thisfd->bits.localsock.
+ expected_replies !=
+ thisfd->bits.localsock.num_replies) {
+ /* Send timed out message + replies we already have */
+ DEBUGLOG
+ ("Request timed-out (send: %ld, now: %ld)\n",
+ thisfd->bits.localsock.sent_time,
+ the_time);
+
+ thisfd->bits.localsock.all_success = 0;
+
+ request_timed_out(thisfd);
+ }
+ }
+ }
+ if (select_status < 0) {
+ if (errno == EINTR)
+ continue;
+
+#ifdef DEBUG
+ perror("select error");
+ exit(-1);
+#endif
+ }
+ }
+
+ closedown:
+ cluster_closedown();
+ close(local_sock);
+}
+
+/* Fork into the background and detach from our parent process */
+static void be_daemon()
+{
+ pid_t pid;
+ int devnull = open("/dev/null", O_RDWR);
+ if (devnull == -1) {
+ perror("Can't open /dev/null");
+ exit(3);
+ }
+
+ switch (pid = fork()) {
+ case -1:
+ perror("clvmd: can't fork");
+ exit(2);
+
+ case 0: /* child */
+ break;
+
+ default: /* Parent */
+ exit(0);
+ }
+
+ /* Detach ourself from the calling environment */
+ if (close(0) || close(1) || close(2)) {
+ perror("Error closing terminal FDs");
+ exit(4);
+ }
+ setsid();
+
+ if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
+ || dup2(devnull, 2) < 0) {
+ perror("Error setting terminal FDs to /dev/null");
+ log_error("Error setting terminal FDs to /dev/null: %m");
+ exit(5);
+ }
+ if (chdir("/")) {
+ log_error("Error setting current directory to /: %m");
+ exit(6);
+ }
+
+}
+
+/* Called when there is data to read on the local socket.
+   (This code was originally inline in main_loop and was extracted as it grew.) */
+static int read_from_local_sock(struct local_client *thisfd)
+{
+ int len;
+ int argslen;
+ int missing_len;
+ char buffer[PIPE_BUF];
+
+ len = read(thisfd->fd, buffer, sizeof(buffer));
+
+ DEBUGLOG("Read on local socket %d, len = %d\n", thisfd->fd, len);
+
+ /* EOF or error on socket */
+ if (len <= 0) {
+ int *status;
+ int jstat;
+
+ DEBUGLOG("EOF on local socket: inprogress=%d\n",
+ thisfd->bits.localsock.in_progress);
+
+ thisfd->bits.localsock.finished = 1;
+
+ /* If the client went away in mid command then tidy up */
+ if (thisfd->bits.localsock.in_progress) {
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex);
+ thisfd->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+
+ /* Free any unsent buffers */
+ free_reply(thisfd);
+ }
+
+ /* Kill the subthread & free resources */
+ if (thisfd->bits.localsock.threadid) {
+ DEBUGLOG("Waiting for child thread\n");
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex);
+ thisfd->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+ pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
+
+ jstat =
+ pthread_join(thisfd->bits.localsock.threadid,
+ (void **) &status);
+ DEBUGLOG("Joined child thread\n");
+
+ thisfd->bits.localsock.threadid = 0;
+ pthread_cond_destroy(&thisfd->bits.localsock.cond);
+ pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
+
+ /* Remove the pipe client */
+ if (thisfd->bits.localsock.pipe_client != NULL) {
+ struct local_client *newfd;
+ struct local_client *lastfd = NULL;
+ struct local_client *free_fd = NULL;
+
+ close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
+ close(thisfd->bits.localsock.pipe);
+
+ /* Remove pipe client */
+ for (newfd = &local_client_head; newfd != NULL;
+ newfd = newfd->next) {
+ if (thisfd->bits.localsock.
+ pipe_client == newfd) {
+ thisfd->bits.localsock.
+ pipe_client = NULL;
+
+ lastfd->next = newfd->next;
+ free_fd = newfd;
+ newfd->next = lastfd;
+ free(free_fd);
+ break;
+ }
+ lastfd = newfd;
+ }
+ }
+ }
+
+ /* Free the command buffer */
+ if (thisfd->bits.localsock.cmd)
+ free(thisfd->bits.localsock.cmd);
+
+ /* Clear out the cross-link */
+ if (thisfd->bits.localsock.pipe_client != NULL)
+ thisfd->bits.localsock.pipe_client->bits.pipe.client =
+ NULL;
+
+ close(thisfd->fd);
+ return 0;
+ } else {
+ int comms_pipe[2];
+ struct local_client *newfd;
+ char csid[MAX_CSID_LEN];
+ struct clvm_header *inheader;
+
+ inheader = (struct clvm_header *) buffer;
+
+ /* Fill in the client ID */
+ inheader->clientid = htonl(thisfd->fd);
+
+ /* If we are already busy then return an error */
+ if (thisfd->bits.localsock.in_progress) {
+ struct clvm_header reply;
+ reply.cmd = CLVMD_CMD_REPLY;
+ reply.status = -EBUSY;
+ reply.arglen = 0;
+ reply.flags = 0;
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending EBUSY reply to local user");
+ return len;
+ }
+
+ /* Free any old buffer space */
+ if (thisfd->bits.localsock.cmd)
+ free(thisfd->bits.localsock.cmd);
+
+ /* See if we have the whole message */
+ argslen =
+ len - strlen(inheader->node) - sizeof(struct clvm_header);
+ missing_len = inheader->arglen - argslen;
+
+ /* Save the message */
+ thisfd->bits.localsock.cmd = malloc(len + missing_len);
+ if (!thisfd->bits.localsock.cmd) {
+ struct clvm_header reply;
+ reply.cmd = CLVMD_CMD_REPLY;
+ reply.status = -ENOMEM;
+ reply.arglen = 0;
+ reply.flags = 0;
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending ENOMEM reply to local user");
+ return 0;
+ }
+ memcpy(thisfd->bits.localsock.cmd, buffer, len);
+ thisfd->bits.localsock.cmd_len = len + missing_len;
+ inheader = (struct clvm_header *) thisfd->bits.localsock.cmd;
+
+ /* If we don't have the full message then read the rest now */
+ if (missing_len) {
+ char *argptr =
+ inheader->node + strlen(inheader->node) + 1;
+
+ while (missing_len > 0 && len >= 0) {
+ DEBUGLOG
+ ("got %d bytes, need another %d (total %d)\n",
+ argslen, missing_len, inheader->arglen);
+ len =
+ read(thisfd->fd, argptr + argslen,
+ missing_len);
+ if (len >= 0) {
+ missing_len -= len;
+ argslen += len;
+ }
+ }
+ }
+
+ /* Only run the command if all the cluster nodes are running CLVMD */
+ if (((inheader->flags & CLVMD_FLAG_LOCAL) == 0) &&
+ (check_all_clvmds_running(thisfd) == -1)) {
+ thisfd->bits.localsock.expected_replies = 0;
+ thisfd->bits.localsock.num_replies = 0;
+ send_local_reply(thisfd, EHOSTDOWN, thisfd->fd);
+ return len;
+ }
+
+ /* Check the node name for validity */
+ if (inheader->node[0] && csid_from_name(csid, inheader->node)) {
+ /* Error, node is not in the cluster */
+ struct clvm_header reply;
+ DEBUGLOG("Unknown node: '%s'\n", inheader->node);
+
+ reply.cmd = CLVMD_CMD_REPLY;
+ reply.status = -ENOENT;
+ reply.flags = 0;
+ reply.arglen = 0;
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending ENOENT reply to local user");
+ thisfd->bits.localsock.expected_replies = 0;
+ thisfd->bits.localsock.num_replies = 0;
+ thisfd->bits.localsock.in_progress = FALSE;
+ thisfd->bits.localsock.sent_out = FALSE;
+ return len;
+ }
+
+ /* If we already have a subthread then just signal it to start */
+ if (thisfd->bits.localsock.threadid) {
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex);
+ thisfd->bits.localsock.state = PRE_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+ return len;
+ }
+
+ /* Create a pipe and add the reading end to our FD list */
+ pipe(comms_pipe);
+ newfd = malloc(sizeof(struct local_client));
+ if (!newfd) {
+ struct clvm_header reply;
+ close(comms_pipe[0]);
+ close(comms_pipe[1]);
+
+ reply.cmd = CLVMD_CMD_REPLY;
+ reply.status = -ENOMEM;
+ reply.arglen = 0;
+ reply.flags = 0;
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending ENOMEM reply to local user");
+ return len;
+ }
+ DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0],
+ comms_pipe[1]);
+ newfd->fd = comms_pipe[0];
+ newfd->type = THREAD_PIPE;
+ newfd->callback = local_pipe_callback;
+ newfd->next = thisfd->next;
+ newfd->bits.pipe.client = thisfd;
+ newfd->bits.pipe.threadid = 0;
+ thisfd->next = newfd;
+
+ /* Store a cross link to the pipe */
+ thisfd->bits.localsock.pipe_client = newfd;
+
+ thisfd->bits.localsock.pipe = comms_pipe[1];
+
+ /* Initialise and lock the mutex so the subthread will wait after
+ finishing the PRE routine */
+ pthread_mutex_init(&thisfd->bits.localsock.mutex, NULL);
+ pthread_cond_init(&thisfd->bits.localsock.cond, NULL);
+ pthread_mutex_init(&thisfd->bits.localsock.reply_mutex, NULL);
+
+	/* Give the pipe client a copy of its thread's ID.  NOTE(review): localsock.threadid is only assigned by pthread_create() below, so this copies the pre-create value -- verify intent */
+ newfd->bits.pipe.threadid = thisfd->bits.localsock.threadid;
+
+ /* Run the pre routine */
+ thisfd->bits.localsock.in_progress = TRUE;
+ thisfd->bits.localsock.state = PRE_COMMAND;
+ pthread_create(&thisfd->bits.localsock.threadid, NULL,
+ pre_and_post_thread, thisfd);
+ }
+ return len;
+}
+
+/* Add a file descriptor from the cluster or comms interface to
+ our list of FDs for select
+*/
+int add_client(struct local_client *new_client)
+{
+	new_client->next = local_client_head.next;	/* insert immediately after the static list head */
+	local_client_head.next = new_client;
+
+	return 0;	/* always succeeds */
+}
+
+
+/*
+ * Send a long message using the System LV
+ */
+static int send_long_message(struct local_client *thisfd, struct clvm_header *inheader, int len)
+{
+ struct clvm_header new_header;
+ int status;
+
+ DEBUGLOG("Long message: being sent via system LV:\n");
+
+ /* Use System LV */
+ status = system_lv_write_data((char *)inheader, len);
+ if (status < 0)
+ return errno;
+
+ /* Send message indicating System-LV is being used */
+ memcpy(&new_header, inheader, sizeof(new_header));
+ new_header.flags |= CLVMD_FLAG_SYSTEMLV;
+ new_header.xid = thisfd->xid;
+
+ return send_message(&new_header, sizeof(new_header), NULL, -1,
+ "Error forwarding long message to cluster");
+}
+
+/* Called when the pre-command has completed successfully - we
+ now execute the real command on all the requested nodes */
+static int distribute_command(struct local_client *thisfd)
+{
+ struct clvm_header *inheader =
+ (struct clvm_header *) thisfd->bits.localsock.cmd;
+ int len = thisfd->bits.localsock.cmd_len;
+
+ thisfd->xid = global_xid++;
+ DEBUGLOG("distribute command: XID = %d\n", thisfd->xid);
+
+ /* Forward it to other nodes in the cluster if needed */
+ if (!(inheader->flags & CLVMD_FLAG_LOCAL)) {
+ /* if node is empty then do it on the whole cluster */
+ if (inheader->node[0] == '\0') {
+ thisfd->bits.localsock.expected_replies =
+ get_num_nodes();
+ thisfd->bits.localsock.num_replies = 0;
+ thisfd->bits.localsock.sent_time = time(NULL);
+ thisfd->bits.localsock.in_progress = TRUE;
+ thisfd->bits.localsock.sent_out = TRUE;
+
+ /* Do it here first */
+ add_to_lvmqueue(thisfd, inheader, len, NULL);
+
+ DEBUGLOG("Sending message to all cluster nodes\n");
+ if (len > MAX_INLINE_MESSAGE) {
+ send_long_message(thisfd, inheader, len );
+ } else {
+ inheader->xid = thisfd->xid;
+ send_message(inheader, len, NULL, -1,
+ "Error forwarding message to cluster");
+ }
+ } else {
+ /* Do it on a single node */
+ char csid[MAX_CSID_LEN];
+
+ if (csid_from_name(csid, inheader->node)) {
+ /* This has already been checked so should not happen */
+ return 0;
+ } else {
+ /* OK, found a node... */
+ thisfd->bits.localsock.expected_replies = 1;
+ thisfd->bits.localsock.num_replies = 0;
+ thisfd->bits.localsock.in_progress = TRUE;
+
+ /* Are we the requested node ?? */
+ if (memcmp(csid, our_csid, MAX_CSID_LEN) == 0) {
+ DEBUGLOG("Doing command on local node only\n");
+ add_to_lvmqueue(thisfd, inheader, len, NULL);
+ } else {
+ DEBUGLOG("Sending message to single node: %s\n",
+ inheader->node);
+ if (len > MAX_INLINE_MESSAGE) {
+ send_long_message(thisfd, inheader, len );
+ } else {
+ inheader->xid = thisfd->xid;
+ send_message(inheader, len,
+ csid, -1,
+ "Error forwarding message to cluster node");
+ }
+ }
+ }
+ }
+ } else {
+ /* Local explicitly requested, ignore nodes */
+ thisfd->bits.localsock.in_progress = TRUE;
+ thisfd->bits.localsock.expected_replies = 1;
+ thisfd->bits.localsock.num_replies = 0;
+ add_to_lvmqueue(thisfd, inheader, len, NULL);
+ }
+ return 0;
+}
+
+/* Process a command from a remote node and return the result */
+void process_remote_command(struct clvm_header *msg, int msglen, int fd,
+ char *csid)
+{
+ char *replyargs;
+ char nodename[MAX_CLUSTER_MEMBER_NAME_LEN];
+ int replylen = 0;
+ int buflen = MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header) - 1;
+ int status;
+ int msg_malloced = 0;
+
+ /* Get the node name as we /may/ need it later */
+ name_from_csid(csid, nodename);
+
+ DEBUGLOG("process_remote_command %d for clientid 0x%x on node %s\n",
+ msg->cmd, msg->clientid, nodename);
+
+ /* Is the data to be found in the system LV ? */
+ if (msg->flags & CLVMD_FLAG_SYSTEMLV) {
+ struct clvm_header *newmsg;
+
+ DEBUGLOG("Reading message from system LV\n");
+ newmsg =
+ (struct clvm_header *) malloc(msg->arglen +
+ sizeof(struct clvm_header));
+ if (newmsg) {
+ if (system_lv_read_data
+ (nodename, (char *) newmsg,
+ (size_t *) &msglen) == 0) {
+ msg = newmsg;
+ msg_malloced = 1;
+ } else {
+ struct clvm_header head;
+ DEBUGLOG("System LV read failed\n");
+
+ /* Return a failure response */
+ head.cmd = CLVMD_CMD_REPLY;
+ head.status = -EFBIG;
+ head.flags = 0;
+ head.clientid = msg->clientid;
+ head.arglen = 0;
+ head.node[0] = '\0';
+ send_message(&head, sizeof(struct clvm_header),
+ csid, fd,
+ "Error sending ENOMEM command reply");
+ return;
+ }
+ } else {
+ struct clvm_header head;
+ DEBUGLOG
+ ("Error attempting to malloc %d bytes for system LV read\n",
+ msg->arglen);
+ /* Return a failure response */
+ head.cmd = CLVMD_CMD_REPLY;
+ head.status = -ENOMEM;
+ head.flags = 0;
+ head.clientid = msg->clientid;
+ head.arglen = 0;
+ head.node[0] = '\0';
+ send_message(&head, sizeof(struct clvm_header), csid,
+ fd, "Error sending ENOMEM command reply");
+ return;
+ }
+ }
+
+ /* Check for GOAWAY and sulk */
+ if (msg->cmd == CLVMD_CMD_GOAWAY) {
+
+ DEBUGLOG("Told to go away by %s\n", nodename);
+ log_error("Told to go away by %s\n", nodename);
+ exit(99);
+ }
+
+ /* Version check is internal - don't bother exposing it in
+ clvmd-command.c */
+ if (msg->cmd == CLVMD_CMD_VERSION) {
+ int *version_nums = (int *) msg->args;
+ char node[256];
+ name_from_csid(csid, node);
+ DEBUGLOG("Remote node %s is version %d.%d.%d\n",
+ node,
+ ntohl(version_nums[0]),
+ ntohl(version_nums[1]), ntohl(version_nums[2]));
+
+ if (ntohl(version_nums[0]) != CLVMD_MAJOR_VERSION) {
+ struct clvm_header byebyemsg;
+ DEBUGLOG
+ ("Telling node %s to go away because of incompatible version number\n",
+ node);
+ log_notice
+ ("Telling node %s to go away because of incompatible version number %d.%d.%d\n",
+ node, ntohl(version_nums[0]),
+ ntohl(version_nums[1]), ntohl(version_nums[2]));
+
+ byebyemsg.cmd = CLVMD_CMD_GOAWAY;
+ byebyemsg.status = 0;
+ byebyemsg.flags = 0;
+ byebyemsg.arglen = 0;
+ byebyemsg.clientid = 0;
+ cluster_send_message(&byebyemsg, sizeof(byebyemsg),
+ our_csid,
+ "Error Sending GOAWAY message");
+ } else {
+ add_up_node(csid);
+ }
+ return;
+ }
+
+ /* Allocate a default reply buffer */
+ replyargs = malloc(MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header));
+
+ if (replyargs != NULL) {
+ /* Run the command */
+ status =
+ do_command(NULL, msg, msglen, &replyargs, buflen,
+ &replylen);
+ } else {
+ status = -ENOMEM;
+ }
+
+ /* If it wasn't a reply, then reply */
+ if (msg->cmd != CLVMD_CMD_REPLY) {
+ char *aggreply;
+
+ aggreply =
+ realloc(replyargs, replylen + sizeof(struct clvm_header));
+ if (aggreply) {
+ struct clvm_header *agghead =
+ (struct clvm_header *) aggreply;
+
+ replyargs = aggreply;
+ /* Move it up so there's room for a header in front of the data */
+ memmove(aggreply + offsetof(struct clvm_header, args),
+ replyargs, replylen);
+
+ agghead->xid = msg->xid;
+
+ /* Use the system LV ? */
+ if (replylen > MAX_INLINE_MESSAGE) {
+ agghead->cmd = CLVMD_CMD_REPLY;
+ agghead->status = status;
+ agghead->flags = CLVMD_FLAG_SYSTEMLV;
+ agghead->clientid = msg->clientid;
+ agghead->arglen = replylen;
+ agghead->node[0] = '\0';
+
+ /* If System LV operation failed then report it as EFBIG but only do it
+ if the data buffer has something in it. */
+ if (system_lv_write_data
+ (aggreply,
+ replylen + sizeof(struct clvm_header)) < 0
+ && replylen > 0)
+ agghead->status = -EFBIG;
+
+ send_message(agghead,
+ sizeof(struct clvm_header), csid,
+ fd,
+ "Error sending long command reply");
+
+ } else {
+ agghead->cmd = CLVMD_CMD_REPLY;
+ agghead->status = status;
+ agghead->flags = 0;
+ agghead->clientid = msg->clientid;
+ agghead->arglen = replylen;
+ agghead->node[0] = '\0';
+ send_message(aggreply,
+ sizeof(struct clvm_header) +
+ replylen + 2, csid, fd,
+ "Error sending command reply");
+ }
+ } else {
+ struct clvm_header head;
+
+ DEBUGLOG("Error attempting to realloc return buffer\n");
+ /* Return a failure response */
+ head.cmd = CLVMD_CMD_REPLY;
+ head.status = -ENOMEM;
+ head.flags = 0;
+ head.clientid = msg->clientid;
+ head.arglen = 0;
+ head.node[0] = '\0';
+ send_message(&head, sizeof(struct clvm_header), csid,
+ fd, "Error sending ENOMEM command reply");
+ return;
+ }
+ }
+
+ /* Free buffer if it was malloced */
+ if (msg_malloced) {
+ free(msg);
+ }
+ free(replyargs);
+}
+
+/* Add a reply to a command to the list of replies for this client.
+ If we have got a full set then send them to the waiting client down the local
+ socket */
+static void add_reply_to_list(struct local_client *client, int status,
+ char *csid, const char *buf, int len)
+{
+ struct node_reply *reply;
+
+ pthread_mutex_lock(&client->bits.localsock.reply_mutex);
+
+ /* Add it to the list of replies */
+ reply = malloc(sizeof(struct node_reply));
+ if (reply) {
+ reply->status = status;
+ name_from_csid(csid, reply->node);
+ DEBUGLOG("Reply from node %s: %d bytes\n", reply->node, len);
+
+ if (len > 0) {
+ reply->replymsg = malloc(len);
+ if (!reply->replymsg) {
+ reply->status = -ENOMEM;
+ } else {
+ memcpy(reply->replymsg, buf, len);
+ }
+ } else {
+ reply->replymsg = NULL;
+ }
+ /* Hook it onto the reply chain */
+ reply->next = client->bits.localsock.replies;
+ client->bits.localsock.replies = reply;
+ } else {
+ /* It's all gone horribly wrong... */
+ pthread_mutex_unlock(&client->bits.localsock.reply_mutex);
+ send_local_reply(client, ENOMEM, client->fd);
+ return;
+ }
+ DEBUGLOG("Got %d replies, expecting: %d\n",
+ client->bits.localsock.num_replies + 1,
+ client->bits.localsock.expected_replies);
+
+ /* If we have the whole lot then do the post-process */
+ if (++client->bits.localsock.num_replies ==
+ client->bits.localsock.expected_replies) {
+ /* Post-process the command */
+ if (client->bits.localsock.threadid) {
+ pthread_mutex_lock(&client->bits.localsock.mutex);
+ client->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&client->bits.localsock.cond);
+ pthread_mutex_unlock(&client->bits.localsock.mutex);
+ }
+ }
+ pthread_mutex_unlock(&client->bits.localsock.reply_mutex);
+}
+
+/* This is the thread that runs the PRE and post commands for a particular connection */
+static void *pre_and_post_thread(void *arg)
+{
+ struct local_client *client = (struct local_client *) arg;
+ int status;
+ sigset_t ss;
+ int pipe_fd = client->bits.localsock.pipe;
+
+ DEBUGLOG("in sub thread: client = %p\n", client);
+
+ /* Ignore SIGUSR1 (handled by master process) but enable
+ SIGUSR2 (kills subthreads) */
+ sigemptyset(&ss);
+ sigaddset(&ss, SIGUSR1);
+ pthread_sigmask(SIG_BLOCK, &ss, NULL);
+
+ sigdelset(&ss, SIGUSR1);
+ sigaddset(&ss, SIGUSR2);
+ pthread_sigmask(SIG_UNBLOCK, &ss, NULL);
+
+ /* Loop around doing PRE and POST functions until the client goes away */
+ while (!client->bits.localsock.finished) {
+ /* Execute the code */
+ status = do_pre_command(client);
+
+ if (status)
+ client->bits.localsock.all_success = 0;
+
+ DEBUGLOG("Writing status %d down pipe %d\n", status, pipe_fd);
+ /* Tell the parent process we have finished this bit */
+ write(pipe_fd, &status, sizeof(int));
+
+ /* We may need to wait for the condition variable before running the post command */
+ pthread_mutex_lock(&client->bits.localsock.mutex);
+ DEBUGLOG("Waiting to do post command - state = %d\n",
+ client->bits.localsock.state);
+
+ if (client->bits.localsock.state != POST_COMMAND) {
+ pthread_cond_wait(&client->bits.localsock.cond,
+ &client->bits.localsock.mutex);
+ }
+ pthread_mutex_unlock(&client->bits.localsock.mutex);
+
+ DEBUGLOG("Got post command condition...\n");
+
+ do_post_command(client);
+
+ write(pipe_fd, &status, sizeof(int));
+
+ if (client->bits.localsock.finished)
+ break;
+
+ DEBUGLOG("Waiting for next pre command\n");
+
+ pthread_mutex_lock(&client->bits.localsock.mutex);
+ if (client->bits.localsock.state != PRE_COMMAND) {
+ pthread_cond_wait(&client->bits.localsock.cond,
+ &client->bits.localsock.mutex);
+ }
+ pthread_mutex_unlock(&client->bits.localsock.mutex);
+
+ DEBUGLOG("Got pre command condition...\n");
+ }
+ DEBUGLOG("Subthread finished\n");
+ return (void *) 0;
+}
+
+/* Process a command on the local node and store the result */
+static int process_local_command(struct clvm_header *msg, int msglen,
+ struct local_client *client,
+ unsigned short xid)
+{
+ char *replybuf = malloc(MAX_CLUSTER_MESSAGE);
+ int buflen = MAX_CLUSTER_MESSAGE - sizeof(struct clvm_header) - 1;
+ int replylen = 0;
+ int status;
+
+ DEBUGLOG("process_local_command: msg=%p, msglen =%d, client=%p\n", msg,
+ msglen, client);
+ if (replybuf == NULL)
+ return -1;
+
+ status = do_command(client, msg, msglen, &replybuf, buflen, &replylen);
+
+ if (status)
+ client->bits.localsock.all_success = 0;
+
+ /* If we took too long then discard the reply */
+ if (xid == client->xid) {
+ add_reply_to_list(client, status, our_csid, replybuf, replylen);
+ } else {
+ DEBUGLOG
+ ("Local command took too long, discarding xid %d, current is %d\n",
+ xid, client->xid);
+ }
+
+ free(replybuf);
+ return status;
+}
+
+static int process_reply(struct clvm_header *msg, int msglen, char *csid)
+{
+ struct local_client *client = NULL;
+
+ client = find_client(msg->clientid);
+ if (!client) {
+ DEBUGLOG("Got message for unknown client 0x%x\n",
+ msg->clientid);
+ log_error("Got message for unknown client 0x%x\n",
+ msg->clientid);
+ return -1;
+ }
+
+ if (msg->status)
+ client->bits.localsock.all_success = 0;
+
+ /* Gather replies together for this client id */
+ if (msg->xid == client->xid) {
+ add_reply_to_list(client, msg->status, csid, msg->args,
+ msg->arglen);
+ } else {
+ DEBUGLOG("Discarding reply with old XID %d, current = %d\n",
+ msg->xid, client->xid);
+ }
+ return 0;
+}
+
+/* Send an aggregated reply back to the client */
+static void send_local_reply(struct local_client *client, int status, int fd)
+{
+ struct clvm_header *clientreply;
+ struct node_reply *thisreply = client->bits.localsock.replies;
+ char *replybuf;
+ char *ptr;
+ int message_len = 0;
+
+ DEBUGLOG("Send local reply\n");
+
+ /* Work out the total size of the reply */
+ while (thisreply) {
+ if (thisreply->replymsg)
+ message_len += strlen(thisreply->replymsg) + 1;
+ else
+ message_len++;
+
+ message_len += strlen(thisreply->node) + 1 + sizeof(int);
+
+ thisreply = thisreply->next;
+ }
+
+ /* Add in the size of our header */
+ message_len = message_len + sizeof(struct clvm_header) + 1;
+ replybuf = malloc(message_len);
+
+ clientreply = (struct clvm_header *) replybuf;
+ clientreply->status = -status;
+ clientreply->cmd = CLVMD_CMD_REPLY;
+ clientreply->node[0] = '\0';
+
+ ptr = clientreply->args;
+
+ /* Add in all the replies, and free them as we go */
+ thisreply = client->bits.localsock.replies;
+ while (thisreply) {
+ struct node_reply *tempreply = thisreply;
+
+ strcpy(ptr, thisreply->node);
+ ptr += strlen(thisreply->node) + 1;
+
+ *(int *) ptr = thisreply->status;
+ ptr += sizeof(int);
+
+ if (thisreply->replymsg) {
+ strcpy(ptr, thisreply->replymsg);
+ ptr += strlen(thisreply->replymsg) + 1;
+ } else {
+ ptr[0] = '\0';
+ ptr++;
+ }
+ thisreply = thisreply->next;
+
+ if (tempreply->replymsg)
+ free(tempreply->replymsg);
+ free(tempreply);
+ }
+
+ /* Terminate with an empty node name */
+ *ptr = '\0';
+
+ clientreply->arglen = ptr - clientreply->args + 1;
+
+ /* And send it */
+ send_message(replybuf, message_len, our_csid, fd,
+ "Error sending REPLY to client");
+ free(replybuf);
+
+ /* Reset comms variables */
+ client->bits.localsock.replies = NULL;
+ client->bits.localsock.expected_replies = 0;
+ client->bits.localsock.in_progress = FALSE;
+ client->bits.localsock.sent_out = FALSE;
+}
+
+/* Just free a reply chain because it wasn't used. */
+static void free_reply(struct local_client *client)
+{
+ /* Add in all the replies, and free them as we go */
+ struct node_reply *thisreply = client->bits.localsock.replies;
+ while (thisreply) {
+ struct node_reply *tempreply = thisreply;
+
+ thisreply = thisreply->next;
+
+ if (tempreply->replymsg)
+ free(tempreply->replymsg);
+ free(tempreply);
+ }
+ client->bits.localsock.replies = NULL;
+}
+
+/* Send our version number to the cluster */
+static void send_version_message()
+{
+ char message[sizeof(struct clvm_header) + sizeof(int) * 3];
+ struct clvm_header *msg = (struct clvm_header *) message;
+ int *version_nums = (int *) msg->args;
+
+ msg->cmd = CLVMD_CMD_VERSION;
+ msg->status = 0;
+ msg->flags = 0;
+ msg->clientid = 0;
+ msg->arglen = sizeof(int) * 3;
+
+ version_nums[0] = htonl(CLVMD_MAJOR_VERSION);
+ version_nums[1] = htonl(CLVMD_MINOR_VERSION);
+ version_nums[2] = htonl(CLVMD_PATCH_VERSION);
+
+ cluster_send_message(message, sizeof(message), NULL,
+ "Error Sending version number");
+}
+
+/* Send a message to either a local client or another server */
+static int send_message(void *buf, int msglen, char *csid, int fd,
+ const char *errtext)
+{
+ int len;
+
+ /* Send remote messages down the cluster socket */
+ if (csid == NULL || !ISLOCAL_CSID(csid)) {
+ hton_clvm((struct clvm_header *) buf); /* Byte swap if necessary */
+ return cluster_send_message(buf, msglen, csid, errtext);
+ } else {
+ int ptr = 0;
+
+ /* Make sure it all goes */
+ do {
+ len = write(fd, buf + ptr, msglen - ptr);
+
+ if (len <= 0) {
+ log_error(errtext);
+ break;
+ }
+ ptr += len;
+ } while (len < msglen);
+ }
+ return len;
+}
+
+static int process_work_item(struct lvm_thread_cmd *cmd)
+{
+ if (!cmd->remote) {
+ DEBUGLOG("process_work_item: local\n");
+ process_local_command(cmd->msg, cmd->msglen, cmd->client,
+ cmd->xid);
+ } else {
+ DEBUGLOG("process_work_item: remote\n");
+ process_remote_command(cmd->msg, cmd->msglen, cmd->client->fd,
+ cmd->csid);
+ }
+ return 0;
+}
+
+/*
+ * Routine that runs in the "LVM thread".
+ */
+static void *lvm_thread_fn(void *arg)
+{
+ struct list *cmdl, *tmp;
+ sigset_t ss;
+
+ DEBUGLOG("LVM thread function started\n");
+ pthread_mutex_lock(&lvm_thread_mutex);
+
+ /* Ignore SIGUSR1 & 2 */
+ sigemptyset(&ss);
+ sigaddset(&ss, SIGUSR1);
+ sigaddset(&ss, SIGUSR2);
+ pthread_sigmask(SIG_BLOCK, &ss, NULL);
+
+ /* Initialise the interface to liblvm */
+ init_lvm();
+ pthread_mutex_unlock(&lvm_thread_mutex);
+
+ /* Now wait for some actual work */
+ for (;;) {
+ DEBUGLOG("LVM thread waiting for work\n");
+
+ pthread_mutex_lock(&lvm_thread_mutex);
+ if (list_empty(&lvm_cmd_head))
+ pthread_cond_wait(&lvm_thread_cond, &lvm_thread_mutex);
+
+ list_iterate_safe(cmdl, tmp, &lvm_cmd_head) {
+ struct lvm_thread_cmd *cmd;
+
+ cmd =
+ list_struct_base(cmdl, struct lvm_thread_cmd, list);
+ list_del(&cmd->list);
+ pthread_mutex_unlock(&lvm_thread_mutex);
+
+ process_work_item(cmd);
+ free(cmd->msg);
+ free(cmd);
+
+ pthread_mutex_lock(&lvm_thread_mutex);
+ }
+ pthread_mutex_unlock(&lvm_thread_mutex);
+ }
+}
+
+/* Pass down some work to the LVM thread */
+static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg,
+ int msglen, char *csid)
+{
+ struct lvm_thread_cmd *cmd;
+
+ cmd = malloc(sizeof(struct lvm_thread_cmd));
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->msg = malloc(msglen);
+ if (!cmd->msg) {
+ log_error("Unable to allocate buffer space\n");
+ free(cmd);
+ return -1;
+ }
+
+ cmd->client = client;
+ cmd->msglen = msglen;
+ cmd->xid = client->xid;
+ memcpy(cmd->msg, msg, msglen);
+ if (csid) {
+ memcpy(cmd->csid, csid, MAX_CSID_LEN);
+ cmd->remote = 1;
+ } else {
+ cmd->remote = 0;
+ }
+
+ DEBUGLOG
+ ("add_to_lvmqueue: cmd=%p. client=%p, msg=%p, len=%d, csid=%p, xid=%d\n",
+ cmd, client, msg, msglen, csid, cmd->xid);
+ pthread_mutex_lock(&lvm_thread_mutex);
+ list_add(&lvm_cmd_head, &cmd->list);
+ pthread_cond_signal(&lvm_thread_cond);
+ pthread_mutex_unlock(&lvm_thread_mutex);
+
+ return 0;
+}
+
+/* Open the local socket, the one that libclvm uses to talk to us */
+static int open_local_sock()
+{
+ int local_socket;
+ struct sockaddr_un sockaddr;
+
+ /* Open local socket */
+ if (CLVMD_SOCKNAME[0] != '\0')
+ unlink(CLVMD_SOCKNAME);
+ local_socket = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (local_socket < 0) {
+ log_error("Can't create local socket: %m");
+ return -1;
+ }
+
+ memset(&sockaddr, 0, sizeof(sockaddr));
+ memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
+ sockaddr.sun_family = AF_UNIX;
+ if (bind(local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) {
+ log_error("can't bind local socket: %m");
+ close(local_socket);
+ return -1;
+ }
+ if (listen(local_socket, 1) != 0) {
+ log_error("listen local: %m");
+ close(local_socket);
+ return -1;
+ }
+ if (CLVMD_SOCKNAME[0] != '\0')
+ chmod(CLVMD_SOCKNAME, 0600);
+
+ return local_socket;
+}
+
+static void check_all_callback(struct local_client *client, char *csid,
+ int node_up)
+{
+ if (!node_up)
+ add_reply_to_list(client, -EHOSTDOWN, csid, "CLVMD not running",
+ 18);
+}
+
+/* Check to see if all CLVMDs are running (ie one on
+ every node in the cluster).
+ If not, returns -1 and prints out a list of errant nodes */
+static int check_all_clvmds_running(struct local_client *client)
+{
+ DEBUGLOG("check_all_clvmds_running\n");
+ return cluster_do_node_callback(client, check_all_callback);
+}
+
+/* Return a local_client struct given a client ID.
+ client IDs are in network byte order */
+static struct local_client *find_client(int clientid)
+{
+ struct local_client *thisfd;
+ for (thisfd = &local_client_head; thisfd != NULL; thisfd = thisfd->next) {
+ if (thisfd->fd == ntohl(clientid))
+ return thisfd;
+ }
+ return NULL;
+}
+
+/* Byte-swapping routines for the header so we
+ work in a heterogeneous environment */
+static void hton_clvm(struct clvm_header *hdr)
+{
+ hdr->status = htonl(hdr->status);
+ hdr->arglen = htonl(hdr->arglen);
+ hdr->xid = htons(hdr->xid);
+ /* Don't swap clientid as it's only a token as far as
+ remote nodes are concerned */
+}
+
+static void ntoh_clvm(struct clvm_header *hdr)
+{
+ hdr->status = ntohl(hdr->status);
+ hdr->arglen = ntohl(hdr->arglen);
+ hdr->xid = ntohs(hdr->xid);
+}
+
+/* Handler for SIGUSR2 - sent to kill subthreads */
+static void sigusr2_handler(int sig)
+{
+ DEBUGLOG("SIGUSR2 received\n");
+ pthread_exit((void *) -1);
+ return;
+}
+
+static void sigterm_handler(int sig)
+{
+ DEBUGLOG("SIGTERM received\n");
+ quit = 1;
+ return;
+}
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _CLVMD_H
+#define _CLVMD_H
+
+#define CLVMD_MAJOR_VERSION 0
+#define CLVMD_MINOR_VERSION 2
+#define CLVMD_PATCH_VERSION 1
+
+/* Name of the cluster LVM admin lock */
+#define ADMIN_LOCK_NAME "CLVMD_ADMIN"
+
+/* Default time (in seconds) we will wait for all remote commands to execute
+ before declaring them dead */
+#define DEFAULT_CMD_TIMEOUT 60
+
+/* One of these for each reply we get from command execution on a node */
+struct node_reply {
+ char node[MAX_CLUSTER_MEMBER_NAME_LEN];
+ char *replymsg;
+ int status;
+ struct node_reply *next;
+};
+
+/*
+ * These exist for the use of local sockets only when we are
+ * collecting responses from all cluster nodes
+ */
+struct localsock_bits {
+ struct node_reply *replies;
+ int num_replies;
+ int expected_replies;
+ time_t sent_time; /* So we can check for timeouts */
+ int in_progress; /* Only execute one cmd at a time per client */
+ int sent_out; /* Flag to indicate that a command was sent
+ to remote nodes */
+ void *private; /* Private area for command processor use */
+ void *cmd; /* Whole command as passed down local socket */
+ int cmd_len; /* Length of above */
+ int pipe; /* Pipe to send PRE completion status down */
+ int finished; /* Flag to tell subthread to exit */
+ int all_success; /* Set to 0 if any node (or the pre_command)
+ failed */
+ struct local_client *pipe_client;
+ pthread_t threadid;
+ enum { PRE_COMMAND, POST_COMMAND, QUIT } state;
+ pthread_mutex_t mutex; /* Main thread and worker synchronisation */
+ pthread_cond_t cond;
+
+ pthread_mutex_t reply_mutex; /* Protect reply structure */
+};
+
+/* Entries for PIPE clients */
+struct pipe_bits {
+ struct local_client *client; /* Actual (localsock) client */
+ pthread_t threadid; /* Our own copy of the thread id */
+};
+
+/* Entries for Network socket clients */
+struct netsock_bits {
+ void *private;
+ int flags;
+};
+
+typedef int (*fd_callback_t) (struct local_client * fd, char *buf, int len,
+ char *csid, struct local_client ** new_client);
+
+/* One of these for each fd we are listening on */
+struct local_client {
+ int fd;
+ enum { CLUSTER_MAIN_SOCK, CLUSTER_DATA_SOCK, LOCAL_RENDEZVOUS,
+ LOCAL_SOCK, THREAD_PIPE, CLUSTER_INTERNAL } type;
+ struct local_client *next;
+ unsigned short xid;
+ fd_callback_t callback;
+
+ union {
+ struct localsock_bits localsock;
+ struct pipe_bits pipe;
+ struct netsock_bits net;
+ } bits;
+};
+
+#ifdef DEBUG
+#define DEBUGLOG(fmt, args...) fprintf(stderr, "CLVMD[%d]: %ld ", getpid(), time(NULL) ); fprintf(stderr, fmt, ## args)
+#else
+#define DEBUGLOG(fmt, args...)
+#endif
+
+#ifndef max
+#define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+/* The real command processor is in clvmd-command.c */
+extern int do_command(struct local_client *client, struct clvm_header *msg,
+ int msglen, char **buf, int buflen, int *retlen);
+
+/* Pre and post command routines are called only on the local node */
+extern int do_pre_command(struct local_client *client);
+extern int do_post_command(struct local_client *client);
+
+extern int add_client(struct local_client *new_client);
+
+extern void clvmd_cluster_init_completed(void);
+
+#endif
--- /dev/null
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+**
+** This copyrighted material is made available to anyone wishing to use,
+** modify, copy, or redistribute it subject to the terms and conditions
+** of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/* CMAN socket interface header,
+   may be included by user or kernel code */
+
+#ifndef __CNXMAN_SOCKET_H
+#define __CNXMAN_SOCKET_H
+
+/* Just made these up but the address family must be less than 32 (NPROTO) */
+#define AF_CLUSTER 31
+#define PF_CLUSTER AF_CLUSTER
+
+/* Protocol(socket) types */
+#define CLPROTO_MASTER 2
+#define CLPROTO_CLIENT 3
+
+/* Setsockopt -- maybe should be ioctls?? */
+#define CLU_SET_MULTICAST 100
+#define CLU_JOIN_CLUSTER 101
+#define CLU_LEAVE_CLUSTER 102
+#define CLU_SET_RCVONLY 103
+#define CLU_SET_UNICAST 104
+#define KCL_SET_MULTICAST 105
+#define KCL_SET_RCVONLY 106
+#define KCL_SET_UNICAST 107
+#define KCL_SET_NODENAME 108
+#define CLU_SET_NODENAME 109
+
+/* ioctls -- should register these properly */
+#define SIOCCLUSTER_NOTIFY _IOW('x', 0x01, int)
+#define SIOCCLUSTER_REMOVENOTIFY _IO( 'x', 0x02)
+#define SIOCCLUSTER_GETMEMBERS _IOR('x', 0x03, struct cl_cluster_nodelist)
+#define SIOCCLUSTER_SETEXPECTED_VOTES _IOW('x', 0x04, int)
+#define SIOCCLUSTER_ISQUORATE _IO( 'x', 0x05)
+#define SIOCCLUSTER_ISLISTENING _IOW('x', 0x06, struct cl_listen_request)
+#define SIOCCLUSTER_GETALLMEMBERS _IOR('x', 0x07, struct cl_cluster_nodelist)
+#define SIOCCLUSTER_SET_VOTES _IOW('x', 0x08, int)
+#define SIOCCLUSTER_GET_VERSION _IOR('x', 0x09, struct cl_version)
+#define SIOCCLUSTER_SET_VERSION _IOW('x', 0x0a, struct cl_version)
+#define SIOCCLUSTER_ISACTIVE _IO( 'x', 0x0b)
+#define SIOCCLUSTER_KILLNODE _IOW('x', 0x0c, int)
+#define SIOCCLUSTER_GET_JOINCOUNT _IO( 'x', 0x0d)
+#define SIOCCLUSTER_SERVICE_REGISTER _IOW('x', 0x0e, char)
+#define SIOCCLUSTER_SERVICE_UNREGISTER _IO('x', 0x0f)
+#define SIOCCLUSTER_SERVICE_JOIN _IO( 'x', 0x10)
+#define SIOCCLUSTER_SERVICE_LEAVE _IO( 'x', 0x20)
+#define SIOCCLUSTER_SERVICE_SETSIGNAL _IOW('x', 0x30, int)
+#define SIOCCLUSTER_SERVICE_STARTDONE _IOW('x', 0x40, unsigned int)
+#define SIOCCLUSTER_SERVICE_GETEVENT _IOR('x', 0x50, struct cl_service_event)
+#define SIOCCLUSTER_SERVICE_GETMEMBERS _IOR('x', 0x60, struct cl_cluster_node)
+#define SIOCCLUSTER_SERVICE_GLOBALID _IOR('x', 0x70, uint32_t)
+#define SIOCCLUSTER_SERVICE_SETLEVEL _IOR('x', 0x80, int)
+#define SIOCCLUSTER_GETNODE _IOWR('x', 0x90, struct cl_cluster_node)
+#define SIOCCLUSTER_BARRIER _IOW('x', 0x0a0, struct cl_barrier_info)
+
+/* Maximum size of a cluster message */
+#define MAX_CLUSTER_MESSAGE 1500
+#define MAX_CLUSTER_MEMBER_NAME_LEN 255
+#define MAX_BARRIER_NAME_LEN 33
+#define MAX_SA_ADDR_LEN 12
+#define MAX_CLUSTER_NAME_LEN 16
+
+/* Well-known cluster port numbers */
+#define CLUSTER_PORT_MEMBERSHIP 1 /* Mustn't block during cluster
+ * transitions! */
+#define CLUSTER_PORT_SERVICES 2
+#define CLUSTER_PORT_SYSMAN 10 /* Remote execution daemon */
+#define CLUSTER_PORT_CLVMD 11 /* Cluster LVM daemon */
+#define CLUSTER_PORT_SLM 12 /* LVM SLM (simple lock manager) */
+
+/* Port numbers above this will be blocked when the cluster is inquorate or in
+ * transition */
+#define HIGH_PROTECTED_PORT 9
+
+/* Reasons for leaving the cluster */
+#define CLUSTER_LEAVEFLAG_DOWN 0 /* Normal shutdown */
+#define CLUSTER_LEAVEFLAG_KILLED 1
+#define CLUSTER_LEAVEFLAG_PANIC 2
+#define CLUSTER_LEAVEFLAG_REMOVED 3 /* This one can reduce quorum */
+#define CLUSTER_LEAVEFLAG_REJECTED 4 /* Not allowed into the cluster in the
+ * first place */
+#define CLUSTER_LEAVEFLAG_INCONSISTENT 5 /* Our view of the cluster is
+ * in a minority */
+#define CLUSTER_LEAVEFLAG_DEAD 6 /* Discovered to be dead */
+#define CLUSTER_LEAVEFLAG_FORCE 0x10 /* Forced by command-line */
+
+/* OOB messages sent to a local socket */
+#define CLUSTER_OOB_MSG_PORTCLOSED 1
+#define CLUSTER_OOB_MSG_STATECHANGE 2
+#define CLUSTER_OOB_MSG_SERVICEEVENT 3
+
+/* Sendmsg flags, these are above the normal sendmsg flags so they don't
+ * interfere */
+#define MSG_NOACK 0x010000 /* Don't need an ACK for this message */
+#define MSG_QUEUE 0x020000 /* Queue the message for sending later */
+#define MSG_MULTICAST 0x080000 /* Message was sent to all nodes in the cluster
+ */
+#define MSG_ALLINT 0x100000 /* Send out of all interfaces */
+
+typedef enum { NODESTATE_REMOTEMEMBER, NODESTATE_JOINING, NODESTATE_MEMBER,
+ NODESTATE_DEAD } nodestate_t;
+
+
+struct sockaddr_cl {
+ unsigned short scl_family;
+ unsigned char scl_flags;
+ unsigned char scl_port;
+ int scl_nodeid;
+};
+
+/* This is how we pass the multicast socket into kernel space. addr is the
+ * multicast address to use in the address family of the socket (eg for UDP it
+ * might be 255.255.255.0) */
+struct cl_multicast_sock {
+ int fd; /* FD of master socket to do multicast on */
+ int number; /* Socket number, to match up recvonly & bcast
+ * sockets */
+};
+
+/* Cluster configuration info passed when we join the cluster */
+struct cl_join_cluster_info {
+ unsigned char votes;
+ unsigned int expected_votes;
+ unsigned int two_node;
+ unsigned int config_version;
+
+ char cluster_name[17];
+};
+
+
+/* This is the structure, per node, returned from the membership ioctl */
+struct cl_cluster_node {
+ unsigned int size;
+ unsigned int node_id;
+ unsigned int us;
+ unsigned int leave_reason;
+ unsigned int incarnation;
+ nodestate_t state;
+ char name[MAX_CLUSTER_MEMBER_NAME_LEN];
+ unsigned char votes;
+};
+
+/* The struct passed to the membership ioctls */
+struct cl_cluster_nodelist {
+ uint32_t max_members;
+ struct cl_cluster_node *nodes;
+};
+
+/* Structure passed to SIOCCLUSTER_ISLISTENING */
+struct cl_listen_request {
+ unsigned char port;
+ int nodeid;
+};
+
+/* A Cluster PORTCLOSED message - received by a local user as an OOB message */
+struct cl_portclosed_oob {
+ unsigned char cmd; /* CLUSTER_OOB_MSG_PORTCLOSED */
+ unsigned char port;
+};
+
+/* Get all version numbers or set the config version */
+struct cl_version {
+ unsigned int major;
+ unsigned int minor;
+ unsigned int patch;
+ unsigned int config;
+};
+
+/* structure passed to barrier ioctls */
+struct cl_barrier_info {
+ char cmd;
+ char name[MAX_BARRIER_NAME_LEN];
+ unsigned int flags;
+ unsigned long arg;
+};
+
+typedef enum { SERVICE_EVENT_STOP, SERVICE_EVENT_START, SERVICE_EVENT_FINISH,
+ SERVICE_EVENT_LEAVEDONE } service_event_t;
+
+typedef enum { SERVICE_START_FAILED, SERVICE_START_JOIN, SERVICE_START_LEAVE }
+ service_start_t;
+
+struct cl_service_event {
+ service_event_t type;
+ service_start_t start_type;
+ unsigned int event_id;
+ unsigned int last_stop;
+ unsigned int last_start;
+ unsigned int last_finish;
+ unsigned int node_count;
+};
+
+
+/* Commands to the barrier ioctl */
+#define BARRIER_IOCTL_REGISTER 1
+#define BARRIER_IOCTL_CHANGE 2
+#define BARRIER_IOCTL_DELETE 3
+#define BARRIER_IOCTL_WAIT 4
+
+/* Attributes of a barrier - bitmask */
+#define BARRIER_ATTR_AUTODELETE 1
+#define BARRIER_ATTR_MULTISTEP 2
+#define BARRIER_ATTR_MANUAL 4
+#define BARRIER_ATTR_ENABLED 8
+#define BARRIER_ATTR_CALLBACK 16
+
+/* Attribute setting commands */
+#define BARRIER_SETATTR_AUTODELETE 1
+#define BARRIER_SETATTR_MULTISTEP 2
+#define BARRIER_SETATTR_ENABLED 3
+#define BARRIER_SETATTR_NODES 4
+#define BARRIER_SETATTR_CALLBACK 5
+#define BARRIER_SETATTR_TIMEOUT 6
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* library functions for Cluster LVM Daemon */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <search.h>
+#include <errno.h>
+
+#include "clvm.h"
+#include "libclvm.h"
+
+/* CLVM in hex! */
+#define LVM_SIGNATURE 0x434C564D
+
+#define MAX_CLUSTER_MEMBER_NAME_LEN 255
+
+/* NOTE: the LVMD uses the socket FD as the client ID, this means
+   that any client that calls fork() will inherit the context of
+   its parent. */
+static int clvmd_sock = -1;
+
+static int open_local_sock(void)
+{
+ int local_socket;
+ struct sockaddr_un sockaddr;
+
+ /* Open local socket */
+ local_socket = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (local_socket < 0) {
+ perror("Can't create local socket");
+ return -1;
+ }
+
+ fcntl(local_socket, F_SETFD, !FD_CLOEXEC);
+
+ strcpy(sockaddr.sun_path, CLVMD_SOCKNAME);
+ sockaddr.sun_family = AF_UNIX;
+ if (connect
+ (local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) {
+ int saved_errno = errno;
+
+ close(local_socket);
+
+ errno = saved_errno;
+ return -1;
+ }
+ return local_socket;
+}
+
+/* Send a request and return the status */
+static int send_request(char *inbuf, int inlen, char **retbuf)
+{
+ char outbuf[PIPE_BUF];
+ struct clvm_header *outheader = (struct clvm_header *) outbuf;
+ int len;
+ int off;
+ fd_set fds;
+
+ FD_ZERO(&fds);
+ FD_SET(clvmd_sock, &fds);
+
+ /* Send it to CLVMD */
+ if (write(clvmd_sock, inbuf, inlen) != inlen) {
+ perror("Error writing to CLVMD");
+ return -1;
+ }
+
+ /* Get the response */
+ if ((len = read(clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
+ perror("Error reading CLVMD");
+ return -1;
+ }
+ if (len == 0) {
+ fprintf(stderr, "EOF reading CLVMD");
+ errno = ENOTCONN;
+ return -1;
+ }
+
+ /* Allocate buffer */
+ *retbuf = malloc(len + outheader->arglen);
+ if (!*retbuf) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ /* Copy the header */
+ memcpy(*retbuf, outbuf, len);
+ outheader = (struct clvm_header *) *retbuf;
+
+ /* Read the returned values */
+ off = 1; /* we've already read the first byte */
+
+ while (off < outheader->arglen && len > 0) {
+ len = read(clvmd_sock, outheader->args + off, PIPE_BUF);
+ if (len > 0)
+ off += len;
+ }
+
+ /* Was it an error ? */
+ if (outheader->status < 0) {
+ errno = -outheader->status;
+ return -2;
+ }
+ return 0;
+}
+
+/* Build the structure header and parse-out wildcard node names */
+static void build_header(struct clvm_header *head, int cmd, const char *node,
+ void *data, int len)
+{
+ head->cmd = cmd;
+ head->status = 0;
+ head->flags = 0;
+ head->clientid = 0;
+ head->arglen = len;
+ if (node) {
+ /* Allow a couple of special node names:
+ "*" for all nodes,
+ "." for the local node only
+ */
+ if (strcmp(node, "*") == 0) {
+ head->node[0] = '\0';
+ } else if (strcmp(node, ".") == 0) {
+ head->node[0] = '\0';
+ head->flags = CLVMD_FLAG_LOCAL;
+ } else {
+ strcpy(head->node, node);
+ }
+ } else {
+ head->node[0] = '\0';
+ }
+}
+
+/* Send a message to a(or all) node(s) in the cluster */
+int lvm_cluster_write(char cmd, char *node, void *data, int len)
+{
+ char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1];
+ char *retbuf = NULL;
+ int status;
+ struct clvm_header *head = (struct clvm_header *) outbuf;
+
+ if (clvmd_sock == -1)
+ clvmd_sock = open_local_sock();
+ if (clvmd_sock == -1)
+ return -1;
+
+ build_header(head, cmd, node, data, len);
+ memcpy(head->node + strlen(head->node) + 1, data, len);
+
+ status =
+ send_request(outbuf,
+ sizeof(struct clvm_header) + strlen(head->node) + len,
+ &retbuf);
+ if (retbuf)
+ free(retbuf);
+
+ return status;
+}
+
+/* API: Send a message to a(or all) node(s) in the cluster
+ and wait for replies */
+int lvm_cluster_request(char cmd, const char *node, void *data, int len,
+ lvm_response_t ** response, int *num)
+{
+ char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1];
+ int *outptr;
+ char *inptr;
+ char *retbuf = NULL;
+ int status;
+ int i;
+ int num_responses = 0;
+ struct clvm_header *head = (struct clvm_header *) outbuf;
+ lvm_response_t *rarray;
+
+ *num = 0;
+
+ if (clvmd_sock == -1)
+ clvmd_sock = open_local_sock();
+ if (clvmd_sock == -1)
+ return -1;
+
+ build_header(head, cmd, node, data, len);
+ memcpy(head->node + strlen(head->node) + 1, data, len);
+
+ status =
+ send_request(outbuf,
+ sizeof(struct clvm_header) + strlen(head->node) + len,
+ &retbuf);
+ if (status == 0 || status == -2) {
+ /* Count the number of responses we got */
+ head = (struct clvm_header *) retbuf;
+ inptr = head->args;
+ while (inptr[0]) {
+ num_responses++;
+ inptr += strlen(inptr) + 1;
+ inptr += sizeof(int);
+ inptr += strlen(inptr) + 1;
+ }
+
+ /* Allocate response array. With an extra pair of INTs on the front to sanity
+ check the pointer when we are given it back to free */
+ outptr =
+ malloc(sizeof(lvm_response_t) * num_responses +
+ sizeof(int) * 2);
+ if (!outptr) {
+ if (retbuf)
+ free(retbuf);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ *response = (lvm_response_t *) (outptr + 2);
+ outptr[0] = LVM_SIGNATURE;
+ outptr[1] = num_responses;
+ rarray = *response;
+
+ /* Unpack the response into an lvm_response_t array */
+ inptr = head->args;
+ i = 0;
+ while (inptr[0]) {
+ strcpy(rarray[i].node, inptr);
+ inptr += strlen(inptr) + 1;
+
+ rarray[i].status = *(int *) inptr;
+ inptr += sizeof(int);
+
+ rarray[i].response = malloc(strlen(inptr) + 1);
+ if (rarray[i].response == NULL) {
+ /* Free up everything else and return error */
+ int j;
+ for (j = 0; j < i; j++)
+ free(rarray[i].response);
+ free(outptr);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ strcpy(rarray[i].response, inptr);
+ rarray[i].len = strlen(inptr);
+ inptr += strlen(inptr) + 1;
+ i++;
+ }
+ *num = num_responses;
+ *response = rarray;
+ }
+
+ if (retbuf)
+ free(retbuf);
+ return status;
+}
+
+/* API: Free reply array */
+int lvm_cluster_free_request(lvm_response_t * response)
+{
+ int *ptr = (int *) response - 2;
+ int i;
+ int num;
+
+ /* Check it's ours to free */
+ if (response == NULL || *ptr != LVM_SIGNATURE) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ num = ptr[1];
+ for (i = 0; i < num; i++) {
+ free(response[i].response);
+ }
+ free(ptr);
+
+ return 0;
+}
+
+/* These are a "higher-level" API providing black-box lock/unlock
+ functions for cluster LVM...maybe */
+
+/* Set by lock(), used by unlock() */
+static int num_responses;
+static lvm_response_t *response;
+
+int lvm_lock_for_cluster(char scope, char *name, int verbosity)
+{
+ int status;
+ int i;
+ char *args;
+ int len;
+
+ if (name) {
+ len = strlen(name) + 2;
+ args = alloca(len);
+ strcpy(args + 1, name);
+ } else {
+ len = 2;
+ args = alloca(len);
+ args[1] = '\0';
+ }
+ args[0] = scope;
+
+ status = lvm_cluster_request(CLVMD_CMD_LOCK,
+ "", args, len, &response, &num_responses);
+
+ /* If any nodes were down then display them and return an error */
+ for (i = 0; i < num_responses; i++) {
+ if (response[i].status == -EHOSTDOWN) {
+ if (verbosity)
+ fprintf(stderr,
+ "clvmd not running on node %s\n",
+ response[i].node);
+ status = -1;
+ }
+ }
+
+ /* If there was an error then free the memory now as the caller won't
+ want to do the unlock */
+ if (status) {
+ int saved_errno = errno;
+ lvm_cluster_free_request(response);
+ num_responses = 0;
+ errno = saved_errno;
+ }
+ return status;
+}
+
+int lvm_unlock_for_cluster(char scope, char *name, int verbosity)
+{
+ int status;
+ int i;
+ int len;
+ int failed;
+ int num_unlock_responses;
+ char *args;
+ lvm_response_t *unlock_response;
+
+ /* We failed - this should not have been called */
+ if (num_responses == 0)
+ return 0;
+
+ if (name) {
+ len = strlen(name) + 2;
+ args = alloca(len);
+ strcpy(args + 1, name);
+ } else {
+ len = 2;
+ args = alloca(len);
+ args[1] = '\0';
+ }
+ args[0] = scope;
+
+ /* See if it failed anywhere */
+ failed = 0;
+ for (i = 0; i < num_responses; i++) {
+ if (response[i].status != 0)
+ failed++;
+ }
+
+ /* If it failed on any nodes then we only unlock on
+ the nodes that succeeded */
+ if (failed) {
+ for (i = 0; i < num_responses; i++) {
+ /* Unlock the ones that succeeded */
+ if (response[i].status == 0) {
+ status = lvm_cluster_request(CLVMD_CMD_UNLOCK,
+ response[i].node,
+ args, len,
+ &unlock_response,
+ &num_unlock_responses);
+ if (status) {
+ if (verbosity)
+ fprintf(stderr,
+ "cluster command to node %s failed: %s\n",
+ response[i].node,
+ strerror(errno));
+ } else if (unlock_response[0].status != 0) {
+ if (verbosity > 1)
+ fprintf(stderr,
+ "unlock on node %s failed: %s\n",
+ response[i].node,
+ strerror(unlock_response
+ [0].status));
+ }
+ lvm_cluster_free_request(unlock_response);
+ } else {
+ if (verbosity)
+ fprintf(stderr,
+ "command on node %s failed: '%s' - will be left locked\n",
+ response[i].node,
+ strerror(response[i].status));
+ }
+ }
+ } else {
+ /* All OK, we can do a full cluster unlock */
+ status = lvm_cluster_request(CLVMD_CMD_UNLOCK,
+ "",
+ args, len,
+ &unlock_response,
+ &num_unlock_responses);
+ if (status) {
+ if (verbosity > 1)
+ fprintf(stderr, "cluster command failed: %s\n",
+ strerror(errno));
+ } else {
+ for (i = 0; i < num_unlock_responses; i++) {
+ if (unlock_response[i].status != 0) {
+ if (verbosity > 1)
+ fprintf(stderr,
+ "unlock on node %s failed: %s\n",
+ response[i].node,
+ strerror(unlock_response
+ [0].status));
+ }
+ }
+ }
+ lvm_cluster_free_request(unlock_response);
+ }
+ lvm_cluster_free_request(response);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 1997-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LIBCLVM_H
+#define _LIBCLVM_H
+
+typedef struct lvm_response {
+ char node[255];
+ char *response;
+ int status;
+ int len;
+
+} lvm_response_t;
+
+extern int lvm_cluster_request(char cmd, const char *node, void *data, int len,
+ lvm_response_t ** response, int *num);
+extern int lvm_cluster_write(char cmd, char *node, void *data, int len);
+extern int lvm_cluster_free_request(lvm_response_t * response);
+
+/* The "high-level" API */
+extern int lvm_lock_for_cluster(char scope, char *name, int verbosity);
+extern int lvm_unlock_for_cluster(char scope, char *name, int verbosity);
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <syslog.h>
+#include <assert.h>
+
+#include "libdlm.h"
+#include "clvm.h"
+#include "clvmd-comms.h"
+#include "clvmd.h"
+#include "lvm-functions.h"
+
+/* LVM2 headers */
+#include "toolcontext.h"
+#include "log.h"
+#include "activate.h"
+#include "hash.h"
+#include "locking.h"
+
+static struct cmd_context *cmd = NULL;
+static struct hash_table *lv_hash = NULL;
+
+struct lv_info {
+ int lock_id;
+ int lock_mode;
+};
+
+/* Return the mode a lock is currently held at (or -1 if not held) */
+static int get_current_lock(char *resource)
+{
+ struct lv_info *lvi;
+
+ lvi = hash_lookup(lv_hash, resource);
+ if (lvi) {
+ return lvi->lock_mode;
+ } else {
+ return -1;
+ }
+}
+
+/* Called at shutdown to tidy the lockspace */
+void unlock_all()
+{
+ struct hash_node *v;
+ hash_iterate(v, lv_hash) {
+ struct lv_info *lvi = hash_get_data(lv_hash, v);
+
+ sync_unlock(hash_get_key(lv_hash, v), lvi->lock_id);
+ }
+}
+
+/* Gets a real lock and keeps the info in the hash table */
+int hold_lock(char *resource, int mode, int flags)
+{
+ int status;
+ int saved_errno;
+ struct lv_info *lvi;
+
+ flags &= LKF_NOQUEUE; /* Only LKF_NOQUEUE is valid here */
+
+ lvi = hash_lookup(lv_hash, resource);
+ if (lvi) {
+ /* Already exists - convert it */
+ status =
+ sync_lock(resource, mode, LKF_CONVERT | flags,
+ &lvi->lock_id);
+ saved_errno = errno;
+ if (!status)
+ lvi->lock_mode = mode;
+
+ if (status) {
+ DEBUGLOG("hold_lock. convert to %d failed: %s\n", mode,
+ strerror(errno));
+ }
+ errno = saved_errno;
+ } else {
+ lvi = malloc(sizeof(struct lv_info));
+ if (!lvi)
+ return -1;
+
+ lvi->lock_mode = mode;
+ status = sync_lock(resource, mode, flags, &lvi->lock_id);
+ saved_errno = errno;
+ if (status) {
+ free(lvi);
+ DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode,
+ strerror(errno));
+ } else {
+ hash_insert(lv_hash, resource, lvi);
+ }
+ errno = saved_errno;
+ }
+ return status;
+}
+
+/* Unlock and remove it from the hash table */
+int hold_unlock(char *resource)
+{
+ struct lv_info *lvi;
+ int status;
+ int saved_errno;
+
+ lvi = hash_lookup(lv_hash, resource);
+
+ if (!lvi) {
+ DEBUGLOG("hold_unlock, lock not already held\n");
+ return 0;
+ }
+
+ status = sync_unlock(resource, lvi->lock_id);
+ saved_errno = errno;
+ if (!status) {
+ hash_remove(lv_hash, resource);
+ free(lvi);
+ } else {
+ DEBUGLOG("hold_unlock. unlock failed(%d): %s\n", status,
+ strerror(errno));
+ }
+
+ errno = saved_errno;
+ return status;
+}
+
+/* Watch the return codes here.
+ liblvm API functions return 1(true) for success, 0(false) for failure and don't set errno.
+ libdlm API functions return 0 for success, -1 for failure and do set errno.
+ These functions here return 0 for success or >0 for failure (where the retcode is errno)
+*/
+
+/* Activate LV exclusive or non-exclusive */
+static int do_activate_lv(char *resource, int mode)
+{
+ int oldmode;
+ int status;
+ int activate_lv;
+ struct lvinfo lvi;
+
+ /* Is it already open ? */
+ oldmode = get_current_lock(resource);
+ if (oldmode == mode) {
+ return 0; /* Nothing to do */
+ }
+
+ /* Does the config file want us to activate this LV ? */
+ if (!lv_activation_filter(cmd, resource, &activate_lv))
+ return EIO;
+
+ if (!activate_lv)
+ return 0; /* Success, we did nothing! */
+
+ /* Do we need to activate exclusively? */
+ if (activate_lv == 2)
+ mode = LKM_EXMODE;
+
+ /* OK, try to get the lock */
+ status = hold_lock(resource, mode, LKF_NOQUEUE);
+ if (status)
+ return errno;
+
+ /* If it's suspended then resume it */
+ if (!lv_info_by_lvid(cmd, resource, &lvi))
+ return EIO;
+
+ if (lvi.suspended)
+ if (!lv_resume(cmd, resource))
+ return EIO;
+
+ /* Now activate it */
+ if (!lv_activate(cmd, resource))
+ return EIO;
+
+ return 0;
+}
+
+/* Resume the LV if it was active */
+static int do_resume_lv(char *resource)
+{
+ int oldmode;
+
+ /* Is it open ? */
+ oldmode = get_current_lock(resource);
+ if (oldmode == -1) {
+ DEBUGLOG("do_deactivate_lock, lock not already held\n");
+ return 0; /* We don't need to do anything */
+ }
+
+ if (!lv_resume_if_active(cmd, resource))
+ return EIO;
+
+ return 0;
+}
+
+/* Suspend the device if active */
+static int do_suspend_lv(char *resource)
+{
+ int oldmode;
+ struct lvinfo lvi;
+
+ /* Is it open ? */
+ oldmode = get_current_lock(resource);
+ if (oldmode == -1) {
+ DEBUGLOG("do_suspend_lv, lock held at %d\n", oldmode);
+ return 0; /* Not active, so it's OK */
+ }
+
+ /* Only suspend it if it exists */
+ if (!lv_info_by_lvid(cmd, resource, &lvi))
+ return EIO;
+
+ if (lvi.exists) {
+ if (!lv_suspend_if_active(cmd, resource)) {
+ return EIO;
+ }
+ }
+ return 0;
+}
+
+static int do_deactivate_lv(char *resource)
+{
+ int oldmode;
+ int status;
+
+ /* Is it open ? */
+ oldmode = get_current_lock(resource);
+ if (oldmode == -1) {
+ DEBUGLOG("do_deactivate_lock, lock not already held\n");
+ return 0; /* We don't need to do anything */
+ }
+
+ if (!lv_deactivate(cmd, resource))
+ return EIO;
+
+ status = hold_unlock(resource);
+ if (status)
+ return errno;
+
+ return 0;
+}
+
+/* This is the LOCK_LV part that happens on all nodes in the cluster -
+ it is responsible for the interaction with device-mapper and LVM */
+/* Dispatch a cluster LV command to the matching local operation.
+ Returns 0 on success or an errno-style code on failure.
+ (lock_flags is only logged here, not acted upon.) */
+int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
+{
+ int status = 0;
+
+ DEBUGLOG("do_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
+ resource, command, lock_flags);
+
+ /* Re-read config before acting if it changed under us */
+ if (!cmd->config_valid || config_files_changed(cmd)) {
+ /* Reinitialise various settings inc. logging, filters */
+ if (!refresh_toolcontext(cmd)) {
+ log_error("Updated config file invalid. Aborting.");
+ return EINVAL;
+ }
+ }
+
+ switch (command) {
+ case LCK_LV_EXCLUSIVE:
+ status = do_activate_lv(resource, LKM_EXMODE);
+ break;
+
+ case LCK_LV_SUSPEND:
+ status = do_suspend_lv(resource);
+ break;
+
+ case LCK_UNLOCK:
+ case LCK_LV_RESUME: /* if active */
+ status = do_resume_lv(resource);
+ break;
+
+ case LCK_LV_ACTIVATE:
+ status = do_activate_lv(resource, LKM_CRMODE);
+ break;
+
+ case LCK_LV_DEACTIVATE:
+ status = do_deactivate_lv(resource);
+ break;
+
+ default:
+ DEBUGLOG("Invalid LV command 0x%x\n", command);
+ status = EINVAL;
+ break;
+ }
+
+ /* clean the pool for another command */
+ pool_empty(cmd->mem);
+
+ DEBUGLOG("Command return is %d\n", status);
+ return status;
+}
+
+/* Functions to do on the local node only BEFORE the cluster-wide stuff above happens */
+/* Returns 0 on success, errno if taking the PW lock fails.
+ Only LCK_LV_SUSPEND needs local pre-work; everything else is a no-op. */
+int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
+{
+ /* Nearly all the stuff happens cluster-wide. Apart from SUSPEND. Here we get the
+ lock out on this node (because we are the node modifying the metadata)
+ before suspending cluster-wide.
+ */
+ if (command == LCK_LV_SUSPEND) {
+ DEBUGLOG("pre_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
+ resource, command, lock_flags);
+
+ /* PW (protected-write) lock, fail rather than queue */
+ if (hold_lock(resource, LKM_PWMODE, LKF_NOQUEUE))
+ return errno;
+ }
+ return 0;
+}
+
+/* Functions to do on the local node only AFTER the cluster-wide stuff above happens */
+/* Returns 0 on success, EIO/errno on failure.
+ Only LCK_LV_RESUME needs local post-work; everything else is a no-op. */
+int post_lock_lv(unsigned char command, unsigned char lock_flags,
+ char *resource)
+{
+ /* Opposite of above, done on resume after a metadata update */
+ if (command == LCK_LV_RESUME) {
+ int oldmode;
+
+ DEBUGLOG
+ ("post_lock_lv: resource '%s', cmd = 0x%x, flags = %d\n",
+ resource, command, lock_flags);
+
+ /* If the lock state is PW then restore it to what it was */
+ oldmode = get_current_lock(resource);
+ if (oldmode == LKM_PWMODE) {
+ struct lvinfo lvi;
+
+ if (!lv_info_by_lvid(cmd, resource, &lvi))
+ return EIO;
+
+ if (lvi.exists) {
+ /* LV still exists: back to concurrent-read */
+ if (hold_lock(resource, LKM_CRMODE, 0))
+ return errno;
+ } else {
+ /* LV was removed: drop the lock entirely */
+ if (hold_unlock(resource))
+ return errno;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Check if a VG is in use by LVM1 so we don't stomp on it.
+ Returns 0 if the VG is inactive under LVM1, EBUSY otherwise. */
+int do_check_lvm1(char *vgname)
+{
+ int status;
+
+ status = check_lvm1_vg_inactive(cmd, vgname);
+
+ return status == 1 ? 0 : EBUSY;
+}
+
+/*
+ * Ideally, clvmd should be started before any LVs are active
+ * but this may not be the case...
+ * I suppose this also comes in handy if clvmd crashes, not that it would!
+ *
+ * Scans `lvm lvs` output and takes a CR lock for every LV that is
+ * already active or suspended. Always returns NULL.
+ */
+static void *get_initial_state()
+{
+ char lv[64], vg[64], flags[25];
+ char uuid[65];
+ char line[255];
+ FILE *lvs =
+ popen
+ ("/sbin/lvm lvs --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr",
+ "r");
+
+ if (!lvs)
+ return NULL;
+
+ while (fgets(line, sizeof(line), lvs)) {
+ /* Field widths added so sscanf cannot overflow the buffers */
+ if (sscanf(line, "%63s %63s %24s\n", vg, lv, flags) == 3) {
+ /* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */
+ if (flags[4] == 'a' || flags[4] == 's') { /* is it active or suspended? */
+ /* Convert hyphen-separated UUIDs into one */
+ memcpy(&uuid[0], &vg[0], 6);
+ memcpy(&uuid[6], &vg[7], 4);
+ memcpy(&uuid[10], &vg[12], 4);
+ memcpy(&uuid[14], &vg[17], 4);
+ memcpy(&uuid[18], &vg[22], 4);
+ memcpy(&uuid[22], &vg[27], 4);
+ memcpy(&uuid[26], &vg[32], 6);
+ memcpy(&uuid[32], &lv[0], 6);
+ memcpy(&uuid[38], &lv[7], 4);
+ memcpy(&uuid[42], &lv[12], 4);
+ memcpy(&uuid[46], &lv[17], 4);
+ memcpy(&uuid[50], &lv[22], 4);
+ memcpy(&uuid[54], &lv[27], 4);
+ memcpy(&uuid[58], &lv[32], 6);
+ uuid[64] = '\0';
+
+ DEBUGLOG("getting initial lock for %s\n", uuid);
+ hold_lock(uuid, LKM_CRMODE, LKF_NOQUEUE);
+ }
+ }
+ }
+ /* Fixed: a stream opened with popen() must be closed with pclose(),
+ not fclose(), so the child is reaped */
+ pclose(lvs);
+ return NULL;
+}
+
+/* Initialise the hash table that tracks per-LV lock state.
+ Must run before any lock operations use lv_hash. */
+void init_lvhash()
+{
+ /* Create hash table for keeping LV locks & status */
+ lv_hash = hash_create(100);
+}
+
+/* Called to initialise the LVM context of the daemon */
+/* Returns 1 on success, 0 on failure. */
+int init_lvm(void)
+{
+ if (!(cmd = create_toolcontext(NULL))) {
+ log_error("Failed to allocate command context");
+ return 0;
+ }
+
+ /* Use LOG_DAEMON for syslog messages instead of LOG_USER */
+ init_syslog(LOG_DAEMON);
+
+ /* Pick up locks for LVs that were active/suspended before we started */
+ get_initial_state();
+
+ return 1;
+}
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Functions in lvm-functions.c */
+
+#ifndef _LVM_FUNCTIONS_H
+#define _LVM_FUNCTIONS_H
+
+/* Three-phase cluster LV locking: pre (local, before cluster-wide),
+ do (on every node), post (local, afterwards). */
+extern int pre_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
+ char *resource);
+extern int do_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
+ char *resource);
+extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
+ char *resource);
+extern int do_check_lvm1(char *vgname);
+extern int init_lvm(void);
+extern void init_lvhash(void);
+
+/* DLM lock helpers */
+extern int hold_unlock(char *resource);
+extern int hold_lock(char *resource, int mode, int flags);
+extern void unlock_all(void);
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Routines dealing with the System LV */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/utsname.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <mntent.h>
+
+#include "libdlm.h"
+#include "log.h"
+#include "list.h"
+#include "locking.h"
+#include "system-lv.h"
+#include "clvmd-comms.h"
+#ifdef HAVE_CCS
+#include "ccs.h"
+#endif
+
+#define SYSTEM_LV_FILESYSTEM "ext2"
+#define SYSTEM_LV_MOUNTPOINT "/tmp/.clvmd-XXXXXX"
+
+extern char *config_filename(void);
+
+static char system_lv_name[PATH_MAX] = { '\0' };
+static char mount_point[PATH_MAX] = { '\0' };
+static int mounted = 0;
+static int mounted_rw = 0;
+static int lockid;
+static const char *lock_name = "CLVM_SYSTEM_LV";
+
+/* Look in /proc/mounts or (as a last resort) /etc/mtab to
+ see if the system-lv is mounted. If it is mounted and we
+ think it's not then abort because we don't have the right
+ lock status and we don't know what other processes are doing with it.
+
+ Returns 1 for mounted, 0 for not mounted so it matches the condition
+ of the "mounted" static variable above.
+ (Also returns -1 if neither mounts file could be opened.)
+*/
+static int is_really_mounted(void)
+{
+ FILE *mountfile;
+ struct mntent *ment;
+
+ mountfile = setmntent("/proc/mounts", "r");
+ if (!mountfile) {
+ /* Fall back to /etc/mtab if /proc is not available */
+ mountfile = setmntent("/etc/mtab", "r");
+ if (!mountfile) {
+ log_error("Unable to open /proc/mounts or /etc/mtab");
+ return -1;
+ }
+ }
+
+ /* Look for system LV name in the file */
+ do {
+ ment = getmntent(mountfile);
+ if (ment) {
+ if (strcmp(ment->mnt_fsname, system_lv_name) == 0) {
+ endmntent(mountfile);
+ return 1;
+ }
+ }
+ }
+ while (ment);
+
+ endmntent(mountfile);
+ return 0;
+}
+
+/* Get the system LV name from the config file (CCS) or, without CCS,
+ from the CLVMD_SYSTEM_LV environment variable. Caches the result in
+ system_lv_name. Returns 0 on success, -1 on failure. */
+static int find_system_lv(void)
+{
+ if (system_lv_name[0] == '\0') {
+#ifdef HAVE_CCS
+ int error;
+ ccs_node_t *ctree;
+
+ /* Read the cluster config file */
+ /* Open the config file */
+ error = open_ccs_file(&ctree, "clvm.ccs");
+ if (error) {
+ perror("reading config file");
+ return -1;
+ }
+
+ /* Bounded copy: the configured value may exceed PATH_MAX */
+ strncpy(system_lv_name, find_ccs_str(ctree,
+ "cluster/systemlv", '/',
+ "/dev/vg/system_lv"),
+ sizeof(system_lv_name) - 1);
+ system_lv_name[sizeof(system_lv_name) - 1] = '\0';
+
+ /* Finished with config file */
+ close_ccs_file(ctree);
+#else
+ if (getenv("CLVMD_SYSTEM_LV")) {
+ /* Bounded copy: environment value may exceed PATH_MAX */
+ strncpy(system_lv_name, getenv("CLVMD_SYSTEM_LV"),
+ sizeof(system_lv_name) - 1);
+ system_lv_name[sizeof(system_lv_name) - 1] = '\0';
+ } else
+ return -1;
+#endif
+ }
+
+ /* See if it has been mounted outside our control */
+ if (is_really_mounted() != mounted) {
+ /* Fixed double negative ("cannot not") in the message */
+ log_error
+ ("The system LV state has been mounted/umounted outside the control of clvmd\n"
+ "it cannot be used for cluster communications until this is fixed.\n");
+ return -1;
+ }
+ return 0;
+}
+
+/* No prizes */
+/* Unmount the system LV (if mounted), release its lock and remove
+ the temporary mount point. Returns 0 on success, -1 on failure. */
+int system_lv_umount(void)
+{
+ if (!mounted)
+ return 0;
+
+ if (umount(mount_point) < 0) {
+ log_error("umount of system LV (%s) failed: %m\n",
+ system_lv_name);
+ return -1;
+ }
+
+ /* Drop the system-lv lock taken by system_lv_mount() */
+ sync_unlock(lock_name, lockid);
+ mounted = 0;
+
+ /* Remove the mount point */
+ rmdir(mount_point);
+
+ return 0;
+}
+
+/* Mount the system LV on a fresh temporary mount point, read-only or
+ read-write as requested. Remounts RW if currently mounted RO and RW
+ is needed. On a bare (no-filesystem) LV in RW mode, mkfs and retry.
+ Returns 0 on success, -1 on failure (errno set on some paths). */
+int system_lv_mount(int readwrite)
+{
+ int status;
+ int saved_errno;
+ int fd;
+
+ if (find_system_lv()) {
+ errno = EBUSY;
+ return -1;
+ }
+
+ /* Is it already mounted suitably? */
+ if (mounted) {
+ if (!readwrite || (readwrite && mounted_rw)) {
+ return 0;
+ } else {
+ /* Mounted RO and we need RW */
+ if (system_lv_umount() < 0)
+ return -1;
+ }
+ }
+
+ /* Randomize the mount point */
+ strcpy(mount_point, SYSTEM_LV_MOUNTPOINT);
+ fd = mkstemp(mount_point);
+ if (fd < 0) {
+ log_error("mkstemp for system LV mount point failed: %m\n");
+ return -1;
+ }
+
+ /* Race condition here but there's no mkstemp for directories */
+ close(fd);
+ unlink(mount_point);
+ /* NOTE(review): 0600 on a directory omits the execute (search) bit;
+ confirm 0700 was not intended */
+ mkdir(mount_point, 0600);
+
+ /* Make sure we have a system-lv lock: EX for write, CR for read */
+ status =
+ sync_lock(lock_name, (readwrite) ? LKM_EXMODE : LKM_CRMODE, 0,
+ &lockid);
+ if (status < 0)
+ return -1;
+
+ /* Mount it */
+ if (mount(system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM,
+ MS_MGC_VAL | MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_SYNCHRONOUS
+ | (readwrite ? 0 : MS_RDONLY), NULL) < 0) {
+ /* mount(2) returns EINVAL if the volume has no FS on it. So, if we want to
+ write to it we try to make a filesystem in it and retry the mount */
+ if (errno == EINVAL && readwrite) {
+ char cmd[256];
+
+ log_error("Attempting mkfs on system LV device %s\n",
+ system_lv_name);
+ snprintf(cmd, sizeof(cmd), "/sbin/mkfs -t %s %s",
+ SYSTEM_LV_FILESYSTEM, system_lv_name);
+ system(cmd);
+
+ if (mount
+ (system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM,
+ MS_MGC_VAL | MS_NOSUID | MS_NODEV | MS_NOEXEC |
+ MS_SYNCHRONOUS | (readwrite ? 0 : MS_RDONLY),
+ NULL) == 0)
+ goto mounted;
+ }
+
+ /* Mount (and any retry) failed: release the lock, keep errno */
+ saved_errno = errno;
+ log_error("mount of system LV (%s, %s, %s) failed: %m\n",
+ system_lv_name, mount_point, SYSTEM_LV_FILESYSTEM);
+ sync_unlock(lock_name, lockid);
+ errno = saved_errno;
+ return -1;
+ }
+
+ mounted:
+/* Set the internal flags */
+ mounted = 1;
+ mounted_rw = readwrite;
+
+ return 0;
+}
+
+/* Erase *all* files in the root directory of the system LV.
+ This *MUST* be called with an appropriate lock held!
+ The LV is left mounted RW because it is assumed that the
+ caller wants to write something here after clearing some space.
+ Returns 0 on success, -1 if the directory cannot be opened. */
+int system_lv_eraseall(void)
+{
+ DIR *dir;
+ struct dirent *ent;
+ char fname[PATH_MAX];
+
+ /* Must be mounted R/W */
+ system_lv_mount(1);
+
+ dir = opendir(mount_point);
+ if (!dir)
+ return -1;
+
+ while ((ent = readdir(dir))) {
+ struct stat st;
+ snprintf(fname, sizeof(fname), "%s/%s", mount_point,
+ ent->d_name);
+
+ /* Fixed: stat() returns 0 on success; the old test only
+ looked at entries whose stat() FAILED, reading an
+ uninitialised struct stat */
+ if (!stat(fname, &st)) {
+ if (S_ISREG(st.st_mode))
+ unlink(fname);
+ }
+ }
+ closedir(dir);
+ return 0;
+}
+
+/* This is a "high-level" routine - it mounts the system LV, writes
+ the data into a file named after this node and then umounts the LV
+ again.
+ Returns 0 on success, -1 on failure (errno preserved on open failure). */
+int system_lv_write_data(char *data, ssize_t len)
+{
+ struct utsname nodeinfo;
+ char fname[PATH_MAX];
+ int outfile;
+ ssize_t thiswrite;
+ ssize_t written;
+
+ if (system_lv_mount(1))
+ return -1;
+
+ /* Build the file name we are going to use. */
+ uname(&nodeinfo);
+ snprintf(fname, sizeof(fname), "%s/%s", mount_point, nodeinfo.nodename);
+
+ /* Open the file for output */
+ outfile = open(fname, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ if (outfile < 0) {
+ int saved_errno = errno;
+ system_lv_umount();
+ errno = saved_errno;
+ return -1;
+ }
+
+ /* Loop until all data is written or write() errors */
+ written = 0;
+ do {
+ thiswrite = write(outfile, data + written, len - written);
+ if (thiswrite > 0)
+ written += thiswrite;
+
+ } while (written < len && thiswrite > 0);
+
+ close(outfile);
+
+ system_lv_umount();
+ return (thiswrite < 0) ? -1 : 0;
+}
+
+/* This is a "high-level" routine - it mounts the system LV, reads
+ the data from a named file and then umounts the LV
+ again.
+ On success returns 0 and stores the bytes read in *len;
+ returns -1 on failure (errno preserved). */
+int system_lv_read_data(char *fname_base, char *data, ssize_t *len)
+{
+ char fname[PATH_MAX];
+ int outfile; /* fd used for reading, despite the name */
+ struct stat st;
+ ssize_t filesize;
+ ssize_t thisread;
+ ssize_t readbytes;
+
+ if (system_lv_mount(0))
+ return -1;
+
+ /* Build the file name we are going to use. */
+ snprintf(fname, sizeof(fname), "%s/%s", mount_point, fname_base);
+
+ /* Get the file size and stuff. Actually we only need the file size but
+ this will also check that the file exists */
+ if (stat(fname, &st) < 0) {
+ int saved_errno = errno;
+
+ log_error("stat of file %s on system LV failed: %m\n", fname);
+ system_lv_umount();
+ errno = saved_errno;
+ return -1;
+ }
+ filesize = st.st_size;
+
+ outfile = open(fname, O_RDONLY);
+ if (outfile < 0) {
+ int saved_errno = errno;
+
+ log_error("open of file %s on system LV failed: %m\n", fname);
+ system_lv_umount();
+ errno = saved_errno;
+ return -1;
+ }
+
+ /* Loop until the whole file is read or read() errors */
+ readbytes = 0;
+ do {
+ thisread =
+ read(outfile, data + readbytes, filesize - readbytes);
+ if (thisread > 0)
+ readbytes += thisread;
+
+ } while (readbytes < filesize && thisread > 0);
+
+ close(outfile);
+
+ system_lv_umount();
+
+ *len = readbytes;
+ return (thisread < 0) ? -1 : 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _CLVM_SYSTEM_LV_H
+#define _CLVM_SYSTEM_LV_H
+
+/* Prototypes for System-LV functions.
+ All return 0 on success and -1 on failure (see system-lv.c). */
+
+/* "low-level" functions */
+extern int system_lv_umount(void);
+extern int system_lv_mount(int readwrite);
+extern int system_lv_eraseall(void);
+
+/* "high-level" functions (mount, transfer, umount in one call) */
+extern int system_lv_write_data(char *data, ssize_t len);
+extern int system_lv_read_data(char *fname_base, char *data, ssize_t *len);
+
+#endif
--- /dev/null
+/******************************************************************************
+*******************************************************************************
+**
+** Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
+**
+*******************************************************************************
+******************************************************************************/
+
+/* This provides the inter-clvmd communications for a system without CMAN.
+ There is a listening TCP socket which accepts new connections in the
+ normal way.
+ It can also make outgoing connnections to the other clvmd nodes.
+*/
+
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <syslog.h>
+#include <netdb.h>
+#include <assert.h>
+
+#include "ccs.h"
+#include "clvm.h"
+#include "clvmd-comms.h"
+#include "clvmd.h"
+#include "clvmd-gulm.h"
+#include "hash.h"
+
+#define DEFAULT_TCP_PORT 21064
+
+static int listen_fd = -1;
+static int tcp_port;
+struct hash_table *sock_hash;
+
+static int get_tcp_port(int default_port);
+static int get_our_ip_address(char *addr, int *family);
+static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
+ struct local_client **new_client);
+
+/* Called by init_cluster() to open up the listening socket */
+// TODO: IPv6 compat.
+/* Returns 0 on success, -1 on failure. */
+int init_comms()
+{
+ struct sockaddr *addr = NULL;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ int addr_len;
+ int family;
+ char address[MAX_CSID_LEN];
+
+ sock_hash = hash_create(100);
+ tcp_port = get_tcp_port(DEFAULT_TCP_PORT);
+
+ /* Get IP address and IP type */
+ get_our_ip_address(address, &family);
+ if (family == AF_INET)
+ {
+ /* Fixed: copy from the looked-up 'address' buffer - 'addr'
+ was still NULL at this point */
+ memset(&addr4, 0, sizeof(addr4));
+ memcpy(&addr4.sin_addr, address, sizeof(struct in_addr));
+ addr = (struct sockaddr *)&addr4;
+ addr4.sin_port = htons(tcp_port);
+ addr_len = sizeof(addr4);
+ }
+ else
+ {
+ memset(&addr6, 0, sizeof(addr6));
+ memcpy(&addr6.sin6_addr, address, sizeof(struct in6_addr));
+ addr = (struct sockaddr *)&addr6;
+ addr6.sin6_port = htons(tcp_port);
+ addr_len = sizeof(addr6);
+ }
+
+ listen_fd = socket(family, SOCK_STREAM, 0);
+
+ if (listen_fd < 0)
+ {
+ return -1;
+ }
+ else
+ {
+ /* Allow quick restarts of the daemon */
+ int one = 1;
+ setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
+ }
+
+ addr->sa_family = family;
+
+ if (bind(listen_fd, addr, addr_len) < 0)
+ {
+ DEBUGLOG("Can't bind to port\n");
+ syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
+ close(listen_fd);
+ return -1;
+ }
+
+ listen(listen_fd, 5);
+
+ return 0;
+}
+
+/* Forget the socket(s) for a departed node. Looks up both the plain
+ csid and its "mangled" variant (top bit of byte 0 flipped - see
+ alloc_client) and removes whichever exist from the hash. */
+void tcp_remove_client(char *csid)
+ {
+ struct local_client *client;
+ DEBUGLOG("tcp_remove_client\n");
+
+ /* Don't actually close the socket here - that's the
+ job of clvmd.c whch will do the job when it notices the
+ other end has gone. We just need to remove the client(s) from
+ the hash table so we don't try to use it for sending any more */
+ client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN);
+ if (client)
+ {
+ hash_remove_binary(sock_hash, csid, MAX_CSID_LEN);
+ }
+
+ /* Look for a mangled one too */
+ csid[0] ^= 0x80;
+
+ client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN);
+ if (client)
+ {
+ hash_remove_binary(sock_hash, csid, MAX_CSID_LEN);
+ }
+
+ /* Put it back as we found it */
+ csid[0] ^= 0x80;
+}
+
+/* Create a local_client for fd and register it in sock_hash under csid.
+ A second connection from the same node gets its csid "mangled" (top
+ bit of byte 0 flipped); a third is refused with ECONNREFUSED.
+ Returns 0 on success, -1 on failure (may modify csid[0] - mangling). */
+int alloc_client(int fd, char *csid, struct local_client **new_client)
+{
+ struct local_client *client;
+
+ DEBUGLOG("alloc_client %d csid = [%d.%d.%d.%d]\n", fd,csid[0],csid[1],csid[2],csid[3]);
+
+ /* Create a local_client and return it */
+ client = malloc(sizeof(struct local_client));
+ if (!client)
+ {
+ DEBUGLOG("malloc failed\n");
+ return -1;
+ }
+
+ memset(client, 0, sizeof(struct local_client));
+ client->fd = fd;
+ client->type = CLUSTER_DATA_SOCK;
+ client->callback = read_from_tcpsock;
+ if (new_client)
+ *new_client = client;
+
+ /* Add to our list of node sockets */
+ if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
+ {
+ DEBUGLOG("alloc_client mangling CSID for second connection\n");
+ /* This is a duplicate connection but we can't close it because
+ the other end may already have started sending.
+ So, we mangle the IP address and keep it, all sending will
+ go out of the main FD
+ */
+ csid[0] ^= 0x80;
+ client->bits.net.flags = 1; /* indicate mangled CSID */
+
+ /* If it still exists then kill the connection as we should only
+ ever have one incoming connection from each node */
+ if (hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN))
+ {
+ DEBUGLOG("Multiple incoming connections from node\n");
+ syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
+
+ free(client);
+ errno = ECONNREFUSED;
+ return -1;
+ }
+ }
+ hash_insert_binary(sock_hash, csid, MAX_CSID_LEN, client);
+
+ return 0;
+}
+
+/* Expose the listening socket fd so clvmd can poll it */
+int get_main_cluster_fd()
+{
+ return listen_fd;
+}
+
+
+/* Read on main comms (listen) socket, accept it */
+/* Accepts an incoming connection, checks the peer is a cluster member
+ and allocates a client for it. Returns the new fd on success, or -1
+ with errno = EAGAIN on failure (so clvmd keeps the listening FD). */
+int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid,
+ struct local_client **new_client)
+{
+ int newfd;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ int status;
+ char name[MAX_CLUSTER_MEMBER_NAME_LEN];
+
+ DEBUGLOG("cluster_fd_callback\n");
+ *new_client = NULL;
+ newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+
+ DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
+ /* Fixed: accept(2) signals failure with -1, not 0 (0 is a valid fd) */
+ if (newfd < 0)
+ {
+ syslog(LOG_ERR, "error in accept: %m");
+ errno = EAGAIN;
+ return -1; /* Don't return an error or clvmd will close the listening FD */
+ }
+
+ /* Check that the client is a member of the cluster
+ and reject if not.
+ // FIXME: IPv4 specific
+ */
+ if (name_from_csid((char *)&addr.sin_addr.s_addr, name) < 0)
+ {
+ char *ip = (char *)&addr.sin_addr.s_addr;
+ syslog(LOG_ERR, "Got connect from non-cluster node %d.%d.%d.%d\n",
+ ip[0], ip[1], ip[2], ip[3]);
+ DEBUGLOG("Got connect from non-cluster node %d.%d.%d.%d\n",
+ ip[0], ip[1], ip[2], ip[3]);
+ close(newfd);
+
+ errno = EAGAIN;
+ return -1;
+ }
+
+ status = alloc_client(newfd, (char *)&addr.sin_addr.s_addr, new_client);
+ if (status)
+ {
+ DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
+ close(newfd);
+ /* See above... */
+ errno = EAGAIN;
+ return -1;
+ }
+ DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
+ return newfd;
+}
+
+
+/* Data-ready callback for a node socket: reads into buf, fills in the
+ peer's csid and, on EOF or hard error, closes the fd and removes the
+ node from the hash. Returns the read() status. */
+static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
+ struct local_client **new_client)
+{
+ struct sockaddr_in addr;
+ socklen_t slen = sizeof(addr);
+ int status;
+
+ DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
+ *new_client = NULL;
+
+ /* Get "csid" */
+ getpeername(client->fd, (struct sockaddr *)&addr, &slen);
+ /* NOTE(review): copies MAX_CSID_LEN (16) bytes starting at the 4-byte
+ s_addr field of a sockaddr_in - reads past the struct; presumably
+ relies on the IPv4-csid-padded-to-16 convention - confirm */
+ memcpy(csid, &addr.sin_addr.s_addr, MAX_CSID_LEN);
+
+ status = read(client->fd, buf, len);
+
+ DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
+
+ /* Remove it from the hash table if there's an error, clvmd will
+ remove the socket from its lists and free the client struct */
+ if (status == 0 ||
+ (status < 0 && errno != EAGAIN && errno != EINTR))
+ {
+ char remcsid[MAX_CSID_LEN];
+
+ memcpy(remcsid, csid, MAX_CSID_LEN);
+ close(client->fd);
+
+ /* If the csid was mangled, then make sure we remove the right entry */
+ if (client->bits.net.flags)
+ remcsid[0] ^= 0x80;
+ hash_remove_binary(sock_hash, remcsid, MAX_CSID_LEN);
+
+ /* Tell cluster manager layer */
+ add_down_node(remcsid);
+ }
+ return status;
+}
+
+/* Make an outgoing connection to the node identified by csid and
+ register it as a client. Returns 0 on success, -1 on failure. */
+static int connect_csid(char *csid, struct local_client **newclient)
+{
+ int fd;
+ struct sockaddr_in addr;
+ int status;
+
+ DEBUGLOG("Connecting socket\n");
+ fd = socket(PF_INET, SOCK_STREAM, 0);
+
+ if (fd < 0)
+ {
+ syslog(LOG_ERR, "Unable to create new socket: %m");
+ return -1;
+ }
+
+ addr.sin_family = AF_INET;
+ /* NOTE(review): copies MAX_CSID_LEN (16) bytes into the 4-byte
+ sin_addr field - writes past the end of sockaddr_in; presumably
+ only the first 4 bytes of csid matter for IPv4 - confirm */
+ memcpy(&addr.sin_addr.s_addr, csid, MAX_CSID_LEN);
+ addr.sin_port = htons(tcp_port);
+
+ DEBUGLOG("Connecting socket %d\n", fd);
+ if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)) < 0)
+ {
+ syslog(LOG_ERR, "Unable to connect to remote node: %m");
+ DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ status = alloc_client(fd, csid, newclient);
+ if (status)
+ close(fd);
+ else
+ add_client(*newclient);
+
+ /* If we can connect to it, it must be running a clvmd */
+ add_up_node(csid);
+ return status;
+}
+
+/* Send a message to a known CSID */
+/* Connects on demand if no socket exists yet for the node. Sending to
+ our own csid is a silent no-op that claims success. Returns the
+ number of bytes written, or -1 on failure. (errtext is unused here.) */
+static int tcp_send_message(void *buf, int msglen, unsigned char *csid, const char *errtext)
+{
+ int status;
+ struct local_client *client;
+ char ourcsid[MAX_CSID_LEN];
+
+ assert(csid);
+
+ DEBUGLOG("tcp_send_message, csid = [%d.%d.%d.%d], msglen = %d\n", csid[0],csid[1],csid[2],csid[3], msglen);
+
+ /* Don't connect to ourself */
+ get_our_csid(ourcsid);
+ if (memcmp(csid, ourcsid, MAX_CSID_LEN) == 0)
+ return msglen;
+
+ client = hash_lookup_binary(sock_hash, csid, MAX_CSID_LEN);
+ if (!client)
+ {
+ status = connect_csid(csid, &client);
+ if (status)
+ return -1;
+ }
+ DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
+
+ return write(client->fd, buf, msglen);
+}
+
+
+/* Send a message to one node (csid set) or to every known node
+ (csid NULL). Returns the status of the last tcp_send_message(). */
+int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext)
+{
+ int status=0;
+
+ DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
+
+ /* If csid is NULL then send to all known (not just connected) nodes */
+ if (!csid)
+ {
+ void *context = NULL;
+ char loop_csid[MAX_CSID_LEN];
+
+ /* Loop round all gulm-known nodes */
+ while (get_next_node_csid(&context, loop_csid))
+ {
+ status = tcp_send_message(buf, msglen, loop_csid, errtext);
+ /* Stop early on EOF or a hard (non-retryable) error */
+ if (status == 0 ||
+ (status < 0 && (errno == EAGAIN || errno == EINTR)))
+ break;
+ }
+ }
+ else
+ {
+
+ status = tcp_send_message(buf, msglen, csid, errtext);
+ }
+ return status;
+}
+
+/* Read the TCP port from cluster config (CCS), falling back to
+ default_port when CCS is unavailable or the value is out of range */
+static int get_tcp_port(int default_port)
+{
+ int ccs_handle;
+ int port = default_port;
+ char *portstr;
+
+ ccs_handle = ccs_connect();
+ /* Fixed: ccs_connect() returns a negative value on failure; the old
+ test bailed out on any valid non-zero handle */
+ if (ccs_handle < 0)
+ {
+ return port;
+ }
+
+ if (!ccs_get(ccs_handle, "//clvm/@port", &portstr))
+ {
+ port = atoi(portstr);
+ free(portstr);
+
+ /* Fixed: was "&&", which can never be true - invalid values
+ were accepted */
+ if (port <= 0 || port >= 65536)
+ port = default_port;
+ }
+ ccs_disconnect(ccs_handle);
+
+ DEBUGLOG("Using port %d for communications\n", port);
+ return port;
+}
+
+/* To get our own IP address we get the locally bound address of the
+ socket that's talking to GULM in the assumption(eek) that it will
+ be on the "right" network in a multi-homed system.
+ On success stores the raw address in addr, the address family in
+ *family and returns 0; returns -1 on failure. */
+static int get_our_ip_address(char *addr, int *family)
+{
+ /* Use a sockaddr_in6 to make sure it's big enough */
+ struct sockaddr_in6 saddr;
+ socklen_t socklen = sizeof(saddr); /* fixed: getsockname() takes socklen_t * */
+
+ if (!getsockname(gulm_fd(), (struct sockaddr *)&saddr, &socklen))
+ {
+ /* Fixed: the family out-parameter was never assigned, leaving
+ callers to branch on an uninitialised value */
+ *family = saddr.sin6_family;
+ if (saddr.sin6_family == AF_INET6)
+ {
+ memcpy(addr, &saddr.sin6_addr, sizeof(saddr.sin6_addr));
+ }
+ else
+ {
+ struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+ memcpy(addr, &sin4->sin_addr, sizeof(sin4->sin_addr));
+ }
+ return 0;
+ }
+ return -1;
+}
+
+/* Public version of above for those that don't care what protocol
+ we're using */
+/* Caches the local csid after the first successful lookup. */
+void get_our_csid(char *csid)
+{
+ static char our_csid[MAX_CSID_LEN];
+ static int got_csid = 0;
+
+ if (!got_csid)
+ {
+ int family;
+
+ memset(our_csid, 0, sizeof(our_csid));
+ /* Fixed: get_our_ip_address() returns 0 on success - the old
+ test cached the (all-zero) csid only when the lookup FAILED */
+ if (!get_our_ip_address(our_csid, &family))
+ {
+ got_csid = 1;
+ }
+ }
+ memcpy(csid, our_csid, MAX_CSID_LEN);
+}
+
+/* Get someone else's IP address from DNS */
+/* Resolves node (IPv6 first, then IPv4) into the MAX_CSID_LEN-byte
+ buffer addr, zero-padded. Returns 0 on success, -1 on failure. */
+int get_ip_address(char *node, char *addr)
+{
+ struct hostent *he;
+
+ memset(addr, 0, MAX_CSID_LEN);
+
+ // TODO: what do we do about multi-homed hosts ???
+ // CCSs ip_interfaces solved this but some bugger removed it.
+
+ /* Try IPv6 first. The man page for gethostbyname implies that
+ it will lookup ip6 & ip4 names, but it seems not to */
+ he = gethostbyname2(node, AF_INET6);
+ if (!he)
+ he = gethostbyname2(node, AF_INET);
+ if (!he)
+ return -1;
+
+ /* Fixed: was memcpy(&addr, ...), which overwrote the local pointer
+ variable instead of filling the caller's buffer */
+ /* For IPv4 address just use the lower 4 bytes */
+ memcpy(addr, he->h_addr_list[0],
+ he->h_length);
+
+ return 0;
+}
--- /dev/null
+#include <netinet/in.h>
+
+#define MAX_CLUSTER_MESSAGE 1600
+#define MAX_CSID_LEN sizeof(struct in6_addr)
+#define MAX_CLUSTER_MEMBER_NAME_LEN 128
+
+extern int init_comms(void);
+../daemons/clvmd/clvm.h
../lib/activate/activate.h
../lib/activate/targets.h
../lib/cache/lvmcache.h
format_pool/pool_label.c
endif
+# Cluster locking: compiled into the library when configured "internal",
+# built as a separate shared module (lib/locking) when "shared".
+ifeq ("@CLUSTER@", "internal")
+ SOURCES += locking/cluster_locking.c
+endif
+
+ifeq ("@CLUSTER@", "shared")
+ SUBDIRS += locking
+endif
+
ifeq ("@SNAPSHOTS@", "internal")
SOURCES += snapshot/snapshot.c
endif
--- /dev/null
+#
+# Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+#
+# This file is part of the LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+
+SOURCES = cluster_locking.c
+
+# External cluster-locking module loaded by LVM2 at runtime
+LIB_SHARED = liblvm2clusterlock.so
+
+include $(top_srcdir)/make.tmpl
+
+.PHONY: install
+
+# Install as a versioned library with an unversioned symlink
+install: liblvm2clusterlock.so
+ $(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
+ $(libdir)/liblvm2clusterlock.so.$(LIB_VERSION)
+ $(LN_S) -f liblvm2clusterlock.so.$(LIB_VERSION) \
+ $(libdir)/liblvm2clusterlock.so
+
--- /dev/null
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Locking functions for LVM.
+ * The main purpose of this part of the library is to serialise LVM
+ * management operations across a cluster.
+ */
+
+#include "lib.h"
+#include "clvm.h"
+#include "lvm-string.h"
+#include "locking.h"
+#include "locking_types.h"
+
+#include <stddef.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#ifndef CLUSTER_LOCKING_INTERNAL
+int lock_resource(struct cmd_context *cmd, const char *resource, int flags);
+void locking_end(void);
+int locking_init(int type, struct config_tree *cf, uint32_t *flags);
+#endif
+
+typedef struct lvm_response {
+ char node[255];
+ char *response;
+ int status;
+ int len;
+} lvm_response_t;
+
+/*
+ * This gets stuck at the start of memory we allocate so we
+ * can sanity-check it at deallocation time
+ */
+#define LVM_SIGNATURE 0x434C564D
+
+/*
+ * NOTE: the LVMD uses the socket FD as the client ID, this means
+ * that any client that calls fork() will inherit the context of
+ * it's parent.
+ */
+static int _clvmd_sock = -1;
+
+/* FIXME Install SIGPIPE handler? */
+
+/* Open connection to the Cluster Manager daemon */
+static int _open_local_sock(void)
+{
+ int local_socket;
+ struct sockaddr_un sockaddr;
+
+ /* Open local socket */
+ if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+ log_error("Local socket creation failed: %s", strerror(errno));
+ return -1;
+ }
+
+ memset(&sockaddr, 0, sizeof(sockaddr));
+ memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
+
+ sockaddr.sun_family = AF_UNIX;
+
+ if (connect(local_socket,(struct sockaddr *) &sockaddr,
+ sizeof(sockaddr))) {
+ int saved_errno = errno;
+
+ log_error("connect() failed on local socket: %s",
+ strerror(errno));
+ if (close(local_socket))
+ stack;
+
+ errno = saved_errno;
+ return -1;
+ }
+
+ return local_socket;
+}
+
+/* Send a request and return the status */
+/* Writes inlen bytes of inbuf to clvmd, reads back the reply header
+ plus arguments into a freshly dbg_malloc'd buffer stored in *retbuf.
+ Returns 1 on success, 0 on failure (errno set).
+ NOTE(review): on the status<0 path *retbuf is already allocated -
+ confirm the caller frees it on failure too. */
+static int _send_request(char *inbuf, int inlen, char **retbuf)
+{
+ char outbuf[PIPE_BUF];
+ struct clvm_header *outheader = (struct clvm_header *) outbuf;
+ int len;
+ int off;
+ int buflen;
+ int err;
+
+ /* Send it to CLVMD */
+ rewrite:
+ if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
+ if (err == -1 && errno == EINTR)
+ goto rewrite;
+ log_error("Error writing data to clvmd: %s", strerror(errno));
+ return 0;
+ }
+
+ /* Get the response */
+ reread:
+ if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
+ if (errno == EINTR)
+ goto reread;
+ log_error("Error reading data from clvmd: %s", strerror(errno));
+ return 0;
+ }
+
+ if (len == 0) {
+ log_error("EOF reading CLVMD");
+ errno = ENOTCONN;
+ return 0;
+ }
+
+ /* Allocate buffer */
+ buflen = len + outheader->arglen;
+ *retbuf = dbg_malloc(buflen);
+ if (!*retbuf) {
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* Copy the header */
+ memcpy(*retbuf, outbuf, len);
+ outheader = (struct clvm_header *) *retbuf;
+
+ /* Read the returned values */
+ off = 1; /* we've already read the first byte */
+
+ /* Keep reading until all arglen argument bytes have arrived */
+ while (off < outheader->arglen && len > 0) {
+ len = read(_clvmd_sock, outheader->args + off,
+ buflen - off - offsetof(struct clvm_header, args));
+ if (len > 0)
+ off += len;
+ }
+
+ /* Was it an error ? */
+ if (outheader->status < 0) {
+ errno = -outheader->status;
+ log_error("cluster send request failed: %s", strerror(errno));
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Build the structure header and parse-out wildcard node names */
+static void _build_header(struct clvm_header *head, int cmd, const char *node,
+ int len)
+{
+ head->cmd = cmd;
+ head->status = 0;
+ head->flags = 0;
+ head->clientid = 0;
+ head->arglen = len;
+
+ if (node) {
+ /*
+ * Allow a couple of special node names:
+ * "*" for all nodes,
+ * "." for the local node only
+ */
+ if (strcmp(node, "*") == 0) {
+ head->node[0] = '\0';
+ } else if (strcmp(node, ".") == 0) {
+ head->node[0] = '\0';
+ head->flags = CLVMD_FLAG_LOCAL;
+ } else
+ strcpy(head->node, node);
+ } else
+ head->node[0] = '\0';
+}
+
+/*
+ * Send a message to one (or all) node(s) in the cluster and wait for replies
+ */
+static int _cluster_request(char cmd, const char *node, void *data, int len,
+			    lvm_response_t ** response, int *num)
+{
+	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1];
+	int *outptr;
+	char *inptr;
+	char *retbuf = NULL;
+	int status;
+	int i;
+	int num_responses = 0;
+	struct clvm_header *head = (struct clvm_header *) outbuf;
+	lvm_response_t *rarray;
+
+	*num = 0;
+
+	if (_clvmd_sock == -1)
+		_clvmd_sock = _open_local_sock();
+
+	if (_clvmd_sock == -1)
+		return 0;
+
+	_build_header(head, cmd, node, len);
+	memcpy(head->node + strlen(head->node) + 1, data, len);
+
+	status = _send_request(outbuf, sizeof(struct clvm_header) +
+			       strlen(head->node) + len, &retbuf);
+	if (!status)
+		goto out;
+
+	/* Count the number of responses we got */
+	head = (struct clvm_header *) retbuf;
+	inptr = head->args;
+	while (inptr[0]) {
+		num_responses++;
+		inptr += strlen(inptr) + 1;
+		inptr += sizeof(int);
+		inptr += strlen(inptr) + 1;
+	}
+
+	/*
+	 * Allocate response array.
+	 * With an extra pair of INTs on the front to sanity
+	 * check the pointer when we are given it back to free
+	 */
+	outptr = dbg_malloc(sizeof(lvm_response_t) * num_responses +
+			    sizeof(int) * 2);
+	if (!outptr) {
+		errno = ENOMEM;
+		status = 0;
+		goto out;
+	}
+
+	*response = (lvm_response_t *) (outptr + 2);
+	outptr[0] = LVM_SIGNATURE;
+	outptr[1] = num_responses;
+	rarray = *response;
+
+	/* Unpack the response into an lvm_response_t array */
+	inptr = head->args;
+	i = 0;
+	while (inptr[0]) {
+		strcpy(rarray[i].node, inptr);
+		inptr += strlen(inptr) + 1;
+
+		rarray[i].status = *(int *) inptr;
+		inptr += sizeof(int);
+
+		rarray[i].response = dbg_malloc(strlen(inptr) + 1);
+		if (rarray[i].response == NULL) {
+			/* Free up everything else and return error */
+			int j;
+			for (j = 0; j < i; j++)
+				dbg_free(rarray[j].response);	/* [j]: the earlier allocations, not the failed slot */
+			dbg_free(outptr);	/* allocated with dbg_malloc above */
+			errno = ENOMEM;
+			status = 0;	/* callers treat non-zero status as success */
+			goto out;
+		}
+
+		strcpy(rarray[i].response, inptr);
+		rarray[i].len = strlen(inptr);
+		inptr += strlen(inptr) + 1;
+		i++;
+	}
+	*num = num_responses;
+	*response = rarray;
+
+      out:
+	if (retbuf)
+		dbg_free(retbuf);
+
+	return status;
+}
+
+/* Free reply array */
+static int _cluster_free_request(lvm_response_t * response)
+{
+ int *ptr = (int *) response - 2;
+ int i;
+ int num;
+
+ /* Check it's ours to free */
+ if (response == NULL || *ptr != LVM_SIGNATURE) {
+ errno = EINVAL;
+ return 0;
+ }
+
+ num = ptr[1];
+
+ for (i = 0; i < num; i++) {
+ dbg_free(response[i].response);
+ }
+
+ dbg_free(ptr);
+
+ return 1;
+}
+
+static int _lock_for_cluster(unsigned char cmd, unsigned int flags, char *name)
+{
+ int status;
+ int i;
+ char *args;
+ const char *node = "";
+ int len;
+ int saved_errno = errno;
+ lvm_response_t *response = NULL;
+ int num_responses;
+
+ assert(name);
+
+ len = strlen(name) + 3;
+ args = alloca(len);
+ strcpy(args + 2, name);
+
+	args[0] = flags & 0xBF;	/* Mask off LOCAL flag */
+ args[1] = 0; /* Not used now */
+
+ /*
+ * VG locks are just that: locks, and have no side effects
+ * so we only need to do them on the local node because all
+ * locks are cluster-wide.
+ * Also, if the lock is exclusive it makes no sense to try to
+ * acquire it on all nodes, so just do that on the local node too.
+ */
+ if (cmd == CLVMD_CMD_LOCK_VG ||
+ (flags & LCK_TYPE_MASK) == LCK_EXCL ||
+ (flags & LCK_LOCAL))
+ node = ".";
+
+ status = _cluster_request(cmd, node, args, len,
+ &response, &num_responses);
+
+ /* If any nodes were down then display them and return an error */
+ for (i = 0; i < num_responses; i++) {
+ if (response[i].status == -EHOSTDOWN) {
+ log_error("clvmd not running on node %s",
+ response[i].node);
+ status = 0;
+ } else if (response[i].status) {
+ log_error("Error locking on node %s: %s",
+ response[i].node,
+ response[i].response[0] ?
+ response[i].response :
+ strerror(response[i].status));
+ status = 0;
+ }
+ }
+
+ saved_errno = errno;
+ _cluster_free_request(response);
+ errno = saved_errno;
+
+ return status;
+}
+
+/* API entry point for LVM */
+#ifdef CLUSTER_LOCKING_INTERNAL
+static int _lock_resource(struct cmd_context *cmd, const char *resource,
+ int flags)
+#else
+int lock_resource(struct cmd_context *cmd, const char *resource, int flags)
+#endif
+{
+ char lockname[PATH_MAX];
+ int cluster_cmd = 0;
+
+ assert(strlen(resource) < sizeof(lockname));
+
+ switch (flags & LCK_SCOPE_MASK) {
+ case LCK_VG:
+ /* If the VG name is empty then lock the unused PVs */
+ if (!resource || !*resource)
+ lvm_snprintf(lockname, sizeof(lockname), "P_orphans");
+ else
+ lvm_snprintf(lockname, sizeof(lockname), "V_%s",
+ resource);
+
+ cluster_cmd = CLVMD_CMD_LOCK_VG;
+ flags &= LCK_TYPE_MASK;
+ break;
+
+ case LCK_LV:
+ cluster_cmd = CLVMD_CMD_LOCK_LV;
+ strcpy(lockname, resource);
+ flags &= 0xffdf; /* Mask off HOLD flag */
+ break;
+
+ default:
+ log_error("Unrecognised lock scope: %d",
+ flags & LCK_SCOPE_MASK);
+ return 0;
+ }
+
+ /* Send a message to the cluster manager */
+ log_very_verbose("Locking %s at 0x%x", lockname, flags);
+
+ return _lock_for_cluster(cluster_cmd, flags, lockname);
+}
+
+#ifdef CLUSTER_LOCKING_INTERNAL
+static void _locking_end(void)
+#else
+void locking_end(void)
+#endif
+{
+ if (_clvmd_sock != -1 && close(_clvmd_sock))
+ stack;
+
+ _clvmd_sock = -1;
+}
+
+#ifdef CLUSTER_LOCKING_INTERNAL
+static void _reset_locking(void)
+#else
+void reset_locking(void)
+#endif
+{
+ if (close(_clvmd_sock))
+ stack;
+
+ _clvmd_sock = _open_local_sock();
+ if (_clvmd_sock == -1)
+ stack;
+}
+
+#ifdef CLUSTER_LOCKING_INTERNAL
+int init_cluster_locking(struct locking_type *locking, struct config_tree *cft)
+{
+ locking->lock_resource = _lock_resource;
+ locking->fin_locking = _locking_end;
+ locking->reset_locking = _reset_locking;
+ locking->flags = LCK_PRE_MEMLOCK;
+
+ _clvmd_sock = _open_local_sock();
+ if (_clvmd_sock == -1)
+ return 0;
+
+ return 1;
+}
+#else
+int locking_init(int type, struct config_tree *cf, uint32_t *flags)
+{
+ _clvmd_sock = _open_local_sock();
+ if (_clvmd_sock == -1)
+ return 0;
+
+ /* Ask LVM to lock memory before calling us */
+ *flags |= LCK_PRE_MEMLOCK;
+
+ return 1;
+}
+#endif
return 1;
#endif
+#ifdef CLUSTER_LOCKING_INTERNAL
+ case 3:
+ if (!init_cluster_locking(&_locking, cft))
+ break;
+ log_very_verbose("Cluster locking enabled.");
+ return 1;
+#endif
+
default:
log_error("Unknown locking type requested.");
return 0;
int init_file_locking(struct locking_type *locking, struct config_tree *cf);
int init_external_locking(struct locking_type *locking, struct config_tree *cf);
+int init_cluster_locking(struct locking_type *locking, struct config_tree *cf);
--- /dev/null
+#!/bin/sh
+#
+# Edit an lvm.conf file to enable cluster locking.
+#
+# $1 is the directory where the locking library is installed.
+# $2 (optional) is the config file
+# $3 (optional) is the locking library name
+#
+#
+PREFIX=$1
+LVMCONF=$2
+LIB=$3
+
+if [ -z "$PREFIX" ]
+then
+ echo "usage: $0 <prefix> [<config file>] [<library>]"
+ echo ""
+ echo "<prefix> location of the cluster locking shared library. (no default)"
+ echo "<config file> name of the LVM config file (default: /etc/lvm/lvm.conf)"
+ echo "<library> name of the shared library (default: liblvm2clusterlock.so)"
+ echo ""
+ exit 0
+fi
+
+[ -z "$LVMCONF" ] && LVMCONF="/etc/lvm/lvm.conf"
+[ -z "$LIB" ] && LIB="liblvm2clusterlock.so"
+
+if [ "${PREFIX:0:1}" != "/" ]
+then
+ echo "Prefix must be an absolute path name (starting with a /)"
+ exit 12
+fi
+
+if [ ! -f "$LVMCONF" ]
+then
+ echo "$LVMCONF does not exist"
+ exit 10
+fi
+
+if [ ! -f "$PREFIX/$LIB" ]
+then
+ echo "$PREFIX/$LIB does not exist, did you do a \"make install\" ?"
+ exit 11
+fi
+
+
+SCRIPTFILE=`mktemp -t lvmscript.XXXXXXXXXX`
+TMPFILE=`mktemp -t lvmtmp.XXXXXXXXXX`
+
+
+# Flags so we know which parts of the file we can replace and which need
+# adding. These are return codes from grep, so zero means it IS present!
+have_type=1
+have_dir=1
+have_library=1
+have_global=1
+
+grep -q '^[[:blank:]]*locking_type[[:blank:]]*=' $LVMCONF
+have_type=$?
+
+grep -q '^[[:blank:]]*library_dir[[:blank:]]*=' $LVMCONF
+have_dir=$?
+
+grep -q '^[[:blank:]]*locking_library[[:blank:]]*=' $LVMCONF
+have_library=$?
+
+# Those options are in section "global {" so we must have one if any are present.
+if [ "$have_type" = "0" -o "$have_dir" = "0" -o "$have_library" = "0" ]
+then
+
+ # See if we can find it...
+ grep -q '^[[:blank:]]*global[[:blank:]]*{' $LVMCONF
+ have_global=$?
+
+ if [ "$have_global" = "1" ]
+ then
+ echo "global keys but no 'global {' found, can't edit file"
+ exit 12
+ fi
+fi
+
+# So if we don't have "global {" we need to create one and
+# populate it
+
+if [ "$have_global" = "1" ]
+then
+ cat $LVMCONF - <<EOF > $TMPFILE
+global {
+ # Enable locking for cluster LVM
+ locking_type = 2
+ library_dir = "$PREFIX"
+ locking_library = "$LIB"
+}
+EOF
+ if [ $? != 0 ]
+ then
+ echo "failed to create temporary config file, $LVMCONF not updated"
+ exit 1
+ fi
+else
+ #
+ # We have a "global {" section, so add or replace the
+ # locking entries as appropriate
+ #
+
+ if [ "$have_type" = "0" ]
+ then
+ SEDCMD=" s/^[[:blank:]]*locking_type[[:blank:]]*=.*/\ \ \ \ locking_type = 2/g"
+ else
+ SEDCMD=" /global[[:blank:]]*{/a\ \ \ \ locking_type = 2"
+ fi
+
+ if [ "$have_dir" = "0" ]
+ then
+ SEDCMD="${SEDCMD}\ns'^[[:blank:]]*library_dir[[:blank:]]*=.*'\ \ \ \ library_dir = \"$PREFIX\"'g"
+ else
+ SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ library_dir = \"$PREFIX\""
+ fi
+
+ if [ "$have_library" = "0" ]
+ then
+ SEDCMD="${SEDCMD}\ns/^[[:blank:]]*locking_library[[:blank:]]*=.*/\ \ \ \ locking_library = \"$LIB\"/g"
+ else
+ SEDCMD="${SEDCMD}\n/global[[:blank:]]*{/a\ \ \ \ locking_library = \"$LIB\""
+ fi
+
+ echo -e $SEDCMD > $SCRIPTFILE
+ sed <$LVMCONF >$TMPFILE -f $SCRIPTFILE
+ if [ $? != 0 ]
+ then
+ echo "sed failed, $LVMCONF not updated"
+ exit 1
+ fi
+fi
+
+# Now we have a suitably edited config file in a temp place,
+# backup the original and copy our new one into place.
+
+cp $LVMCONF $LVMCONF.nocluster
+if [ $? != 0 ]
+ then
+ echo "failed to backup old config file, $LVMCONF not updated"
+ exit 2
+fi
+
+cp $TMPFILE $LVMCONF
+if [ $? != 0 ]
+ then
+ echo "failed to copy new config file into place, check $LVMCONF is still OK"
+ exit 3
+fi
+
+rm -f $SCRIPTFILE $TMPFILE
+
--- /dev/null
+#!/bin/bash
+#
+# /etc/rc.d/init.d/clvmd
+#
+# Starts the clvm daemon
+# NOTE: These startup levels may not be right yet - it depends on where
+# the rest of the cluster startup goes.
+#
+# chkconfig: 345 72 5
+# description: distributes LVM commands in a clustered environment. \
+# a clvmd must be run on all nodes in a cluster for clustered LVM \
+# operations to work.
+# processname: clvmd
+
+# Source function library.
+. /etc/init.d/functions
+
+BINARY=/usr/sbin/clvmd
+LOCKFILE=/var/lock/subsys/clvmd
+
+test -x "$BINARY" || exit 0
+
+RETVAL=0
+
+#
+# See how we were called.
+#
+
+prog="clvmd"
+
+start() {
+ # Check if clvmd is already running
+ if [ ! -f "$LOCKFILE" ]; then
+ echo -n $"Starting $prog: "
+ daemon $BINARY
+ RETVAL=$?
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ echo
+ fi
+ return $RETVAL
+}
+
+stop() {
+ echo -n $"Stopping $prog: "
+ killproc $BINARY
+ RETVAL=$?
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ echo
+ return $RETVAL
+}
+
+
+restart() {
+ stop
+ start
+}
+
+reload() {
+ restart
+}
+
+status_clvm() {
+ status $BINARY
+}
+
+case "$1" in
+start)
+ start
+ ;;
+stop)
+ stop
+ ;;
+reload|restart)
+ restart
+ ;;
+condrestart)
+ if [ -f $LOCKFILE ]; then
+ restart
+ fi
+ ;;
+status)
+ status_clvm
+ ;;
+*)
+ echo $"Usage: $0 {start|stop|restart|condrestart|status}"
+ exit 1
+esac
+
+exit $?
+exit $RETVAL