cluster: RHEL5 - [rgmanager] Add intelligent "follow-service" logic script
Lon Hohberger
lon@fedoraproject.org
Mon Jan 12 14:49:00 GMT 2009
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=f397e2e210651446205c319c6b60a051f55fa415
Commit: f397e2e210651446205c319c6b60a051f55fa415
Parent: 705b1fc5472c69e09738149f4d95c9875971d76c
Author: Mark Hlawatschek <hlawatschek@atix.de>
AuthorDate: Mon Jan 12 09:39:32 2009 -0500
Committer: Lon Hohberger <lhh@redhat.com>
CommitterDate: Mon Jan 12 09:39:32 2009 -0500
[rgmanager] Add intelligent "follow-service" logic script
---
rgmanager/src/resources/follow-service.sl | 151 +++++++++++++++++++++++++++++
1 files changed, 151 insertions(+), 0 deletions(-)
diff --git a/rgmanager/src/resources/follow-service.sl b/rgmanager/src/resources/follow-service.sl
new file mode 100644
index 0000000..639fb7c
--- /dev/null
+++ b/rgmanager/src/resources/follow-service.sl
@@ -0,0 +1,151 @@
+% follow-service.sl
+%
+% Description: Implements the "follow service" mechanism based on the Red Hat RIND event
+% scripting mechanism.
+%
+% Author: Marc Grimme, Mark Hlawatschek, October 2008
+% Support: support@atix.de
+% License: GNU General Public License (GPL)
+% Copyright: (c) 2008 ATIX AG
+
+
+debug("*** follow-service.sl");	% top-level statement: emits this trace whenever the script file is evaluated
+
+
+%
+% nodelist_online(service_name): the online cluster nodes that also belong to
+% the failover domain of service_name.
+%
+define nodelist_online(service_name) {
+    variable dom_nofailback, dom_restricted, dom_ordered, dom_members;
+    variable online;
+    online = nodes_online();
+    (dom_nofailback, dom_restricted, dom_ordered, dom_members) = service_domain_info(service_name);
+    return intersection(online, dom_members);
+}
+
+%
+% Idea:
+% General-purpose function for a setup in which Service(svc1) and Service(svc2)
+% should not be running on the same node, even after a failover.
+% There are two options to influence the behaviour. If both services would have
+% to run on the same node (only one node is left in the failover domain), which
+% service is the master, and should both services keep running or should only
+% the master service survive? If master is neither svc1 nor svc2, both services
+% might run on the same node. If master is either svc1 or svc2, the specified
+% one will be the surviving service.
+% If followslave is not 0, svc1 always follows svc2: it will be started on the
+% same node that svc2 is running on, and svc2 will be relocated to any other
+% node if one is available.
+%
+define follow_service(svc1, svc2, master) %, followslave)
+{
+    variable state, owner_svc1, owner_svc2;
+    variable nodes1, nodes2, allowed;
+
+    debug("*** FOLLOW_SERVICE: follow_service(",svc1,", ",svc2,", ", master, ")");
+    debug("*** FOLLOW_SERVICE: event_type: ", event_type, ", service_name: ", service_name, ", service_state: ", service_state);
+
+    % A master other than svc1 or svc2 is treated as "no master": when both
+    % services end up on one node with nowhere else to go, neither is stopped.
+    %
+    if ((master != svc1) and (master != svc2)) {
+        debug("*** FOLLOW_SERVICE: master=NULL");
+        master=NULL;
+    }
+
+    % Owners and candidate nodes of both services ("state" is not used further).
+    (owner_svc1, state) = service_status(svc1);
+    (owner_svc2, state) = service_status(svc2);
+    nodes1 = nodelist_online(svc1);
+    nodes2 = nodelist_online(svc2);
+    debug("*** FOLLOW_SERVICE: service_status(",svc1,"): ", service_status(svc1));
+    debug("*** FOLLOW_SERVICE: owner_svc1: ", owner_svc1, ", owner_svc2: ", owner_svc2, ", nodes1: ", nodes1, ", nodes2: ", nodes2);
+
+    if (((event_type == EVENT_NODE) and (owner_svc1 == node_id) and (node_state == NODE_OFFLINE) and (owner_svc2 >=0)) or
+        ((event_type == EVENT_SERVICE) and (service_name == svc1) and (service_state == "recovering" ) and (owner_svc2 >= 0))) {
+        %
+        % svc1 lost its node (node event) or is recovering (service event)
+        % while svc2 has an owner: start svc1 on svc2's node and move svc2 to
+        % another online node of its failover domain. If there is no such
+        % node, only the step belonging to the master service is carried out.
+
+        %
+        % If this was a service event, don't execute the default event
+        % script trigger after this script completes.
+        %
+        if (event_type == EVENT_SERVICE) {
+            stop_processing();
+        }
+        % Targets for svc2: not its current node and, given a choice, not service_last_owner.
+        allowed=subtract(nodes2, owner_svc2);
+        if (length(allowed) > 1) {
+            allowed=subtract(allowed, service_last_owner);
+        }
+        debug("*** FOLLOW SERVICE: service event triggered following svc2 to ",owner_svc2, " svc2 on : ",allowed);
+
+        % svc1 is the master, or svc2 has another node to move to
+        if ((master == svc1) or (length(allowed) > 0)) {
+            ()=service_start(svc1, owner_svc2);
+        }
+        % svc2 is the master, or svc2 has another node to move to
+        if ((master == svc2) or (length(allowed) > 0)) {
+            ()=service_stop(svc2);
+            ()=service_start(svc2, allowed);
+        }
+    }
+    else if (((event_type == EVENT_NODE) and (owner_svc2 == node_id) and (node_state == NODE_OFFLINE) and (owner_svc1 >= 0)) or
+        ((event_type == EVENT_SERVICE) and (service_name == svc2) and (service_state == "recovering" ) and (owner_svc1 >= 0))) {
+        %
+        % svc2 lost its node or is recovering while svc1 has an owner. The node
+        % event case checks owner_svc1 too (it used to re-check owner_svc2, which
+        % owner_svc2 == node_id presumably already implies), so an unowned svc1
+        % can no longer reach service_start(svc2, owner_svc1) below.
+        %
+        % Relocate svc2 away from svc1; if impossible and svc2 is master, stop svc1 and start svc2 on owner_svc1.
+
+        %
+        % If this was a service event, don't execute the default event
+        % script trigger after this script completes.
+        %
+
+        if (event_type == EVENT_SERVICE) {
+            stop_processing();
+        }
+
+        allowed=subtract(nodes2, owner_svc1);
+        if (length(allowed) > 1) {
+            allowed=subtract(allowed, service_last_owner);
+        }
+
+        debug("*** FOLLOW SERVICE: service event triggered relocating svc2 to ",allowed, " svc1 on : ",owner_svc1);
+
+        if (length(allowed) > 0) {
+            ()=service_stop(svc2);
+            ()=service_start(svc2, allowed);
+        } else if (master == svc2) {
+            ()=service_stop(svc1);
+            ()=service_start(svc2, owner_svc1);
+        }
+    }
+    else if (((event_type == EVENT_SERVICE) and (service_state == "started") and (owner_svc2 == owner_svc1) and (owner_svc1 > 0) and (owner_svc2 > 0)) or
+        ((event_type == EVENT_CONFIG) and (owner_svc2 == owner_svc1))) {
+        allowed=subtract(nodes2, owner_svc1);
+        debug("*** FOLLOW SERVICE: service event both running on same node triggered.", allowed);
+        if (length(allowed) > 0) {
+            % Both services share a node: svc1 stays put and svc2 is moved
+            % to a different online node of its failover domain.
+            ()=service_stop(svc2);
+            ()=service_start(svc2, allowed);
+        } else if ((master == svc2) and (owner_svc2 > 0)){
+            debug("*** FOLLOW SERVICE: will stop service .", svc1);
+            ()=service_stop(svc1);
+        } else if ((master == svc1) and (owner_svc1 > 0)) {
+            debug("*** FOLLOW SERVICE: will stop service .", svc2);
+            ()=service_stop(svc2);
+        } else {
+            debug("*** FOLLOW SERVICE: both services running on the same node or only one is running.", allowed, ", ", master);
+        }
+    }
+    return;
+}
More information about the Cluster-cvs
mailing list