[[http://tmade.de|Home tmade.de]]
[[http://wiki.tmade.de|Home Wiki]]
===== Pacemaker =====
See also:
http://clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/
==== Installation ====
Required packages (for SLES 11 SP2):
libpacemaker-devel
libpacemaker3
pacemaker
pacemaker-mgmt
pacemaker-mgmt-client
pacemaker-mgmt-devel
xorg-x11-fonts
xorg-x11-fonts-core
xorg-x11-libXau
xorg-x11-libXau-32bit
xorg-x11-xauth
Installation for Ubuntu Server 12.04, 16.04, 18.04:
apt-get install pacemaker
Installation for SLES12:
zypper in pacemaker
After pacemaker-installation:
corosync-keygen #Create "authkey" (has to be executed in local console!!)
/etc/corosync/authkey #Copy "authkey" to other nodes
==== Useful commands ====
The following commands can be used to configure and manage pacemaker.
crm configure show #Show resource configuration
crm resource list #Show configured resources
crm_verify -L #Verify resource configuration
crm_mon -i 2 #Show resource configuration with update (interval of 2 seconds)
crm_mon -r -1 #Show resource configuration
crm_mon -1 #Show all resources (one shot)
crm_mon -1nfV
crm configure property no-quorum-policy=ignore #Ignore quorum
crm configure property stonith-enabled=false #Deactivate stonith (shoot the other node in the head)
crm configure rsc_defaults resource-stickiness=100 #Set default-quantifier to "100"
crm_resource -D -r Apache2 -t primitive #delete resource "Apache2"
crm resource cleanup Apache2 #Clean resource "Apache2" (orphaned or with errors)
crm configure delete Apache #Delete resource "Apache". Note: Primitive resources have to be deleted _before_ the group which contains the resource!
crm resource migrate Apache2 node2.site #Migrate resource "Apache2" to node2
crm resource unmigrate Apache2 #Unmigrate resource "Apache2" (set "weight" to default)
crm resource stop Apache2 #Stopping resource "Apache2"
crm resource start Apache2 #Starting resource "Apache2"
cibadmin -E --force #Delete complete configuration. Before resources are deleted, it's recommended to stop them first ("crm resource stop resource_name"), to avoid deleting "orphaned resources"!
cibadmin -U -x /var/lib/heartbeat/crm/Apache.xml #Add/ update complete configuration
crm configure primitive failover-ip ocf:heartbeat:IPaddr params ip=192.168.1.10 op monitor interval=10s
crm_node -R nodex #Remove node "nodex"
crm_node -p #Show nodes in cluster
crm node status #Show node status
crm node list #List nodes
crm node maintenance NODENAME #set maintenance
crm node ready NODENAME #remove from maintenance
crm node online NODENAME #Bring a standby node back online
crmadmin -D #Show current DC
crm ra list stonith
crm ra list lsb
crm_simulate -sL #show live scores
Configure Cluster:
crm configure load update crm-bs.txt #load properties
Example crm-bs.txt:
# enter the following to crm-bs.txt
property $id="cib-bootstrap-options" \
no-quorum-policy="ignore" \
stonith-enabled="true" \
stonith-action="reboot" \
stonith-timeout="150s"
rsc_defaults $id="rsc-options" \
resource-stickiness="1000" \
migration-threshold="5000"
op_defaults $id="op-options" \
timeout="600"
Delete cluster property:
crm configure
crm(live/a4t181)configure# delete cib-bootstrap-options
Configure with "crm configure":
crm configure property stonith-enabled=false no-quorum-policy=ignore cluster-infrastructure=corosync have-watchdog=false cluster-name=lbcluster
crm configure primitive vIP1-0 ocf:heartbeat:IPaddr2 \
params ip="10.251.49.50" nic="eth0" cidr_netmask="25" \
op monitor interval="10s" timeout="20s"
crm configure primitive ping-gateway ocf:pacemaker:ping \
meta target-role="Started" \
op monitor interval="10" timeout="60" \
params host_list="10.251.49.1" multiplier="1000" timeout="20"
crm configure clone pingclone ping-gateway \
meta target-role="Started"
crm configure location vIP1-0_cons vIP1-0 200: myhostname
crm configure rsc_defaults rsc-options: \
resource-stickiness=1000 \
migration-threshold=5000
crm configure op_defaults op-options: \
timeout=600
Restart:
systemctl stop pacemaker.service && systemctl stop corosync.service
systemctl stop pacemaker.service && systemctl restart corosync.service && systemctl start pacemaker.service
Change parameter:
cibadmin -Q | grep ip
Output:
To change e.g. the IP execute:
cibadmin -M --crm_xml ''
cibadmin -M --xml-text ''
==== Stonith ====
See the list of stonith devices:
stonith -L
====Links====
https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/s-cluster-options.html
==== Configuration ====
Path:
/usr/lib/ocf/resource.d/heartbeat/ #OCF-Init-Scripts
/etc/corosync/corosync.conf #Configuration
/var/lib/heartbeat/crm/cib.xml #"Last" updated resource configuration file on SLES 11/ Ubuntu Server 12.04
/var/lib/pacemaker/cib/cib.xml #"Last" updated resource configuration file on Ubuntu Server 14.04, SLES 12
/etc/init.d/openais #Service init-script to start/stop pacemaker on SLES 11
/etc/init.d/corosync #Service init-script to start/stop pacemaker on Ubuntu Server 12.04 (previously edit "/etc/default/corosync": # start corosync at boot [yes|no] -> START=yes)
====SLES11====
Example "/etc/corosync/corosync.conf" (SLES 11):
aisexec {
group: root
user: root
}
service {
use_mgmtd: no
ver: 0
name: pacemaker
}
totem {
rrp_mode: passive
token_retransmits_before_loss_const: 10
join: 60
max_messages: 20
vsftype: none
token: 5000
consensus: 6000
secauth: on
version: 2
threads: 0
transport: udpu
interface {
#Pacemaker nodes:
member {
#Load-Balancer1
#memberaddr: 10.0.0.193
memberaddr: lb_node1 #DNS for hostname has to be configured
}
member {
#Load-Balancer2
#memberaddr: 10.0.0.194
memberaddr: lb_node2 #DNS for hostname has to be configured
}
#Network:
bindnetaddr: 10.0.0.0
mcastport: 5405
ringnumber: 0
}
clear_node_high_bit: yes
#stonith-enabled=true #NOTE: "stonith-enabled" is a Pacemaker cluster property, not valid corosync.conf syntax — set it via "crm configure property stonith-enabled=true"
}
logging {
to_logfile: yes
to_stderr: no
to_syslog: yes
debug: off
timestamp: on
logfile: /var/log/cluster/corosync.log
fileline: off
syslog_facility: daemon
}
amf {
mode: disabled
}
====SLES12====
Example "/etc/corosync/corosync.conf" (SLES 12):
# Please read the corosync.conf.5 manual page
totem {
version: 2
crypto_cipher: none
crypto_hash: none
clear_node_high_bit: yes
interface {
ringnumber: 0
bindnetaddr: 192.168.150.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
logging {
fileline: off
#to_logfile: no
to_syslog: yes
#logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
nodelist {
node {
ring0_addr: 192.168.150.12
name: testnode1
nodeid: 1
}
node {
ring0_addr: 192.168.150.13
name: testnode2
nodeid: 2
}
}
quorum {
# Enable and configure quorum subsystem (default: off)
# see also corosync.conf.5 and votequorum.5
provider: corosync_votequorum
expected_votes: 2
}
====Ubuntu 12.04====
Example "/etc/corosync/corosync.conf" (Ubuntu Server 12.04):
# Please read the openais.conf.5 manual page
totem {
version: 2
# How long before declaring a token lost (ms)
token: 3000
# How many token retransmits before forming a new configuration
token_retransmits_before_loss_const: 10
# How long to wait for join messages in the membership protocol (ms)
join: 60
# How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
consensus: 3600
# Turn off the virtual synchrony filter
vsftype: none
# Number of messages that may be sent by one processor on receipt of the token
max_messages: 20
# Limit generated nodeids to 31-bits (positive signed integers)
clear_node_high_bit: yes
# Disable encryption
#secauth: off
secauth: on
# How many threads to use for encryption/decryption
threads: 0
# Optionally assign a fixed node id (integer)
# nodeid: 1234
# This specifies the mode of redundant ring, which may be none, active, or passive.
rrp_mode: none
# interface {
# # The following values need to be set based on your environment
# ringnumber: 0
#bindnetaddr: 127.0.0.1
# bindnetaddr: 192.168.0.203
# mcastaddr: 226.94.1.1
# mcastport: 5405
# }
interface {
member {
memberaddr: 192.168.0.201
}
member {
memberaddr: 192.168.0.203
}
ringnumber: 0
bindnetaddr: 192.168.0.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
amf {
mode: disabled
}
service {
# Load the Pacemaker Cluster Resource Manager
ver: 0
name: pacemaker
}
aisexec {
user: root
group: root
}
logging {
fileline: off
to_stderr: yes
to_logfile: no
to_syslog: yes
syslog_facility: daemon
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
tags: enter|leave|trace1|trace2|trace3|trace4|trace6
}
}
====Ubuntu 16.04====
totem {
version: 2
cluster_name: lbcluster
transport: udpu
interface {
ringnumber: 0
bindnetaddr: 192.168.150.128
broadcast: yes
mcastport: 5405
}
}
quorum {
provider: corosync_votequorum
expected_votes: 3
#two_node: 1
}
nodelist {
node {
ring0_addr: 192.168.150.230
name: node002
nodeid: 1
}
node {
ring0_addr: 192.168.150.239
name: node006
nodeid: 2
}
node {
ring0_addr: 192.168.150.243
name: node007
nodeid: 3
}
}
logging {
# Log the source file and line where messages are being
# generated. When in doubt, leave off. Potentially useful for
# debugging.
# Log with syslog facility daemon.
#syslog_facility: daemon
fileline: off
to_logfile: yes
to_stderr: no
debug: off
logfile: /var/log/corosync/corosync.log
to_syslog: yes
timestamp: on
}
==== OCF-Resource ====
Example resource-configuration (2 x virtIP, pound, pen):
==== Snippets ====
==== Manually Resource Add ====
To add a resource manually run:
crm configure
To login into "crm(live)configure#"
Afterwards you can add a virtual IP (vIP1-0) by running:
primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s"
To add the new resource "vIP1-1" to group "lb-cluster-2" run:
group lb-cluster-2 vIP1-1 meta target_role=Started
group lb-cluster-group-name resource1 resource2 resource3
Or from shell:
crm configure group lb-cluster-group-name resource1 resource2 resource3
To set constraints for group "lb-cluster-2" with ID "lb-cluster-2_cons" with score "200" to run on host "hostname":
location lb-cluster-2_cons lb-cluster-2 200: hostname
Stickyness:
crm configure rsc_defaults resource-stickiness=500
To save the changes:
commit
To delete (resource has to be stopped before!) a resource run:
delete resourceID
commit
Example:
root@albano:/etc/corosync# crm configure
crm(live)configure# primitive lb-nginx ocf:heartbeat:nginx operations $id="lb-nginx" op start interval="0" timeout="90s" op stop interval="0" timeout="120s" op monitor interval="10s" timeout="20s"
crm(live)configure# primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s"
crm(live)configure# group lb-cluster-2 vIP1-1 lb-nginx meta target_role=Started
crm(live)configure# location lb-cluster-2_cons lb-cluster-2 200: hostname
crm(live)configure# commit
==== OCF-Init-Script ====
Example OCF-Init-Script for "pound" Loadbalancing (/usr/lib/ocf/resource.d/heartbeat/pound):
#!/bin/sh
#
# Manages Pound as a Linux-HA resource
#
# params:
# OCF_RESKEY_pound
# OCF_RESKEY_config
# OCF_RESKEY_pid
# OCF_RESKEY_poundctl
#
###############################################################################
# Initialization:
#. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
# Source the OCF shell function library: provides ocf_log and the
# OCF_SUCCESS / OCF_NOT_RUNNING / OCF_ERR_* exit-code variables used below.
OCF_ROOT=/usr/lib/ocf
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults for the agent's instance parameters; each can be overridden by the
# corresponding OCF_RESKEY_* environment variable set on the resource.
: ${OCF_RESKEY_pound="/usr/local/pound/sbin/pound"}
: ${OCF_RESKEY_pid="/var/run/pound_ocf.pid"}
: ${OCF_RESKEY_config="/etc/pound.cfg"}
: ${OCF_RESKEY_poundctl="/usr/local/pound/sbin/poundctl"}
# NOTE(review): OCF_RESKEY_socket is defaulted here, but pound_socket() parses
# the socket path from the config file instead — confirm which is intended.
: ${OCF_RESKEY_socket="/var/run/pound.socket"}
##############################################################################
# helpers:
# Print the control-socket path by parsing the "Control" directive out of the
# configured Pound config file (quotes stripped).
# Fix: use $OCF_RESKEY_config instead of the hard-coded /etc/pound.cfg, so a
# non-default "config" parameter is honored.
pound_socket()
{
  grep -i control "$OCF_RESKEY_config" | cut -d " " -f2 | sed 's/[\"]//g'
}
# Print the PID stored in the agent's PID file.
# Fix: quote the expansion so a path containing spaces does not word-split.
pound_pid()
{
  cat "$OCF_RESKEY_pid"
}
###############################################################################
# interface:
# Print the agent's usage/meta-data text.
# Fix: the original "cat <" is a shell syntax error (redirection with no
# target) — it must be a here-document terminated by the existing END marker.
# NOTE(review): the text looks like OCF meta-data XML whose markup was
# stripped by the wiki; the original tags would need to be restored from the
# upstream agent.
usage()
{
  cat <<END
1.0
This is an OCF resource agent for the Pound reverse proxy
OCF resource agent for Pound
Path to the Pound executable
Pound executable
Path to the poundctl executable
poundctl executable
Pound's config file
Pound's config
Pound's process ID file
Pound's PID
END
}
# Validate the agent's configuration: the pound and poundctl binaries must
# exist and be executable, the config file must exist, and pound itself must
# accept the config (-c syntax check). Exits with the matching OCF error code
# on failure; returns $OCF_SUCCESS otherwise.
# Fix: quote all expansions in test/command positions and replace backticks
# with $( ) — behavior is unchanged for well-formed paths.
pound_validate()
{
  if [ ! -f "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  if [ ! -x "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  if [ ! -x "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_config" ]; then
    ocf_log err "Config file $OCF_RESKEY_config doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  # Let pound check its own configuration and log its error output on failure.
  msg=$("$OCF_RESKEY_pound" -c -f "$OCF_RESKEY_config" 2>&1)
  if [ $? -ne 0 ]; then
    ocf_log err "$msg"
    exit $OCF_ERR_CONFIGURED
  fi
  return $OCF_SUCCESS
}
# "status" is a plain alias for "monitor" in this agent: it reports the same
# running / not-running verdict via the same exit code.
pound_status()
{
  pound_monitor
  return $?
}
# Report whether Pound is running: the PID file must exist AND poundctl must
# be able to talk to the control socket. Returns $OCF_NOT_RUNNING otherwise.
# Fixes: drop the unused "local ret"; quote all expansions; use $( ) instead
# of backticks; drop the redundant [ -f ] re-test before rm (the file is known
# to exist on that path) — a stale PID file is simply removed.
pound_monitor()
{
  if [ ! -f "$OCF_RESKEY_pid" ]; then
    ocf_log debug "Pound is not running"
    return $OCF_NOT_RUNNING;
  fi
  if ! "$OCF_RESKEY_poundctl" -c "$(pound_socket)" > /dev/null 2>&1; then
    ocf_log debug "Pound is not running"
    # The process is gone but left its PID file behind: clean it up.
    rm -f -- "$OCF_RESKEY_pid"
    return $OCF_NOT_RUNNING;
  fi
  return $OCF_SUCCESS;
}
# Start Pound unless it is already running, then block until the monitor
# reports success (the cluster's start-operation timeout bounds the wait).
# Returns $OCF_SUCCESS, or pound's exit status if the launch itself fails.
# Fix: quote all expansions and replace backticks with $( ).
pound_start()
{
  local ret
  pound_monitor
  ret=$?
  if [ $ret -eq $OCF_SUCCESS ]; then
    ocf_log info "Pound is already running (PID: $(pound_pid)), doing nothing"
    return $OCF_SUCCESS;
  fi
  "$OCF_RESKEY_pound" -f "$OCF_RESKEY_config" -p "$OCF_RESKEY_pid" > /dev/null 2>&1
  ret=$?
  if [ $ret -ne 0 ]; then
    ocf_log err "Pound failed to start: $ret"
    return $ret;
  fi
  # lets rely on start timeout here...
  while ! pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound started successfully (PID: $(pound_pid))"
  return $OCF_SUCCESS;
}
# Stop Pound: a no-op if it is already down, otherwise send SIGTERM to its PID
# and wait until the monitor confirms it is gone (the cluster's stop-operation
# timeout bounds the wait). Always returns $OCF_SUCCESS.
# Fixes: drop the dead first pound_monitor call whose status went into an
# otherwise-unused "ret"; quote expansions; use $( ) instead of backticks.
pound_stop()
{
  if ! pound_monitor; then
    ocf_log info "Pound stopped successfully";
    return $OCF_SUCCESS;
  fi
  kill -s TERM "$(pound_pid)" > /dev/null 2>&1
  while pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound stopped successfully";
  return $OCF_SUCCESS;
}
# Dispatch on the action requested by the cluster resource manager.
# Fixes: quote the case selector; the original "meta-data" arm called an
# undefined meta_data function (instant failure) — usage() holds the agent's
# meta-data/usage text, so call it instead.
case "$__OCF_ACTION" in
  start) pound_validate; pound_start;;
  stop) pound_validate; pound_stop;;
  status) pound_status;;
  monitor) pound_monitor;;
  meta-data) usage; exit $OCF_SUCCESS;;
  validate-all) pound_validate; exit $OCF_SUCCESS;;
  *) usage; exit $OCF_ERR_UNIMPLEMENTED;;
esac
exit $?