Required packages (for SLES 11 SP2):
libpacemaker-devel libpacemaker3 pacemaker pacemaker-mgmt pacemaker-mgmt-client pacemaker-mgmt-devel xorg-x11-fonts xorg-x11-fonts-core xorg-x11-libXau xorg-x11-libXau-32bit xorg-x11-xauth
Installation for Ubuntu Server 12.04, 16.04, 18.04:
apt-get install pacemaker
Installation for SLES12:
zypper in pacemaker
After pacemaker-installation:
corosync-keygen #Create "authkey" (has to be executed on a local console!!)
/etc/corosync/authkey #Copy "authkey" to the other nodes
The following commands can be used to configure and manage pacemaker.
crm configure show #Show resource configuration
crm resource list #Show configured resources
crm_verify -L #Verify resource configuration
crm_mon -i 2 #Show resource status with updates (interval of 2 seconds)
crm_mon -r -1 #Show resource configuration
crm_mon -1 #Show all resources (one shot)
crm_mon -1nfV
crm configure property no-quorum-policy=ignore #Ignore quorum
crm configure property stonith-enabled=false #Deactivate stonith (shoot the other node in the head)
crm configure rsc_defaults resource-stickiness=100 #Set default-quantifier to "100"
crm_resource -D -r Apache2 -t primitive #delete resource "Apache2"
crm resource cleanup Apache2 #Clean resource "Apache2" (orphaned or with errors)
crm configure delete Apache #Delete resource "Apache". Note: Primitive resources have to be deleted _before_ the group which contains the resource!
crm resource migrate Apache2 node2.site #Migrate resource "Apache2" to node2
crm resource unmigrate Apache2 #Unmigrate resource "Apache2" (set "weight" to default)
crm resource stop Apache2 #Stopping resource "Apache2"
crm resource start Apache2 #Starting resource "Apache2"
cibadmin -E --force #Delete the complete configuration. Before deleting resources, it's recommended to stop them first ("crm resource stop resource_name"), to avoid leaving "orphaned resources"!
cibadmin -U -x /var/lib/heartbeat/crm/Apache.xml #Add/ update complete configuration
crm configure primitive failover-ip ocf:heartbeat:IPaddr params ip=192.168.1.10 op monitor interval=10s
crm_node -R nodex #Remove node "nodex"
crm_node -p #Show nodes in cluster
crm node status #Show node status
crm node list #List cluster nodes
crm node maintenance NODENAME #set maintenance
crm node ready NODENAME #remove from maintenance
crm node online NODENAME #Bring a standby node back online
crmadmin -D #Show current DC
crm ra list stonith
crm ra list lsb
crm_simulate -sL #show live scores
Configure Cluster:
crm configure load update crm-bs.txt #load properties
Example crm-bs.txt:
# enter the following to crm-bs.txt
property $id="cib-bootstrap-options" \
  no-quorum-policy="ignore" \
  stonith-enabled="true" \
  stonith-action="reboot" \
  stonith-timeout="150s"
rsc_defaults $id="rsc-options" \
  resource-stickiness="1000" \
  migration-threshold="5000"
op_defaults $id="op-options" \
  timeout="600"
Delete cluster property:
crm configure crm(live/a4t181)configure# delete cib-bootstrap-options
Configure with “crm configure”:
crm configure property stonith-enabled=false no-quorum-policy=ignore cluster-infrastructure=corosync have-watchdog=false cluster-name=lbcluster
crm configure primitive vIP1-0 ocf:heartbeat:IPaddr2 \
params ip="10.251.49.50" nic="eth0" cidr_netmask="25" \
op monitor interval="10s" timeout="20s"
crm configure primitive ping-gateway ocf:pacemaker:ping \
meta target-role="Started" \
op monitor interval="10" timeout="60" \
params host_list="10.251.49.1" multiplier="1000" timeout="20"
crm configure clone pingclone ping-gateway \
meta target-role="Started"
crm configure location vIP1-0_cons vIP1-0 200: myhostname
crm configure rsc_defaults rsc-options: \
resource-stickiness=1000 \
migration-threshold=5000
crm configure op_defaults op-options: \
timeout=600
Restart:
systemctl stop pacemaker.service && systemctl stop corosync.service
systemctl stop pacemaker.service && systemctl restart corosync.service && systemctl start pacemaker.service
Change parameter:
cibadmin -Q | grep ip
Output:
<nvpair id="vIP1-0-instance_attributes-ip" name="ip" value="192.168.1.5"/>
To change e.g. the IP execute:
cibadmin -M --crm_xml '<nvpair id="vIP1-0-instance_attributes-ip" name="ip" value="192.168.1.66"/>'
cibadmin -M --xml-text '<nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="lbcluster"/>'
See the list of stonith devices:
stonith -L
Path:
/usr/lib/ocf/resource.d/heartbeat/ #OCF-Init-Scripts
/etc/corosync/corosync.conf #Configuration
/var/lib/heartbeat/crm/cib.xml #"Last" updated resource configuration file on SLES 11/ Ubuntu Server 12.04
/var/lib/pacemaker/cib/cib.xml #"Last" updated resource configuration file on Ubuntu Server 14.04, SLES 12
/etc/init.d/openais #Service init-script to start/stop pacemaker on SLES 11
/etc/init.d/corosync #Service init-script to start/stop pacemaker on Ubuntu Server 12.04 (previously edit "/etc/default/corosync": # start corosync at boot [yes|no] -> START=yes)
Example “/etc/corosync/corosync.conf” (SLES 11):
aisexec {
group: root
user: root
}
service {
use_mgmtd: no
ver: 0
name: pacemaker
}
totem {
rrp_mode: passive
token_retransmits_before_loss_const: 10
join: 60
max_messages: 20
vsftype: none
token: 5000
consensus: 6000
secauth: on
version: 2
threads: 0
transport: udpu
interface {
#Pacemaker nodes:
member {
#Load-Balancer1
#memberaddr: 10.0.0.193
memberaddr: lb_node1 #DNS for hostname has to be configured
}
member {
#Load-Balancer2
#memberaddr: 10.0.0.194
memberaddr: lb_node2 #DNS for hostname has to be configured
}
#Network:
bindnetaddr: 10.0.0.0
mcastport: 5405
ringnumber: 0
}
clear_node_high_bit: yes
# Note: "stonith-enabled=true" is a Pacemaker cluster property, not a valid
# corosync totem option - set it via "crm configure property stonith-enabled=true"
}
logging {
to_logfile: yes
to_stderr: no
to_syslog: yes
debug: off
timestamp: on
logfile: /var/log/cluster/corosync.log
fileline: off
syslog_facility: daemon
}
amf {
mode: disable
}
Example “/etc/corosync/corosync.conf” (SLES 12):
# Please read the corosync.conf.5 manual page
totem {
version: 2
crypto_cipher: none
crypto_hash: none
clear_node_high_bit: yes
interface {
ringnumber: 0
bindnetaddr: 192.168.150.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
logging {
fileline: off
#to_logfile: no
to_syslog: yes
#logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
nodelist {
node {
ring0_addr: 192.168.150.12
name: testnode1
nodeid: 1
}
node {
ring0_addr: 192.168.150.13
name: testnode2
nodeid: 2
}
}
quorum {
# Enable and configure quorum subsystem (default: off)
# see also corosync.conf.5 and votequorum.5
provider: corosync_votequorum
expected_votes: 2
}
Example “/etc/corosync/corosync.conf” (Ubuntu Server 12.04):
# Please read the openais.conf.5 manual page
totem {
version: 2
# How long before declaring a token lost (ms)
token: 3000
# How many token retransmits before forming a new configuration
token_retransmits_before_loss_const: 10
# How long to wait for join messages in the membership protocol (ms)
join: 60
# How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
consensus: 3600
# Turn off the virtual synchrony filter
vsftype: none
# Number of messages that may be sent by one processor on receipt of the token
max_messages: 20
# Limit generated nodeids to 31-bits (positive signed integers)
clear_node_high_bit: yes
# Disable encryption
#secauth: off
secauth: on
# How many threads to use for encryption/decryption
threads: 0
# Optionally assign a fixed node id (integer)
# nodeid: 1234
# This specifies the mode of redundant ring, which may be none, active, or passive.
rrp_mode: none
# interface {
# # The following values need to be set based on your environment
# ringnumber: 0
#bindnetaddr: 127.0.0.1
# bindnetaddr: 192.168.0.203
# mcastaddr: 226.94.1.1
# mcastport: 5405
# }
interface {
member {
memberaddr: 192.168.0.201
}
member {
memberaddr: 192.168.0.203
}
ringnumber: 0
bindnetaddr: 192.168.0.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
amf {
mode: disabled
}
service {
# Load the Pacemaker Cluster Resource Manager
ver: 0
name: pacemaker
}
aisexec {
user: root
group: root
}
logging {
fileline: off
to_stderr: yes
to_logfile: no
to_syslog: yes
syslog_facility: daemon
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
tags: enter|leave|trace1|trace2|trace3|trace4|trace6
}
}
totem {
version: 2
cluster_name: lbcluster
transport: udpu
interface {
ringnumber: 0
bindnetaddr: 192.168.150.128
broadcast: yes
mcastport: 5405
}
}
quorum {
provider: corosync_votequorum
expected_votes: 3
#two_node: 1
}
nodelist {
node {
ring0_addr: 192.168.150.230
name: node002
nodeid: 1
}
node {
ring0_addr: 192.168.150.239
name: node006
nodeid: 2
}
node {
ring0_addr: 192.168.150.243
name: node007
nodeid: 3
}
}
logging {
# Log the source file and line where messages are being
# generated. When in doubt, leave off. Potentially useful for
# debugging.
# Log with syslog facility daemon.
#syslog_facility: daemon
fileline: off
to_logfile: yes
to_stderr: no
debug: off
logfile: /var/log/corosync/corosync.log
to_syslog: yes
timestamp: on
}
Example resource-configuration (2 x virtIP, pound, pen):
<cib>
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
<!-- Node 1 -->
<node id="lb_node1" type="normal" uname="lb_node1">
<instance_attributes id="lb_node1">
<nvpair id="lb_node1-standby" name="standby" value="off"/>
</instance_attributes>
</node>
<!-- Node 2 -->
<node id="lb_node2" type="normal" uname="lb_node2">
<instance_attributes id="lb_node2">
<nvpair id="lb_node2-standby" name="standby" value="off"/>
</instance_attributes>
</node>
</nodes>
<resources>
<!-- Cluster 0 (virtIP + pound) -->
<group id="lb-cluster-1">
<meta_attributes id="lb-cluster-1-meta_attributes">
<nvpair id="lb-cluster-1-meta_attributes-target-role" name="target-role" value="Started"/>
</meta_attributes>
<!-- virtuelle IP0 -->
<primitive class="ocf" id="vIP1-0" provider="heartbeat" type="IPaddr2">
<operations id="vIP1-0-operations">
<op id="vIP1-0-op-monitor-10s" interval="10s" name="monitor" timeout="20s"/>
</operations>
<instance_attributes id="vIP1-0-instance_attributes">
<nvpair id="vIP1-0-instance_attributes-ip" name="ip" value="10.0.0.190"/>
<nvpair id="vIP1-0-instance_attributes-cidr_netmask" name="cidr_netmask" value="255.255.255.0"/>
<nvpair id="vIP1-0-instance_attributes-nic" name="nic" value="eth0"/>
</instance_attributes>
</primitive>
<!-- OCF-File pound -->
<primitive class="ocf" provider="heartbeat" type="pound" id="lb-pound">
<operations id="pound-operations">
<op id="lb-pound-monitor-10s" interval="10s" name="monitor" timeout="20s"/>
</operations>
</primitive>
</group>
<!-- Cluster 1 (virtIP + pen) -->
<group id="lb-cluster-2">
<meta_attributes id="lb-cluster-2-meta_attributes">
<nvpair id="lb-cluster-2-meta_attributes-target-role" name="target-role" value="Started"/>
</meta_attributes>
<!-- virtuelle IP1 -->
<primitive class="ocf" id="vIP1-1" provider="heartbeat" type="IPaddr2">
<operations id="vIP1-1-operations">
<op id="vIP1-1-op-monitor-10s" interval="10s" name="monitor" timeout="20s"/>
</operations>
<instance_attributes id="vIP1-1-instance_attributes">
<nvpair id="vIP1-1-instance_attributes-ip" name="ip" value="10.0.0.188"/>
<nvpair id="vIP1-1-instance_attributes-cidr_netmask" name="cidr_netmask" value="255.255.255.0"/>
<nvpair id="vIP1-1-instance_attributes-nic" name="nic" value="eth0"/>
</instance_attributes>
</primitive>
<!-- OCF-Files pen -->
<primitive class="ocf" provider="heartbeat" type="pen_ocf_80" id="pen_lb_80">
<operations id="pen_ocf_80-operations">
<op id="pen_lb_80-monitor-10s" interval="10s" name="monitor" timeout="20s"/>
</operations>
</primitive>
<primitive class="ocf" provider="heartbeat" type="pen_ocf_443" id="pen_lb_443">
<operations id="pen_ocf_443-operations">
<op id="pen_lb_443-monitor-10s" interval="10s" name="monitor" timeout="20s"/>
</operations>
</primitive>
</group>
</resources>
<constraints>
<!-- Default node to run resource on -->
<!-- score = resource priority -->
<rsc_location id="lb-cluster-1_cons" node="lb_node1" rsc="lb-cluster-1" score="0"/>
<rsc_location id="lb-cluster-2_cons" node="lb_node1" rsc="lb-cluster-2" score="200"/>
</constraints>
<op_defaults>
</op_defaults>
<rsc_defaults>
<meta_attributes id="rsc_defaults-options">
<nvpair id="rsc_defaults-options-resource-stickiness" name="resource-stickiness" value="100"/>
</meta_attributes>
</rsc_defaults>
</configuration>
</cib>
<constraints>
<rsc_location id="loc-1" rsc="Webserver" node="sles-1" score="200"/>
<rsc_location id="loc-2-dont-run" rsc="Webserver" node="sles-2" score="-INFINITY"/>
<rsc_location id="loc-3-dont-run" rsc="Database" node="sles-1" score="-INFINITY"/>
<rsc_location id="loc-4" rsc="Database" node="sles-2" score="200"/>
</constraints>
<constraints>
<rsc_order id="order-1" first="Database" then="Webserver" />
<rsc_order id="order-2" first="IP" then="Webserver" score="0"/>
</constraints>
To add a resource manually run:
crm configure
To login into “crm(live)configure#”
Afterwards you can add a virtual IP (vIP1-0) by running:
primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s"
To add the new resource “vIP1-1” to group “lb-cluster-2” run:
group lb-cluster-2 vIP1-1 meta target_role=Started
group lb-cluster-group-name resource1 resource2 resource3
Or from shell:
crm configure group lb-cluster-group-name resource1 resource2 resource3
To set constraints for group “lb-cluster-2” with ID “lb-cluster-2_cons” with score “200” to run on host “hostname”:
location lb-cluster-2_cons lb-cluster-2 200: hostname
Stickyness:
crm configure rsc_defaults resource-stickiness=500
To save the changes:
commit
To delete (resource has to be stopped before!) a resource run:
delete resourceID commit
Example:
root@albano:/etc/corosync# crm configure crm(live)configure# primitive lb-nginx ocf:heartbeat:nginx operations $id="lb-nginx" op start interval="0" timeout="90s" op stop interval="0" timeout="120s" op monitor interval="10s" timeout="20s" crm(live)configure# primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s" crm(live)configure# group lb-cluster-2 vIP1-1 lb-nginx meta target_role=Started crm(live)configure# location lb-cluster-2_cons lb-cluster-2 200: hostname crm(live)configure# commit
Example OCF-Init-Script for “pound” Loadbalancing (/usr/lib/ocf/resource.d/heartbeat/pound):
#!/bin/sh
#
# Manages Pound as a Linux-HA resource
#
# params:
# OCF_RESKEY_pound
# OCF_RESKEY_config
# OCF_RESKEY_pid
# OCF_RESKEY_poundctl
#
###############################################################################
# Initialization:
#. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
# Respect an OCF_ROOT already exported by the cluster manager; only fall
# back to the standard location when it is unset (the previous unconditional
# assignment clobbered the environment).
: ${OCF_ROOT:=/usr/lib/ocf}
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Pulls in ocf_log and the OCF_SUCCESS/OCF_ERR_* return codes.
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
# Resource parameter defaults; each can be overridden via the corresponding
# OCF_RESKEY_* environment variable set by the cluster manager.
: ${OCF_RESKEY_pound="/usr/local/pound/sbin/pound"}
: ${OCF_RESKEY_pid="/var/run/pound_ocf.pid"}
: ${OCF_RESKEY_config="/etc/pound.cfg"}
: ${OCF_RESKEY_poundctl="/usr/local/pound/sbin/poundctl"}
: ${OCF_RESKEY_socket="/var/run/pound.socket"}
##############################################################################
# helpers:
# Print the control socket path taken from the "Control" directive of the
# Pound configuration file.
pound_socket()
{
  # Bug fix: read the configured config file instead of the previously
  # hard-coded /etc/pound.cfg, which ignored OCF_RESKEY_config.
  grep -i control "$OCF_RESKEY_config" | cut -d " " -f2 | sed 's/[\"]//g'
}
# Print the PID recorded in the resource's pidfile on stdout.
pound_pid()
{
  # Quoted so a pidfile path containing whitespace cannot be word-split.
  cat "$OCF_RESKEY_pid"
}
###############################################################################
# interface:
# Print a short usage summary for this resource agent on stdout.
usage()
{
  printf '%s\n' \
    "usage: $0 (start|stop|status|monitor|meta-data|validate-all)" \
    "$0 manages Pound as an HA resource."
}
meta_data()
{
# Emit the OCF resource-agent metadata XML (parameters and supported
# actions) on stdout, as required by the OCF RA API "meta-data" action.
# The heredoc delimiter is unquoted, but the content contains no shell
# expansions, so it is printed literally.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pound">
<version>1.0</version>
<longdesc lang="en">This is an OCF resource agent for the Pound reverse proxy</longdesc>
<shortdesc lang="en">OCF resource agent for Pound</shortdesc>
<parameters>
<parameter name="pound" unique="0" required="0">
<longdesc lang="en">Path to the Pound executable</longdesc>
<shortdesc lang="en">Pound executable</shortdesc>
<!--<content type="string" default="/usr/sbin/pound" />-->
<content type="string" default="/usr/local/pound/sbin/pound" />
</parameter>
<parameter name="poundctl" unique="0" required="0">
<longdesc lang="en">Path to the poundctl executable</longdesc>
<shortdesc lang="en">poundctl executable</shortdesc>
<!--<content type="string" default="/usr/sbin/poundctl" />-->
<content type="string" default="/usr/local/pound/sbin/poundctl" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">Pound's config file</longdesc>
<shortdesc lang="en">Pound's config</shortdesc>
<content type="string" default="/etc/pound/pound.cfg" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">Pound's process ID file</longdesc>
<shortdesc lang="en">Pound's PID</shortdesc>
<!--<content type="string" default="/var/run/pound.pid" />-->
<content type="string" default="/var/run/pound_ocf.pid" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="status" timeout="30" />
<action name="monitor" depth="0" start-delay="10" interval="10" timeout="30" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
# Validate the resource configuration: pound and poundctl must exist and be
# executable, the config file must exist, and pound itself must accept the
# config ("-c" = check only). On failure this exits (as the original did,
# rather than returning) with the appropriate OCF error code, so the action
# dispatcher stops before start/stop is attempted.
pound_validate()
{
  if [ ! -f "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound doesn't exist"
    exit $OCF_ERR_CONFIGURED
  fi
  if [ ! -x "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl doesn't exist"
    exit $OCF_ERR_CONFIGURED
  fi
  if [ ! -x "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_config" ]; then
    ocf_log err "Config file $OCF_RESKEY_config doesn't exist"
    exit $OCF_ERR_CONFIGURED
  fi
  # $( ) instead of backticks; quoted expansions throughout.
  msg=$("$OCF_RESKEY_pound" -c -f "$OCF_RESKEY_config" 2>&1)
  if [ $? -ne 0 ]; then
    ocf_log err "$msg"
    exit $OCF_ERR_CONFIGURED
  fi
  return $OCF_SUCCESS
}
# "status" is a legacy alias for "monitor": delegate and propagate the
# monitor action's return code unchanged.
pound_status()
{
pound_monitor
}
# Probe whether Pound is alive. Returns OCF_SUCCESS when the pidfile exists
# and poundctl can talk to the control socket, OCF_NOT_RUNNING otherwise.
pound_monitor()
{
  if [ ! -f "$OCF_RESKEY_pid" ]; then
    ocf_log debug "Pound is not running"
    return $OCF_NOT_RUNNING
  fi
  if ! "$OCF_RESKEY_poundctl" -c "$(pound_socket)" > /dev/null 2>&1; then
    ocf_log debug "Pound is not running"
    # Stale pidfile: remove it so later probes take the fast path.
    # (The pidfile is known to exist here, so no extra -f test is needed.)
    rm -f "$OCF_RESKEY_pid"
    return $OCF_NOT_RUNNING
  fi
  return $OCF_SUCCESS
}
# Start Pound unless it is already running, then wait until the monitor
# action reports success. Returns OCF_SUCCESS, or pound's non-zero exit
# status if the daemon failed to launch.
pound_start()
{
  local ret
  pound_monitor
  ret=$?
  if [ $ret -eq $OCF_SUCCESS ]; then
    ocf_log info "Pound is already running (PID: $(pound_pid)), doing nothing"
    return $OCF_SUCCESS
  fi
  "$OCF_RESKEY_pound" -f "$OCF_RESKEY_config" -p "$OCF_RESKEY_pid" > /dev/null 2>&1
  ret=$?
  if [ $ret -ne 0 ]; then
    ocf_log err "Pound failed to start: $ret"
    return $ret
  fi
  # Busy-wait until the daemon answers; the cluster-side start timeout
  # bounds this loop (lets rely on start timeout here...).
  while ! pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound started successfully (PID: $(pound_pid))"
  return $OCF_SUCCESS
}
# Stop Pound: nothing to do if it is already down; otherwise send SIGTERM
# and wait for the monitor action to confirm it is gone. Always returns
# OCF_SUCCESS ("stop" must be idempotent for the cluster manager).
pound_stop()
{
  # The original probed pound_monitor twice in a row (capturing an unused
  # $ret); a single probe is sufficient.
  if ! pound_monitor; then
    ocf_log info "Pound stopped successfully"
    return $OCF_SUCCESS
  fi
  kill -s TERM "$(pound_pid)" > /dev/null 2>&1
  # Busy-wait for shutdown; bounded by the cluster-side stop timeout.
  while pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound stopped successfully"
  return $OCF_SUCCESS
}
# Dispatch on the action requested by the cluster manager ($__OCF_ACTION is
# exported by the OCF environment). start/stop validate the configuration
# first, so a broken setup aborts with the OCF error code set inside
# pound_validate (which exits rather than returns on failure).
case $__OCF_ACTION in
start) pound_validate; pound_start;;
stop) pound_validate; pound_stop;;
status) pound_status;;
monitor) pound_monitor;;
meta-data) meta_data; exit $OCF_SUCCESS;;
validate-all) pound_validate; exit $OCF_SUCCESS;;
*) usage; exit $OCF_ERR_UNIMPLEMENTED;;
esac
# Propagate the dispatched action's return code as the script's exit status.
exit $?