[[http://tmade.de|Home tmade.de]]
[[http://wiki.tmade.de|Home Wiki]]
===== Pacemaker =====
See also:
http://clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/
==== Installation ====
Required packages (for SLES 11 SP2):
libpacemaker-devel
libpacemaker3
pacemaker
pacemaker-mgmt
pacemaker-mgmt-client
pacemaker-mgmt-devel
xorg-x11-fonts
xorg-x11-fonts-core
xorg-x11-libXau
xorg-x11-libXau-32bit
xorg-x11-xauth
Installation for Ubuntu Server 12.04, 16.04, 18.04:
apt-get install pacemaker
Installation for SLES12:
zypper in pacemaker
After pacemaker-installation:
corosync-keygen #Create "authkey" (has to be executed in local console!!)
/etc/corosync/authkey #Copy "authkey" to other nodes
==== Useful commands ====
The following commands can be used to configure and manage pacemaker.
crm configure show #Show resource configuration
crm resource list #Show configured resources
crm_verify -L #Verify resource configuration
crm_mon -i 2 #Show resource configuration with update (interval of 2 seconds)
crm_mon -r -1 #Show resource configuration
crm_mon -1 #Show all resources (one shot)
crm_mon -1nfV
crm configure property no-quorum-policy=ignore #Ignore quorum
crm configure property stonith-enabled=false #Deactivate stonith (shoot the other node in the head)
crm configure rsc_defaults resource-stickiness=100 #Set default-quantifier to "100"
crm_resource -D -r Apache2 -t primitive #delete resource "Apache2"
crm resource cleanup Apache2 #Clean resource "Apache2" (orphaned or with errors)
crm configure delete Apache #Delete resource "Apache". Note: Primitive resources have to be deleted _before_ the group which contains the resource!
crm resource migrate Apache2 node2.site #Migrate resource "Apache2" to node2
crm resource unmigrate Apache2 #Unmigrate resource "Apache2" (set "weight" to default)
crm resource stop Apache2 #Stopping resource "Apache2"
crm resource start Apache2 #Starting resource "Apache2"
cibadmin -E --force #Delete complete configuration. Before resources are deleted, it's recommended to stop them first ("crm resource stop resource_name"), to avoid deleting "orphaned resources"!
cibadmin -U -x /var/lib/heartbeat/crm/Apache.xml #Add/ update complete configuration
crm configure primitive failover-ip ocf:heartbeat:IPaddr params ip=192.168.1.10 op monitor interval=10s
crm_node -R nodex #Remove node "nodex"
crm_node -p #Show nodes in cluster
crm node status #Show node status
crm node list #List nodes
crm node maintenance NODENAME #set maintenance
crm node ready NODENAME #remove from maintenance
crm node online NODENAME #Bring a standby node back online
crmadmin -D #Show current DC
crm ra list stonith
crm ra list lsb
crm_simulate -sL #show live scores
Configure Cluster:
crm configure load update crm-bs.txt #load properties
Example crm-bs.txt:
# enter the following to crm-bs.txt
property $id="cib-bootstrap-options" \
no-quorum-policy="ignore" \
stonith-enabled="true" \
stonith-action="reboot" \
stonith-timeout="150s"
rsc_defaults $id="rsc-options" \
resource-stickiness="1000" \
migration-threshold="5000"
op_defaults $id="op-options" \
timeout="600"
Delete cluster property:
crm configure
crm(live/a4t181)configure# delete cib-bootstrap-options
Configure with "crm configure":
crm configure property stonith-enabled=false no-quorum-policy=ignore cluster-infrastructure=corosync have-watchdog=false cluster-name=lbcluster
crm configure primitive vIP1-0 ocf:heartbeat:IPaddr2 \
params ip="10.251.49.50" nic="eth0" cidr_netmask="25" \
op monitor interval="10s" timeout="20s"
crm configure primitive ping-gateway ocf:pacemaker:ping \
meta target-role="Started" \
op monitor interval="10" timeout="60" \
params host_list="10.251.49.1" multiplier="1000" timeout="20"
crm configure clone pingclone ping-gateway \
meta target-role="Started"
crm configure location vIP1-0_cons vIP1-0 200: myhostname
crm configure rsc_defaults rsc-options: \
resource-stickiness=1000 \
migration-threshold=5000
crm configure op_defaults op-options: \
timeout=600
Restart:
systemctl stop pacemaker.service && systemctl stop corosync.service
systemctl stop pacemaker.service && systemctl restart corosync.service && systemctl start pacemaker.service
Change parameter:
cibadmin -Q | grep ip
Output:
To change e.g. the IP execute:
cibadmin -M --crm_xml ''
cibadmin -M --xml-text ''
==== Stonith ====
See the list of stonith devices:
stonith -L
====Links====
https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/s-cluster-options.html
==== Configuration ====
Path:
/usr/lib/ocf/resource.d/heartbeat/ #OCF-Init-Scripts
/etc/corosync/corosync.conf #Configuration
/var/lib/heartbeat/crm/cib.xml #"Last" updated resource configuration file on SLES 11/ Ubuntu Server 12.04
/var/lib/pacemaker/cib/cib.xml #"Last" updated resource configuration file on Ubuntu Server 14.04, SLES 12
/etc/init.d/openais #Service init-script to start/stop pacemaker on SLES 11
/etc/init.d/corosync #Service init-script to start/stop pacemaker on Ubuntu Server 12.04 (previously edit "/etc/default/corosync": # start corosync at boot [yes|no] -> START=yes)
====SLES11====
Example "/etc/corosync/corosync.conf" (SLES 11):
aisexec {
group: root
user: root
}
service {
use_mgmtd: no
ver: 0
name: pacemaker
}
totem {
rrp_mode: passive
token_retransmits_before_loss_const: 10
join: 60
max_messages: 20
vsftype: none
token: 5000
consensus: 6000
secauth: on
version: 2
threads: 0
transport: udpu
interface {
#Pacemaker nodes:
member {
#Load-Balancer1
#memberaddr: 10.0.0.193
memberaddr: lb_node1 #DNS for hostname has to be configured
}
member {
#Load-Balancer2
#memberaddr: 10.0.0.194
memberaddr: lb_node2 #DNS for hostname has to be configured
}
#Network:
bindnetaddr: 10.0.0.0
mcastport: 5405
ringnumber: 0
}
clear_node_high_bit: yes
#stonith-enabled=true #NOTE: "stonith-enabled" is a Pacemaker cluster property, not valid corosync.conf syntax — set it via "crm configure property stonith-enabled=true"
}
logging {
to_logfile: yes
to_stderr: no
to_syslog: yes
debug: off
timestamp: on
logfile: /var/log/cluster/corosync.log
fileline: off
syslog_facility: daemon
}
amf {
mode: disabled
}
====SLES12====
Example "/etc/corosync/corosync.conf" (SLES 12):
# Please read the corosync.conf.5 manual page
totem {
version: 2
crypto_cipher: none
crypto_hash: none
clear_node_high_bit: yes
interface {
ringnumber: 0
bindnetaddr: 192.168.150.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
logging {
fileline: off
#to_logfile: no
to_syslog: yes
#logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
nodelist {
node {
ring0_addr: 192.168.150.12
name: testnode1
nodeid: 1
}
node {
ring0_addr: 192.168.150.13
name: testnode2
nodeid: 2
}
}
quorum {
# Enable and configure quorum subsystem (default: off)
# see also corosync.conf.5 and votequorum.5
provider: corosync_votequorum
expected_votes: 2
}
====Ubuntu 12.04====
Example "/etc/corosync/corosync.conf" (Ubuntu Server 12.04):
# Please read the openais.conf.5 manual page
totem {
version: 2
# How long before declaring a token lost (ms)
token: 3000
# How many token retransmits before forming a new configuration
token_retransmits_before_loss_const: 10
# How long to wait for join messages in the membership protocol (ms)
join: 60
# How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
consensus: 3600
# Turn off the virtual synchrony filter
vsftype: none
# Number of messages that may be sent by one processor on receipt of the token
max_messages: 20
# Limit generated nodeids to 31-bits (positive signed integers)
clear_node_high_bit: yes
# Disable encryption
#secauth: off
secauth: on
# How many threads to use for encryption/decryption
threads: 0
# Optionally assign a fixed node id (integer)
# nodeid: 1234
# This specifies the mode of redundant ring, which may be none, active, or passive.
rrp_mode: none
# interface {
# # The following values need to be set based on your environment
# ringnumber: 0
#bindnetaddr: 127.0.0.1
# bindnetaddr: 192.168.0.203
# mcastaddr: 226.94.1.1
# mcastport: 5405
# }
interface {
member {
memberaddr: 192.168.0.201
}
member {
memberaddr: 192.168.0.203
}
ringnumber: 0
bindnetaddr: 192.168.0.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
amf {
mode: disabled
}
service {
# Load the Pacemaker Cluster Resource Manager
ver: 0
name: pacemaker
}
aisexec {
user: root
group: root
}
logging {
fileline: off
to_stderr: yes
to_logfile: no
to_syslog: yes
syslog_facility: daemon
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
tags: enter|leave|trace1|trace2|trace3|trace4|trace6
}
}
====Ubuntu 16.04====
totem {
version: 2
cluster_name: lbcluster
transport: udpu
interface {
ringnumber: 0
bindnetaddr: 192.168.150.128
broadcast: yes
mcastport: 5405
}
}
quorum {
provider: corosync_votequorum
expected_votes: 3
#two_node: 1
}
nodelist {
node {
ring0_addr: 192.168.150.230
name: node002
nodeid: 1
}
node {
ring0_addr: 192.168.150.239
name: node006
nodeid: 2
}
node {
ring0_addr: 192.168.150.243
name: node007
nodeid: 3
}
}
logging {
# Log the source file and line where messages are being
# generated. When in doubt, leave off. Potentially useful for
# debugging.
# Log with syslog facility daemon.
#syslog_facility: daemon
fileline: off
to_logfile: yes
to_stderr: no
debug: off
logfile: /var/log/corosync/corosync.log
to_syslog: yes
timestamp: on
}
==== OCF-Resource ====
Example resource-configuration (2 x virtIP, pound, pen):
==== Snippets ====
==== Manually Resource Add ====
To add a resource manually run:
crm configure
To login into "crm(live)configure#"
Afterwards you can add a virtual IP (vIP1-0) by running:
primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s"
To add the new resource "vIP1-1" to group "lb-cluster-2" run:
group lb-cluster-2 vIP1-1 meta target_role=Started
group lb-cluster-group-name resource1 resource2 resource3
Or from shell:
crm configure group lb-cluster-group-name resource1 resource2 resource3
To set constraints for group "lb-cluster-2" with ID "lb-cluster-2_cons" with score "200" to run on host "hostname":
location lb-cluster-2_cons lb-cluster-2 200: hostname
Stickyness:
crm configure rsc_defaults resource-stickiness=500
To save the changes:
commit
To delete (resource has to be stopped before!) a resource run:
delete resourceID
commit
Example:
root@albano:/etc/corosync# crm configure
crm(live)configure# primitive lb-nginx ocf:heartbeat:nginx operations $id="lb-nginx" op start interval="0" timeout="90s" op stop interval="0" timeout="120s" op monitor interval="10s" timeout="20s"
crm(live)configure# primitive vIP1-1 ocf:heartbeat:IPaddr2 params ip="10.6.3.121" nic="eth0" cidr_netmask="24" operations $id="vIP1-1-operations" op monitor interval="10s" timeout="20s"
crm(live)configure# group lb-cluster-2 vIP1-1 lb-nginx meta target_role=Started
crm(live)configure# location lb-cluster-2_cons lb-cluster-2 200: hostname
crm(live)configure# commit
==== OCF-Init-Script ====
Example OCF-Init-Script for "pound" Loadbalancing (/usr/lib/ocf/resource.d/heartbeat/pound):
#!/bin/sh
#
# Manages Pound as a Linux-HA resource
#
# params:
# OCF_RESKEY_pound
# OCF_RESKEY_config
# OCF_RESKEY_pid
# OCF_RESKEY_poundctl
#
###############################################################################
# Initialization:
#. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
# Source the OCF shell function library: provides ocf_log and the
# OCF_SUCCESS / OCF_NOT_RUNNING / OCF_ERR_* exit-code variables used below.
OCF_ROOT=/usr/lib/ocf
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults for the agent's instance parameters; each can be overridden by the
# corresponding OCF_RESKEY_* environment variable set on the resource.
: ${OCF_RESKEY_pound="/usr/local/pound/sbin/pound"}
: ${OCF_RESKEY_pid="/var/run/pound_ocf.pid"}
: ${OCF_RESKEY_config="/etc/pound.cfg"}
: ${OCF_RESKEY_poundctl="/usr/local/pound/sbin/poundctl"}
# NOTE(review): OCF_RESKEY_socket is defaulted here, but pound_socket() parses
# the socket path from the config file instead — confirm which is intended.
: ${OCF_RESKEY_socket="/var/run/pound.socket"}
##############################################################################
# helpers:
# Print the control-socket path by parsing the "Control" directive out of the
# configured Pound config file (quotes stripped).
# Fix: use $OCF_RESKEY_config instead of the hard-coded /etc/pound.cfg, so a
# non-default "config" parameter is honored.
pound_socket()
{
  grep -i control "$OCF_RESKEY_config" | cut -d " " -f2 | sed 's/[\"]//g'
}
# Print the PID stored in the agent's PID file.
# Fix: quote the expansion so a path containing spaces does not word-split.
pound_pid()
{
  cat "$OCF_RESKEY_pid"
}
###############################################################################
# interface:
# Print the agent's usage/meta-data text.
# Fix: the original "cat <" is a shell syntax error (redirection with no
# target) — it must be a here-document terminated by the existing END marker.
# NOTE(review): the text looks like OCF meta-data XML whose markup was
# stripped by the wiki; the original tags would need to be restored from the
# upstream agent.
usage()
{
  cat <<END
1.0
This is an OCF resource agent for the Pound reverse proxy
OCF resource agent for Pound
Path to the Pound executable
Pound executable
Path to the poundctl executable
poundctl executable
Pound's config file
Pound's config
Pound's process ID file
Pound's PID
END
}
# Validate the agent's configuration: the pound and poundctl binaries must
# exist and be executable, the config file must exist, and pound itself must
# accept the config (-c syntax check). Exits with the matching OCF error code
# on failure; returns $OCF_SUCCESS otherwise.
# Fix: quote all expansions in test/command positions and replace backticks
# with $( ) — behavior is unchanged for well-formed paths.
pound_validate()
{
  if [ ! -f "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  if [ ! -x "$OCF_RESKEY_pound" ]; then
    ocf_log err "$OCF_RESKEY_pound is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  if [ ! -x "$OCF_RESKEY_poundctl" ]; then
    ocf_log err "$OCF_RESKEY_poundctl is not executable"
    exit $OCF_ERR_PERM
  fi
  if [ ! -f "$OCF_RESKEY_config" ]; then
    ocf_log err "Config file $OCF_RESKEY_config doesn't exist";
    exit $OCF_ERR_CONFIGURED;
  fi
  # Let pound check its own configuration and log its error output on failure.
  msg=$("$OCF_RESKEY_pound" -c -f "$OCF_RESKEY_config" 2>&1)
  if [ $? -ne 0 ]; then
    ocf_log err "$msg"
    exit $OCF_ERR_CONFIGURED
  fi
  return $OCF_SUCCESS
}
# "status" is a plain alias for "monitor" in this agent: it reports the same
# running / not-running verdict via the same exit code.
pound_status()
{
  pound_monitor
  return $?
}
# Report whether Pound is running: the PID file must exist AND poundctl must
# be able to talk to the control socket. Returns $OCF_NOT_RUNNING otherwise.
# Fixes: drop the unused "local ret"; quote all expansions; use $( ) instead
# of backticks; drop the redundant [ -f ] re-test before rm (the file is known
# to exist on that path) — a stale PID file is simply removed.
pound_monitor()
{
  if [ ! -f "$OCF_RESKEY_pid" ]; then
    ocf_log debug "Pound is not running"
    return $OCF_NOT_RUNNING;
  fi
  if ! "$OCF_RESKEY_poundctl" -c "$(pound_socket)" > /dev/null 2>&1; then
    ocf_log debug "Pound is not running"
    # The process is gone but left its PID file behind: clean it up.
    rm -f -- "$OCF_RESKEY_pid"
    return $OCF_NOT_RUNNING;
  fi
  return $OCF_SUCCESS;
}
# Start Pound unless it is already running, then block until the monitor
# reports success (the cluster's start-operation timeout bounds the wait).
# Returns $OCF_SUCCESS, or pound's exit status if the launch itself fails.
# Fix: quote all expansions and replace backticks with $( ).
pound_start()
{
  local ret
  pound_monitor
  ret=$?
  if [ $ret -eq $OCF_SUCCESS ]; then
    ocf_log info "Pound is already running (PID: $(pound_pid)), doing nothing"
    return $OCF_SUCCESS;
  fi
  "$OCF_RESKEY_pound" -f "$OCF_RESKEY_config" -p "$OCF_RESKEY_pid" > /dev/null 2>&1
  ret=$?
  if [ $ret -ne 0 ]; then
    ocf_log err "Pound failed to start: $ret"
    return $ret;
  fi
  # lets rely on start timeout here...
  while ! pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound started successfully (PID: $(pound_pid))"
  return $OCF_SUCCESS;
}
# Stop Pound: a no-op if it is already down, otherwise send SIGTERM to its PID
# and wait until the monitor confirms it is gone (the cluster's stop-operation
# timeout bounds the wait). Always returns $OCF_SUCCESS.
# Fixes: drop the dead first pound_monitor call whose status went into an
# otherwise-unused "ret"; quote expansions; use $( ) instead of backticks.
pound_stop()
{
  if ! pound_monitor; then
    ocf_log info "Pound stopped successfully";
    return $OCF_SUCCESS;
  fi
  kill -s TERM "$(pound_pid)" > /dev/null 2>&1
  while pound_monitor; do
    sleep 1
  done
  ocf_log info "Pound stopped successfully";
  return $OCF_SUCCESS;
}
# Dispatch on the action requested by the cluster resource manager.
# Fixes: quote the case selector; the original "meta-data" arm called an
# undefined meta_data function (instant failure) — usage() holds the agent's
# meta-data/usage text, so call it instead.
case "$__OCF_ACTION" in
  start) pound_validate; pound_start;;
  stop) pound_validate; pound_stop;;
  status) pound_status;;
  monitor) pound_monitor;;
  meta-data) usage; exit $OCF_SUCCESS;;
  validate-all) pound_validate; exit $OCF_SUCCESS;;
  *) usage; exit $OCF_ERR_UNIMPLEMENTED;;
esac
exit $?