Page tree

Welcome to FreeSoftwareServers Confluence Wiki

Skip to end of metadata
Go to start of metadata

CentOS:

http://galeracluster.com/documentation-webpages/monitoringthecluster.html

In my setup, all "checks" will be done from the SQL Arbitrator VM.

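Because of that, the Arbitrator VM needs passwordless SSH (RSA key) access to each SQL node, and a .my.cnf credentials file should be used so the mysql client can run without prompting for a password.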

nano /root/.my.cnf && chmod 400 /root/.my.cnf && chown root:root /root/.my.cnf
[client]
user=root
password=password
[mysql]
user=root
password=password
[mysqldump]
user=root
password=password
[mysqldiff]
user=root
password=password
nano /usr/share/xymon-client/ext/wsrep_check.sh
#!/bin/bash
# Created by FreeSoftwareServers
# http://galeracluster.com/documentation-webpages/monitoringthecluster.html
#
# Xymon client extension: collects the wsrep status of each Galera node over
# SSH and reports the cluster health in the "sql_repl" column.

##XYMon Starting Variables
COLUMN=sql_repl
COLOR=green
SERVER=$(hostname -f)
MSG="Checking the Galera Cluster"

# fetch_wsrep_status HOST OUTFILE
# Runs "SHOW STATUS LIKE 'wsrep%'" on HOST over SSH (as root) and stores the
# output in OUTFILE, replacing any dump left over from a previous run.
# If the SSH or mysql command fails, OUTFILE is left empty.
fetch_wsrep_status() {
  local host=$1 outfile=$2

  # -f: the dump normally does not exist yet (it is removed at the end of
  # every run), so a missing file must not produce an error message.
  rm -f -- "$outfile"

  # BatchMode stops ssh from waiting on a password prompt when the key-based
  # login is not working; the script runs unattended.
  ssh -o BatchMode=yes root@"$host" \
    "mysql -e \"SHOW STATUS LIKE 'wsrep%'\"" > "$outfile"
}

sql1=sql1
sql1log="/tmp/$sql1.replstat"
fetch_wsrep_status "$sql1" "$sql1log"

sql2=sql2
sql2log="/tmp/$sql2.replstat"
fetch_wsrep_status "$sql2" "$sql2log"

# Galera cluster health checks.
#
# Each check reads the wsrep status dumps of the nodes and records a message
# in ERRORS when it fails. ERRORS is the variable the "XYMon Status" section
# tests, so it stays empty only when every check passed.

# Number of nodes the cluster is expected to have (wsrep_cluster_size).
propersize=3

# Newline-separated list of failed checks; empty means healthy.
ERRORS=""

# wsrep_value FILE NAME
# Prints the value of status variable NAME found in the dump FILE.
# Understands both the tab-separated batch output and the boxed table output
# of the mysql client. Prints nothing when NAME is absent; returns non-zero
# when FILE is not a readable file.
wsrep_value() {
  local file=$1 name=$2
  [[ -f "$file" && -r "$file" ]] || return 1
  awk -v name="$name" '
    { gsub(/\|/, " ") }
    $1 == name {
      $1 = ""
      sub(/^[ \t]+/, "")
      sub(/[ \t]+$/, "")
      print
      exit
    }
  ' "$file"
}

# add_error MESSAGE
# Appends MESSAGE to ERRORS, one message per line.
add_error() {
  local nl=$'\n'
  ERRORS="${ERRORS}${ERRORS:+$nl}$1"
}

# expect_on_all NAME EXPECTED MESSAGE FILE...
# Records MESSAGE once if status variable NAME is not exactly EXPECTED in any
# of the dump FILEs (a missing or unreadable dump counts as a failure).
expect_on_all() {
  local name=$1 expected=$2 message=$3 file
  shift 3
  for file in "$@"; do
    if [[ "$(wsrep_value "$file" "$name")" != "$expected" ]]; then
      add_error "$message"
      return
    fi
  done
}

# check_cluster FILE1 FILE2
# Runs every health check against the two node dumps and leaves the result in
# ERRORS (reset at the start of each call).
check_cluster() {
  local file1=$1 file2=$2
  local uuid1 uuid2 file

  ERRORS=""

  # Check Cluster UUID Matches
  # Each node in the cluster should provide the same value.
  # When a node carries a different value, this indicates that it is no longer
  # connected to rest of the cluster. An empty value means the dump could not
  # be read, which is treated as a mismatch too.
  uuid1=$(wsrep_value "$file1" wsrep_cluster_state_uuid)
  uuid2=$(wsrep_value "$file2" wsrep_cluster_state_uuid)
  if [[ -z "$uuid1" || "$uuid1" != "$uuid2" ]]; then
    add_error "The Cluster UUID Doesn't Match!"
  fi

  # Check that Clustersize is what it should be: every node must report
  # exactly $propersize members.
  expect_on_all wsrep_cluster_size "$propersize" \
    "The Cluster Size doesn't Match the predefined Cluster Size!" \
    "$file1" "$file2"

  # Check Cluster Status = Primary (exact match, so "non-Primary" fails).
  expect_on_all wsrep_cluster_status "Primary" \
    "The Cluster nodes reported non Primary Status" \
    "$file1" "$file2"

  # Check WSREP Ready
  # When the node returns a value of ON it can accept write-sets from the cluster
  expect_on_all wsrep_ready "ON" \
    "A Node is not ready to accept write-sets, wsrep_ready check failed!" \
    "$file1" "$file2"

  # Check WSREP Connected
  # When the value is ON, the node has a network connection to one or more
  # other nodes forming a cluster component.
  expect_on_all wsrep_connected "ON" \
    "There is a node with Network Connectivity Issues!" \
    "$file1" "$file2"

  # Check local state comment
  # When the node is part of the Primary Component, the typical return values
  # are Joining, Waiting on SST, Joined, Synced or Donor.
  # In the event that the node is part of a nonoperational component, the
  # return value is Initialized.
  for file in "$file1" "$file2"; do
    if [[ "$(wsrep_value "$file" wsrep_local_state_comment)" == "Initialized" ]]; then
      add_error "A node reported it hasn't joined the cluster and is 'Initialized'"
      break
    fi
  done
}

check_cluster "$sql1log" "$sql2log"

##### XYMon Status #####

# build_status_message
# Builds the text reported to Xymon.
#   Reads:  sql1log, sql2log (paths of the per-node wsrep dumps),
#           ERRORS, or ERROR when ERRORS is empty (failed-check messages).
#   Writes: cmd (raw status of both nodes), MSG (full report text) and
#           COLOR (set to red when a failure was recorded, untouched otherwise).
build_status_message() {
  # The checks may record failures under either name; honour both so a
  # detected problem always turns the column red.
  local errors=${ERRORS:-${ERROR:-}}
  # Line break plus the four-space indent used between the report sections.
  local sep=$'\n    '

  # A missing dump is skipped rather than passed to cat, which would
  # otherwise fall back to reading stdin when the variable is empty.
  cmd=$(
    printf '%s\n' "SQL1 Status"
    [[ -f "${sql1log:-}" ]] && cat -- "$sql1log"
    printf '%s\n' "##########" "SQL2 Status"
    [[ -f "${sql2log:-}" ]] && cat -- "$sql2log"
  )

  if [[ -n "$errors" ]]; then
    COLOR=red
    MSG="An error has been detected in the Galera Cluster${sep}${sep}${errors}${sep}${cmd}${sep}"
  else
    MSG="Galera Cluster Healthy${sep}${sep}${cmd}${sep}"
  fi
}

build_status_message

# Leave the rest of script alone
# Tell Xymon about it
# The report is a single "status" message: a header line carrying the
# host.column name, the colour and a timestamp, a blank line, then MSG.
# XYMON, XYMSRV and MACHINE are not set in this script; they are expected to
# come from the environment it runs in.
status_report="status $MACHINE.$COLUMN $COLOR $(date)

${MSG}
"
$XYMON $XYMSRV "$status_report"

# Remove the per-node status dumps now that they have been reported.
for dump in "$sql1log" "$sql2log"; do
  rm "$dump"
done
exit 0
# Install the check: hand the script to the xymon user and make it executable.
# Mode 755 (not 777): the script logs in to the database nodes as root over
# SSH, so it must not be writable by other local users.
chown xymon:xymon /usr/share/xymon-client/ext/wsrep_check.sh
chmod 755 /usr/share/xymon-client/ext/wsrep_check.sh
# Register the script as a Xymon client task that runs every 15 minutes.
cat << 'EOL' >/etc/xymon-client/client.d/wsrep_check.cfg 
[sql_repl] 
ENVFILE $XYMONCLIENTHOME/etc/xymonclient.cfg 
CMD $XYMONCLIENTHOME/ext/wsrep_check.sh 
LOGFILE $XYMONCLIENTHOME/logs/wsrep_check.log 
INTERVAL 15m 
EOL
# Run the check once by hand through xymoncmd to verify it works.
/usr/share/xymon-client/bin/xymoncmd /usr/share/xymon-client/ext/wsrep_check.sh


 

  • No labels