diff --git a/README.md b/README.md index 08cb0e0..1acb604 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,47 @@ average per CPU. Different to check_load, the load is divided by the number of CPU units and therefore normalized. So the same warning and critical levels fit for any server. +If a server's load is often high, you can find tips in [Isolating Linux High System Load](https://www.tummy.com/articles/isolating-heavy-load/). In short: + + - see the load average in `uptime` + - check system logs with `dmesg` + - see `vmstat`: + - high `WA` (wait) column: CPU is often waiting, you probably have high disk load + - check `swap` → `si` and `so`, if they are often much above 0, you are out of memory + - check memory usage of processes with `ps awwlx --sort=vsz` + - solution: add more memory + - if `cpu` → `ID` (idle) is around 0, your CPU is overloaded + - if `SY` (system) is high, there could be large directories, e.g. mail spam + - if `SY` (system) is high, it could be the firewall iptables + - if `US` (user-space) is high, check with `top` for CPU-consuming processes + - if `io` → `bi` and `bo` (in/out) are high, check with `iostat` or `sudo iotop` + + mountpoint.sh ------------- -Icinga plugin to check whether a given path is available. Detects -problems, e.g. with glusterfs: In case of a problem, there is an error -message such as "socket not connected". Called with path to check. +Icinga plugin to check whether a given mountpoint is +available. Detects problems, e.g. with glusterfs: In case of a +problem, there is an error message such as "socket not +connected". Called with path to check. + +diskio.sh +--------- + +Icinga plugin to check whether a given path is writable and to check the +performance of the given path. Detects problems with the filesystem or +specific io performance. Writes a test file using `dd` and reports +write speed. It is recommended to use a subdirectory tmp for writing +test files. 
+ +gluster-geo-replication.sh +-------------------------- + +Icinga plugin to check gluster geo replication. Returns OK if no +connection is Faulty and at least one connection is Active. Returns CRITICAL +if at least one connection is Faulty. Returns WARNING if no connection is +Active. + Installation ============ @@ -37,6 +72,7 @@ Update ------ cd /opt/icinga-checks + sudo git pull Configure --------- diff --git a/diskio.sh b/diskio.sh new file mode 100755 index 0000000..ec1d93f --- /dev/null +++ b/diskio.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +if test -e /etc/doskio.conf; then + . /etc/doskio.conf +fi + +path=( ${CHECKPATHES} ) +filename=tmp/test.${HOSTNAME} +while test $# -gt 0; do + case $1 in + (-h|--help) cat < filename for temporary file (default: ${filename}) + +PATH + +Path to check. + +DESCRIPTION + +Icinga plugin to check whether a given path is writable and checks the +performance of the given path. Detects problems with the filesystem or +specific io performance. + +EOF + exit;; + (-t|--tmpfile) shift; filename=$1;; + (*) if ! test -d $1; then + echo "ERROR: please specify an existing path, not $1" 1>&2 + exit 1 + fi + path+=( "$1" );; + esac + if test $# -lt 1; then + echo "ERROR: missing option, try $0 --help" + exit 1 + fi + shift +done +if test ${#path[@]} -eq 0; then + echo "ERROR: missing path, try $0 --help" + exit 1 +fi +result=0 +for file in "${path[@]}"; do + check=$(dd if=/dev/zero of=${file}/${filename} bs=2M count=2 iflag=fullblock 2>&1 | sed -n 's/.*, *\([^, ]*\) *\([^, ]*\), *\([^ ,]*\) *\([^, ]*\)/'"${file//\//\\/}"'; time=\1\2 speed=\3\4/p') + status=$? 
+ if rm ${file}/${filename} 2> /dev/null && test $status -eq 0; then + status="OK - $check" + else + status="CRITICAL - ${file}" + result=2 + fi + echo ${status} +done +exit $result diff --git a/gluster-geo-replication.sh b/gluster-geo-replication.sh new file mode 100755 index 0000000..2b717e6 --- /dev/null +++ b/gluster-geo-replication.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +while test $# -gt 0; do + case $1 in + (-h|--help) cat <&2 + exit 2 + fi + sourcevolume=$1 + targetvolume=$2 + replicationuser=$3 + targethost=$4 + break;; + esac + if test $# -lt 1; then + echo "ERROR: missing option, try $0 --help" + exit 2 + fi + shift +done +if test $# -ne 4; then + echo "ERROR: wrong number of options $*" 1>&2 + exit 2 +fi + +rep=$(sudo gluster volume geo-replication \ + ${sourcevolume} \ + ${replicationuser}@${targethost}::${targetvolume} \ + status \ + | awk 'BEGIN {status="OK"; res=""} NR>3 {res=res " - " $1 " → " $7} NR>3 && $7=="Faulty" {status="CRITICAL"} END {print status res}') +if test $? 
-ne 0; then + echo "CRITICAL - wrong configuration" + exit 2 +elif [[ $rep =~ ^OK ]]; then + if [[ $rep =~ Active ]]; then + echo $rep + exit 0 + else + echo WARNING${rep#OK} + exit 1 + fi +else + echo $rep + exit 2 +fi + diff --git a/mountpoint.sh b/mountpoint.sh index 618adc8..794932c 100755 --- a/mountpoint.sh +++ b/mountpoint.sh @@ -39,22 +39,27 @@ fi time=$(mktemp) out=$(mktemp) err=$(mktemp) -if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then - if [[ $(<$time) =~ elapsed=0.0[012]s ]]; then - level=0 - STATUS="OK - ${path}:" +if /usr/bin/time -f "elapsed=%es" -qo $time mount | grep -q 'on '"$path"' '; then + if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then + if [[ $(<$time) =~ elapsed=0.0[012]s ]]; then + level=0 + STATUS="OK - ${path}:" + else + level=1 + STATUS="WARNING - ${path}:" + fi else - level=1 - STATUS="WARNING - ${path}:" - fi -else - if test -s $err; then - level=2 - STATUS="CRITICAL - $(<$err):" - else - level=1 - STATUS="CRITICAL - ${path} is empty:" + if test -s $err; then + level=2 + STATUS="CRITICAL - $(<$err):" + else + level=2 + STATUS="CRITICAL - ${path} is empty:" + fi fi +else + level=2 + STATUS="CRITICAL - ${path} not mounted:" fi echo ${STATUS} $(<$time) rm $time $out $err 2>&1 > /dev/null