new checks diskio.sh and gluster-geo-replication.sh
This commit is contained in:
42
README.md
42
README.md
@@ -15,12 +15,47 @@ average per CPU. Different to check_load, the load is divided by the
|
||||
number of CPU units and therefore normalized. So the same warning and
|
||||
critical levels fit for any server.
|
||||
|
||||
If a server's load is often high, you can find tips in [Isolating Linux High System Load](https://www.tummy.com/articles/isolating-heavy-load/), in short words:
|
||||
|
||||
- see the load average in `uptime`
|
||||
- check system logs with `dmesg`
|
||||
- see `vmstat`:
|
||||
- high `WA` (wait) column: CPU is often waiting, you probably have high disk load
|
||||
- check `swap` → `si` and `so`, if they are often much above 0, you are out of memory
|
||||
- check memory usage of processes with `ps awwlx --sort=vsz`
|
||||
- solution: add more memory
|
||||
- if `cpu` → `ID` (idle) is around 0, your CPU is overloaded
|
||||
- if `SY` (system) is high, there could be large directories, e.g. mail spam
|
||||
- if `SY` (system) is high, it could be the firewall iptables
|
||||
- if `US` (user-space) is high, check with `top` for CPU consuming processes
|
||||
- if `io` → `bi` and `bo` (in/out) are high, check with `iostat` or `sudo iotop`
|
||||
|
||||
|
||||
mountpoint.sh
|
||||
-------------
|
||||
|
||||
Icinga plugin to check whether a given path is available. Detects
|
||||
problems, e.g. with glusterfs: In case of a problem, there is an error
|
||||
message such as "socket not connected". Called with path to check.
|
||||
Icinga plugin to check whether a given mountpoint is
|
||||
available. Detects problems, e.g. with glusterfs: In case of a
|
||||
problem, there is an error message such as "socket not
|
||||
connected". Called with path to check.
|
||||
|
||||
diskio.sh
|
||||
---------
|
||||
|
||||
Icinga plugin to check whether a given path is writable and checks the
|
||||
performance of the given path. Detects problems with the filesystem or
|
||||
specific io performance. Writes a test file using `dd` and reports
|
||||
write speed. It is recommended to use a subdirectory tmp for writing
|
||||
test files.
|
||||
|
||||
gluster-geo-replication.sh
|
||||
--------------------------
|
||||
|
||||
Icinga plugin to check gluster geo replication. Returns OK, if no
|
||||
connection is Faulty and one connection is Active. Returns CRITICAL,
|
||||
if one connection is Faulty. Returns WARNING, if no connection is
|
||||
Active.
|
||||
|
||||
|
||||
Installation
|
||||
============
|
||||
@@ -37,6 +72,7 @@ Update
|
||||
------
|
||||
|
||||
cd /opt/icinga-checks
|
||||
sudo git pull
|
||||
|
||||
Configure
|
||||
---------
|
||||
|
58
diskio.sh
Executable file
58
diskio.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/bin/bash
#
# diskio.sh - Icinga plugin: check that each given path is writable and
# report the write speed measured with dd.
#
# Usage:   diskio.sh [-t <filename>] PATH...
# Config:  /etc/diskio.conf (optional, sourced); it may set CHECKPATHES to a
#          whitespace-separated list of paths that are always checked.
# Output:  one "OK - <path>; time=… speed=…" or "CRITICAL - <path>" line
#          per checked path.
# Exit:    0 if every path could be written, 2 if at least one failed,
#          1 on usage errors.

# Source the optional configuration. The misspelled legacy name
# /etc/doskio.conf is still honoured so existing installations keep working.
for conf in /etc/diskio.conf /etc/doskio.conf; do
    if test -e "${conf}"; then
        . "${conf}"
        break
    fi
done

# CHECKPATHES is deliberately split on whitespace into separate paths.
# shellcheck disable=SC2206
path=( ${CHECKPATHES} )
# Test file name, relative to each checked path.
filename=tmp/test.${HOSTNAME}

while test $# -gt 0; do
    case $1 in
        (-h|--help) cat <<EOF
$0 [OPTIONS] PATH…

  -h, --help                show this help
  -t, --tmpfile <filename>  filename for temporary file (default: ${filename})

PATH

  Path to check.

DESCRIPTION

Icinga plugin to check whether a given path is writable and checks the
performance of the given path. Detects problems with the filesystem or
specific io performance.

EOF
            exit;;
        (-t|--tmpfile)
            shift
            # The option needs a value; fail if nothing follows it.
            if test $# -lt 1; then
                echo "ERROR: missing option, try $0 --help"
                exit 1
            fi
            filename=$1;;
        (*)
            if ! test -d "$1"; then
                echo "ERROR: please specify an existing path, not $1" 1>&2
                exit 1
            fi
            path+=( "$1" );;
    esac
    shift
done

if test ${#path[@]} -eq 0; then
    echo "ERROR: missing path, try $0 --help"
    exit 1
fi

result=0
for file in "${path[@]}"; do
    target="${file}/${filename}"

    # Capture dd's output on its own so that $? really is dd's exit status
    # (in a pipeline it would be the status of the last command instead).
    # LC_ALL=C keeps the summary line in the format the sed pattern expects.
    output=$(LC_ALL=C dd if=/dev/zero of="${target}" bs=2M count=2 \
                     iflag=fullblock 2>&1)
    status=$?

    # Extract "<time><unit>" and "<speed><unit>" from the last two
    # comma-separated fields of dd's summary line. The path is added in the
    # shell, not in the sed replacement, so special characters in it are safe.
    measure=$(sed -n 's/.*, *\([^, ]*\) *\([^, ]*\), *\([^ ,]*\) *\([^, ]*\)/time=\1\2 speed=\3\4/p' <<<"${output}")

    # Both must succeed: dd wrote the file and the file could be removed.
    if rm -- "${target}" 2> /dev/null && test ${status} -eq 0; then
        printf '%s\n' "OK - ${file}; ${measure}"
    else
        printf '%s\n' "CRITICAL - ${file}"
        result=2
    fi
done
exit ${result}
|
65
gluster-geo-replication.sh
Executable file
65
gluster-geo-replication.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/bin/bash
#
# gluster-geo-replication.sh - Icinga plugin: check the state of a gluster
# geo-replication session.
#
# Usage:  gluster-geo-replication.sh source-volume target-volume \
#                                    target-user target-host
# Output: "<STATE> - <field 1> → <field 7> - …", one entry per status row.
# Exit:   0 (OK)       no row is Faulty and at least one row is Active
#         1 (WARNING)  no row is Faulty and no row is Active
#         2 (CRITICAL) a row is Faulty, the gluster command failed, or the
#                      arguments are wrong

case ${1-} in
    (-h|--help) cat <<EOF
$0 [OPTIONS] source-volume target-volume target-user target-host

  -h, --help     show this help

  source-volume  replication source volume name
  target-volume  replication target volume name
  target-user    replication target user name
  target-host    replication target host name

DESCRIPTION

Icinga plugin to check gluster geo replication. Returns OK, if no
connection is Faulty and one connection is Active. Returns CRITICAL,
if one connection is Faulty- Returns WARNING, if no connection is
Active.

EOF
        exit;;
esac

if test $# -ne 4; then
    echo "ERROR: wrong number of options $*" 1>&2
    exit 2
fi
sourcevolume=$1
targetvolume=$2
replicationuser=$3
targethost=$4

# Run gluster on its own (not in a pipeline) so that a failure of the
# command itself is detected; piping into awk would hide its exit status.
if ! raw=$(sudo gluster volume geo-replication \
                "${sourcevolume}" \
                "${replicationuser}@${targethost}::${targetvolume}" \
                status); then
    echo "CRITICAL - wrong configuration"
    exit 2
fi

# Skip the first three lines of the output and summarise the remaining rows.
# Field 7 is compared to "Faulty"/"Active" exactly, so a node name that
# happens to contain "Active" cannot be mistaken for an active session.
rep=$(awk '
    BEGIN { faulty = 0; active = 0; res = "" }
    NR > 3 {
        res = res " - " $1 " → " $7
        if ($7 == "Faulty") faulty = 1
        if ($7 == "Active") active = 1
    }
    END {
        if (faulty)      status = "CRITICAL"
        else if (active) status = "OK"
        else             status = "WARNING"
        print status res
    }' <<<"${raw}")

printf '%s\n' "${rep}"
case ${rep} in
    (OK*)      exit 0;;
    (WARNING*) exit 1;;
    (*)        exit 2;;
esac
|
||||
|
@@ -39,7 +39,8 @@ fi
|
||||
time=$(mktemp)
|
||||
out=$(mktemp)
|
||||
err=$(mktemp)
|
||||
if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then
|
||||
if /usr/bin/time -f "elapsed=%es" -qo $time mount | grep -q 'on '"$path"' '; then
|
||||
if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then
|
||||
if [[ $(<$time) =~ elapsed=0.0[012]s ]]; then
|
||||
level=0
|
||||
STATUS="OK - ${path}:"
|
||||
@@ -47,14 +48,18 @@ if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s
|
||||
level=1
|
||||
STATUS="WARNING - ${path}:"
|
||||
fi
|
||||
else
|
||||
else
|
||||
if test -s $err; then
|
||||
level=2
|
||||
STATUS="CRITICAL - $(<$err):"
|
||||
else
|
||||
level=1
|
||||
level=2
|
||||
STATUS="CRITICAL - ${path} is empty:"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
level=2
|
||||
STATUS="CRITICAL - ${path} not mounted:"
|
||||
fi
|
||||
echo ${STATUS} $(<$time)
|
||||
rm $time $out $err 2>&1 > /dev/null
|
||||
|
Reference in New Issue
Block a user