new checks diskio.sh and gluster-geo-replication.sh
This commit is contained in:
42
README.md
42
README.md
@@ -15,12 +15,47 @@ average per CPU. Different to check_load, the load is divided by the
|
|||||||
number of CPU units and therefore normalized. So the same warning and
|
number of CPU units and therefore normalized. So the same warning and
|
||||||
critical levels fit for any server.
|
critical levels fit for any server.
|
||||||
|
|
||||||
|
If a server's load is often high, you can find tips in [Isolating Linux High System Load](https://www.tummy.com/articles/isolating-heavy-load/), in short words:
|
||||||
|
|
||||||
|
- see the load average in `uptime`
|
||||||
|
- check system logs with `dmesg`
|
||||||
|
- see `vmstat`:
|
||||||
|
- high `WA` (wait) column: CPU is often waiting, you probably have high disk load
|
||||||
|
- check `swap` → `si` and `so`, if they are often much above 0, you are out of memory
|
||||||
|
- check memory usage of processes with `ps awwlx --sort=vsz`
|
||||||
|
- solution: add more memory
|
||||||
|
- if `cpu` → `ID` (idle) is around 0, your CPU is overloaded
|
||||||
|
- if `SY` (system) is high, there could be large directories, e.g. mail spam
|
||||||
|
- if `SY` (system) is high, it could be the firewall iptables
|
||||||
|
- if `US` (user-space) is high, check with `top` for CPU consuming processes
|
||||||
|
- if `io` → `bi` and `bo` (in/out) are high, check with `iostat` or `sudo iotop`
|
||||||
|
|
||||||
|
|
||||||
mountpoint.sh
|
mountpoint.sh
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
Icinga plugin to check whether a given path is available. Detects
|
Icinga plugin to check whether a given mountpoint is
|
||||||
problems, e.g. with glusterfs: In case of a problem, there is an error
|
available. Detects problems, e.g. with glusterfs: In case of a
|
||||||
message such as "socket not connected". Called with path to check.
|
problem, there is an error message such as "socket not
|
||||||
|
connected". Called with path to check.
|
||||||
|
|
||||||
|
diskio.sh
|
||||||
|
---------
|
||||||
|
|
||||||
|
Icinga plugin to check whether a given path is writable and checks the
|
||||||
|
performance of the given path. Detects problems with the filesystem or
|
||||||
|
specific io performance. Writes a test file using `dd` and reports
|
||||||
|
write speed. It is recommended to use a subdirectory tmp for writing
|
||||||
|
test files.
|
||||||
|
|
||||||
|
gluster-geo-replication.sh
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Icinga plugin to check gluster geo replication. Returns OK, if no
|
||||||
|
connection is Faulty and one connection is Active. Returns CRITICAL,
|
||||||
|
if one connection is Faulty. Returns WARNING, if no connection is
|
||||||
|
Active.
|
||||||
|
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
============
|
============
|
||||||
@@ -37,6 +72,7 @@ Update
|
|||||||
------
|
------
|
||||||
|
|
||||||
cd /opt/icinga-checks
|
cd /opt/icinga-checks
|
||||||
|
sudo git pull
|
||||||
|
|
||||||
Configure
|
Configure
|
||||||
---------
|
---------
|
||||||
|
58
diskio.sh
Executable file
58
diskio.sh
Executable file
@@ -0,0 +1,58 @@
|
|||||||
|
#!/bin/bash
#
# diskio.sh - Icinga plugin: check that one or more paths are writable and
# report the write speed that dd measured while writing a small test file.
#
# Usage: diskio.sh [-h|--help] [-t|--tmpfile <filename>] PATH…
#
# Paths can also be preset through the variable CHECKPATHES in an optional
# configuration file. One status line is printed per path.
#
# Exit status: 0 if every path could be written, 2 if at least one path
# failed, 1 on usage errors.

# Optional configuration. /etc/doskio.conf is the historical (misspelled)
# name and is still honoured; /etc/diskio.conf is sourced afterwards, so
# its settings take precedence when both files exist.
for conf in /etc/doskio.conf /etc/diskio.conf; do
    if test -e "${conf}"; then
        . "${conf}"
    fi
done

# CHECKPATHES is a whitespace separated list, word splitting is intended.
# shellcheck disable=SC2206
path=( ${CHECKPATHES} )
# Test file name, relative to each checked path.
filename=tmp/test.${HOSTNAME}

while test $# -gt 0; do
    case "$1" in
        (-h|--help) cat <<EOF
$0 [OPTIONS] PATH…

  -h, --help                 show this help
  -t, --tmpfile <filename>   filename for temporary file (default: ${filename})

PATH

  Path to check.

DESCRIPTION

Icinga plugin to check whether a given path is writable and checks the
performance of the given path. Detects problems with the filesystem or
specific io performance.

EOF
            exit;;
        (-t|--tmpfile)
            # the option needs a value following it
            if test $# -lt 2; then
                echo "ERROR: missing option, try $0 --help"
                exit 1
            fi
            shift
            filename=$1;;
        (*)
            if ! test -d "$1"; then
                echo "ERROR: please specify an existing path, not $1" 1>&2
                exit 1
            fi
            path+=( "$1" );;
    esac
    shift
done

if test ${#path[@]} -eq 0; then
    echo "ERROR: missing path, try $0 --help"
    exit 1
fi

result=0
for dir in "${path[@]}"; do
    target="${dir}/${filename}"

    # Run dd on its own (not inside a pipeline) so that $? really is the
    # exit status of dd. LC_ALL=C keeps the summary line in the format the
    # sed expression below expects, independent of the system locale.
    output=$(LC_ALL=C dd if=/dev/zero of="${target}" bs=2M count=2 iflag=fullblock 2>&1)
    dd_status=$?

    # Extract "<time><unit>" and "<speed><unit>" from dd's summary line.
    # The path is added by the shell afterwards instead of being pasted
    # into the sed replacement, where '&' or '\' would be misinterpreted.
    timing=$(sed -n 's/.*, *\([^, ]*\) *\([^, ]*\), *\([^ ,]*\) *\([^, ]*\)/time=\1\2 speed=\3\4/p' <<<"${output}")

    # The test file is always removed; the path is OK only if both the
    # write and the cleanup succeeded.
    if rm -- "${target}" 2> /dev/null && test ${dd_status} -eq 0; then
        status="OK - ${dir}; ${timing}"
    else
        status="CRITICAL - ${dir}"
        result=2
    fi
    printf '%s\n' "${status}"
done
exit $result
|
65
gluster-geo-replication.sh
Executable file
65
gluster-geo-replication.sh
Executable file
@@ -0,0 +1,65 @@
|
|||||||
|
#!/bin/bash
#
# gluster-geo-replication.sh - Icinga plugin: check the state of a gluster
# geo replication session.
#
# Usage: gluster-geo-replication.sh [-h|--help] \
#            source-volume target-volume target-user target-host
#
# Exit status:
#   0  no connection is Faulty and at least one is Active
#   1  no connection is Faulty, but none is Active
#   2  a connection is Faulty, the gluster command failed, or the
#      arguments are wrong

# Help is only recognised as the first argument.
case "${1-}" in
    (-h|--help) cat <<EOF
$0 [OPTIONS] source-volume target-volume target-user target-host

  -h, --help       show this help

  source-volume    replication source volume name
  target-volume    replication target volume name
  target-user      replication target user name
  target-host      replication target host name

DESCRIPTION

Icinga plugin to check gluster geo replication. Returns OK, if no
connection is Faulty and one connection is Active. Returns CRITICAL,
if one connection is Faulty- Returns WARNING, if no connection is
Active.

EOF
        exit;;
esac

if test $# -ne 4; then
    echo "ERROR: wrong number of options $*" 1>&2
    exit 2
fi
sourcevolume=$1
targetvolume=$2
replicationuser=$3
targethost=$4

# Query gluster on its own (not inside a pipeline), so that $? is the exit
# status of the gluster command and a failing call is actually detected.
raw=$(sudo gluster volume geo-replication \
    "${sourcevolume}" \
    "${replicationuser}@${targethost}::${targetvolume}" \
    status)
if test $? -ne 0; then
    echo "CRITICAL - wrong configuration"
    exit 2
fi

# Skip the first three lines of the status output, then collect column 1
# and column 7 of every remaining line; any "Faulty" in column 7 turns the
# overall state into CRITICAL.
# NOTE(review): presumably column 1 is the node and column 7 the session
# status - confirm against the output of the installed gluster version.
rep=$(awk 'BEGIN {status="OK"; res=""} NR>3 {res=res " - " $1 " → " $7} NR>3 && $7=="Faulty" {status="CRITICAL"} END {print status res}' <<<"${raw}")

if [[ $rep =~ ^OK ]]; then
    if [[ $rep =~ Active ]]; then
        printf '%s\n' "${rep}"
        exit 0
    else
        # nothing is Faulty, but nothing is Active either: downgrade to WARNING
        printf '%s\n' "WARNING${rep#OK}"
        exit 1
    fi
else
    printf '%s\n' "${rep}"
    exit 2
fi
|
||||||
|
|
@@ -39,6 +39,7 @@ fi
|
|||||||
time=$(mktemp)
|
time=$(mktemp)
|
||||||
out=$(mktemp)
|
out=$(mktemp)
|
||||||
err=$(mktemp)
|
err=$(mktemp)
|
||||||
|
if /usr/bin/time -f "elapsed=%es" -qo $time mount | grep -q 'on '"$path"' '; then
|
||||||
if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then
|
if /usr/bin/time -f "elapsed=%es" -qo $time ls "$path" > $out 2> $err && test -s $out; then
|
||||||
if [[ $(<$time) =~ elapsed=0.0[012]s ]]; then
|
if [[ $(<$time) =~ elapsed=0.0[012]s ]]; then
|
||||||
level=0
|
level=0
|
||||||
@@ -52,10 +53,14 @@ else
|
|||||||
level=2
|
level=2
|
||||||
STATUS="CRITICAL - $(<$err):"
|
STATUS="CRITICAL - $(<$err):"
|
||||||
else
|
else
|
||||||
level=1
|
level=2
|
||||||
STATUS="CRITICAL - ${path} is empty:"
|
STATUS="CRITICAL - ${path} is empty:"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
level=2
|
||||||
|
STATUS="CRITICAL - ${path} not mounted:"
|
||||||
|
fi
|
||||||
echo ${STATUS} $(<$time)
|
echo ${STATUS} $(<$time)
|
||||||
rm $time $out $err 2>&1 > /dev/null
|
rm $time $out $err 2>&1 > /dev/null
|
||||||
exit $level
|
exit $level
|
||||||
|
Reference in New Issue
Block a user