diff options
author | Jonas Gunz <himself@jonasgunz.de> | 2021-09-30 23:32:45 +0200 |
---|---|---|
committer | Jonas Gunz <himself@jonasgunz.de> | 2021-09-30 23:32:45 +0200 |
commit | 536bd24438799a15686415b054f57676b053702d (patch) | |
tree | ac5b93d2a342166c3d2d11a867934ec725b7273e /plugins | |
parent | cf666ed055e1e76c5acf0d323d87438bfb8342d6 (diff) | |
download | monitoring_custom-536bd24438799a15686415b054f57676b053702d.tar.gz |
move
Diffstat (limited to 'plugins')
-rwxr-xr-x | plugins/check_ipmi | 75 | ||||
-rwxr-xr-x | plugins/check_jenkins_job | 29 | ||||
-rwxr-xr-x | plugins/check_mem | 51 | ||||
-rwxr-xr-x | plugins/check_pressure | 72 | ||||
-rwxr-xr-x | plugins/check_sensors | 30 | ||||
-rwxr-xr-x | plugins/check_snmp_if | 114 | ||||
-rwxr-xr-x | plugins/check_systemd | 25 |
7 files changed, 396 insertions, 0 deletions
diff --git a/plugins/check_ipmi b/plugins/check_ipmi
new file mode 100755
index 0000000..b1223ee
--- /dev/null
+++ b/plugins/check_ipmi
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Nagios/Icinga check for IPMI sensors. Every non-discrete sensor reading is
+# compared against the warning/critical thresholds listed by ipmitool.
+# All arguments are handed to ipmitool unchanged, e.g.:
+#-I lanplus -U <User> -P <PW> -L User -H <Host>
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+
+command -v ipmitool > /dev/null 2>&1 || exit 3
+
+# Print the arguments joined by single spaces. Callers pass the raw table
+# cell unquoted so that word splitting trims the padding around it.
+# also replaces na with ""
+function strip_string() {
+  local RETURN=""
+  [ "$1" = "na" ] || RETURN="$*"
+  echo "$RETURN"
+}
+
+# Return 0 if $1 > $2 (compared as floats by bc), 1 otherwise.
+# Anything but exactly two arguments also yields 1; the callers rely on this
+# by passing possibly empty thresholds unquoted.
+function float_compare() {
+  [ $# -ne 2 ] && return 1
+
+  (( $(echo "$1 > $2" | bc -l ) )) && return 0
+  return 1
+}
+
+WARN=()
+CRIT=()
+
+PERFDATA=()
+
+DATA="$(ipmitool "$@" sensor list)"
+RET=$?
+
+if [ $RET -ne 0 ]; then
+  echo "UNKNOWN: IPMI error"
+  echo "$DATA"
+  exit 3
+fi
+
+# Fields read from every '|'-separated line:
+#   sensor | value | unit | state | lnr | lc | lw | uw | uc | unr
+# Lower Non-Recoverable : na
+# Lower Critical : 5.000
+# Lower Non-Critical : 10.000
+# Upper Non-Critical : 61.000
+# Upper Critical : 66.000
+# Upper Non-Recoverable : na
+while IFS='|' read -r sensor value unit state lnr lc lw uw uc unr ; do
+  val="$( strip_string $value )"
+  [ -z "$val" ] && continue
+
+  unt="$( strip_string $unit )"
+  if [ -z "$unt" ] || [ "$unt" = "discrete" ]; then continue; fi
+
+  snsr="$( strip_string $sensor )"
+
+  low_warn="$( strip_string $lw )"
+  low_crit="$( strip_string $lc )"
+  hi_warn="$( strip_string $uw )"
+  hi_crit="$( strip_string $uc )"
+
+  # Thresholds stay unquoted on purpose: an empty ("na") threshold vanishes,
+  # float_compare then sees one argument and reports "not exceeded".
+  if float_compare $val $hi_crit || float_compare $low_crit $val; then
+    CRIT+=("$snsr")
+  elif float_compare $val $hi_warn || float_compare $low_warn $val; then
+    WARN+=("$snsr")
+  fi
+
+  # Perfdata labels that contain spaces have to be single-quoted.
+  PERFDATA+=("'$snsr'=$val;$low_warn${hi_warn:+:$hi_warn};$low_crit${hi_crit:+:$hi_crit}")
+done <<< "$DATA"
+
+# CRITICAL outranks WARNING, so it has to be tested first.
+if [ ${#CRIT[@]} -gt 0 ]; then
+  echo -n "IPMI CRITICAL: ${CRIT[*]}"
+  RET=2
+elif [ ${#WARN[@]} -gt 0 ]; then
+  echo -n "IPMI WARNING: ${WARN[*]}"
+  RET=1
+else
+  echo -n "IPMI OK"
+  RET=0
+fi
+
+echo "|${PERFDATA[*]}"
+exit $RET
diff --git a/plugins/check_jenkins_job b/plugins/check_jenkins_job
new file mode 100755
index 0000000..ab40f87
--- /dev/null
+++ b/plugins/check_jenkins_job
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Nagios/Icinga check: reports whether the last build of a Jenkins job
+# succeeded. Needs curl and jq.
+# Usage: check_jenkins_job <jenkins-url> <view> <job> <user> <password> [curl opts ...]
+# Exit status: 0 last build succeeded, 2 any other result, 3 usage/request error.
+
+# User and password are read from $4/$5, so five arguments are mandatory.
+if [ "$#" -lt 5 ]; then
+  echo "Usage: $0 <jenkins-url> <view> <job> <user> <password> [curl opts ...]"
+  exit 3
+fi
+
+JENKINS_URL=$1
+VIEW=$2
+JOB=$3
+USER=$4
+PASS=$5
+
+# Whatever is left is passed to curl verbatim.
+shift 5
+
+if ! CURL_OUT=$( curl -s --basic -u "$USER:$PASS" "$@" \
+    "$JENKINS_URL/view/$VIEW/job/$JOB/lastBuild/api/json" ); then
+  echo "JOB UNKNOWN: request to $JENKINS_URL failed"
+  exit 3
+fi
+
+# A reply that is not JSON (login page, error body, ...) makes jq fail.
+if ! RESULT=$( jq -r .result <<< "$CURL_OUT" 2> /dev/null ); then
+  echo "JOB UNKNOWN: $JOB: response is not valid JSON"
+  exit 3
+fi
+ID=$( jq -r .displayName <<< "$CURL_OUT" )
+
+if [ "$RESULT" = "SUCCESS" ]; then
+  echo "JOB OK: $JOB $ID succeeded"
+  exit 0
+fi
+
+echo "JOB CRITICAL: $JOB $ID failed"
+exit 2
diff --git a/plugins/check_mem b/plugins/check_mem
new file mode 100755
index 0000000..09b4ba8
--- /dev/null
+++ b/plugins/check_mem
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Nagios/Icinga check for memory usage in percent, as reported by `free`.
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+
+if [ $# -ne 2 ]; then
+  cat << EOF
+Check memory usage. Values: used%
+USAGE: $0 <WARN> <CRIT>
+Note: USED=TOTAL-AVAILABLE
+EOF
+  exit 3
+fi
+
+WARN=$1
+CRIT=$2
+
+# A non-numeric threshold would make the comparisons below error out and the
+# check fall through to "OK", so reject it up front.
+for threshold in "$WARN" "$CRIT"; do
+  case "$threshold" in
+    '' | *[!0-9]*)
+      echo "UNKNOWN: thresholds must be whole numbers, got \"$threshold\"."
+      exit 3
+      ;;
+  esac
+done
+
+if ! command -v free > /dev/null; then
+  echo UNKNOWN: free command not found.
+  exit 3
+fi
+
+TOTAL=0
+USED=0
+AVAIL=0
+
+# LC_ALL=C keeps the "Mem:" row label from being translated. Field 2 of that
+# row is used as the total, field 7 as the available memory.
+while read -r label total _ _ _ _ available _; do
+  [ "$label" != "Mem:" ] && continue
+
+  TOTAL=$total
+  AVAIL=$available
+  break
+done < <(LC_ALL=C free)
+
+if [ -z "$AVAIL" ] || [ "${TOTAL:-0}" -le 0 ]; then
+  echo "UNKNOWN: could not read total/available memory from free."
+  exit 3
+fi
+USED=$((TOTAL - AVAIL))
+
+# Hundredths of a percent in integer arithmetic, rounded down.
+HUNDREDTHS=$((USED * 10000 / TOTAL))
+PERCENT_USED_INT=$((HUNDREDTHS / 100))
+PERCENT_USED=$(printf '%d.%02d' "$PERCENT_USED_INT" "$((HUNDREDTHS % 100))")
+
+if [ "$PERCENT_USED_INT" -ge "$CRIT" ]; then
+  RET=2
+  echo -n "Memory CRITICAL: "
+elif [ "$PERCENT_USED_INT" -ge "$WARN" ]; then
+  RET=1
+  echo -n "Memory WARNING: "
+else
+  RET=0
+  echo -n "Memory OK: "
+fi
+
+echo "$PERCENT_USED% Used | percent_used=$PERCENT_USED% total=$TOTAL available=$AVAIL"
+exit $RET
diff --git a/plugins/check_pressure b/plugins/check_pressure
new file mode 100755
index 0000000..787827c
--- /dev/null
+++ b/plugins/check_pressure
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+# Nagios/Icinga check for Linux pressure stall information (/proc/pressure).
+#USAGE: check_pressure cpu|io|memory some|full [--10 w,c] [--60 w,c] [--300 w,c]
+# w,c are the warning and critical limits for the avg10/avg60/avg300 value.
+# Exit status: 0 OK (or no /proc/pressure), 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+
+RESULTS=(OK WARNING CRITICAL UNKNOWN)
+
+# fail <exit code> <message ...>: print the message and leave with that code.
+fail() {
+  local RET=$1
+  shift
+  echo "$@"
+  exit $RET
+}
+
+# Return 0 if $1 > $2 (compared as floats by bc), 1 otherwise.
+float_compare() {
+  [ $# -ne 2 ] && return 1
+
+  (( $(echo "$1 > $2" | bc -l ) )) && return 0
+  return 1
+}
+
+# pressure is a relatively new feature. Ignore if not present.
+[ ! -e "/proc/pressure" ] && fail 0 pressure API not present
+
+RESOURCE="$1"
+# -f rather than -e: with an empty argument the path is the directory itself.
+[ ! -f "/proc/pressure/$RESOURCE" ] && fail 3 invalid resource \""$RESOURCE"\"
+shift
+
+case "$1" in
+  some | full) ;;
+  *) fail 3 \""$1"\" invalid. must be \"some\" or \"full\" ;;
+esac
+MODE="$1"
+shift
+
+VAL=()
+while [ $# -gt 0 ]; do
+  case "$1" in
+    "--10") SLOT=0 ;;
+    "--60") SLOT=1 ;;
+    "--300") SLOT=2 ;;
+    "")
+      shift
+      continue
+      ;;
+    *)
+      fail 3 "Unknown argument: \"$1\""
+      ;;
+  esac
+
+  # Both limits are required; bc would otherwise be fed an empty operand.
+  [[ "$2" =~ ^[0-9]+(\.[0-9]+)?,[0-9]+(\.[0-9]+)?$ ]] \
+    || fail 3 "$1 needs thresholds in the form <warn>,<crit>"
+  VAL[$SLOT]="$2"
+  shift 2
+done
+
+[ "${#VAL[@]}" -eq 0 ] && fail 3 "At least one set of thresholds must be set"
+
+read -r MD avg10 avg60 avg300 catch < <(grep "^$MODE" "/proc/pressure/$RESOURCE") \
+  || fail 3 "Parsing of file failed. Does $RESOURCE support $MODE?"
+
+AVG=( "$avg10" "$avg60" "$avg300" )
+WARNING_OFFENDERS=()
+CRITICAL_OFFENDERS=()
+
+for i in 0 1 2; do
+  [ -z "${VAL[$i]}" ] && continue
+  IFS=',' read -r WARN CRIT <<< "${VAL[$i]}"
+  IFS='=' read -r KEY VALUE <<< "${AVG[$i]}"
+
+  if float_compare "$VALUE" "$CRIT"; then
+    CRITICAL_OFFENDERS+=("$KEY($VALUE)=C")
+  elif float_compare "$VALUE" "$WARN"; then
+    WARNING_OFFENDERS+=("$KEY($VALUE)=W")
+  fi
+done
+
+WORST=0
+[ ${#WARNING_OFFENDERS[@]} -gt 0 ] && WORST=1
+[ ${#CRITICAL_OFFENDERS[@]} -gt 0 ] && WORST=2
+
+echo "PRESSURE ${RESULTS[$WORST]}: ${CRITICAL_OFFENDERS[*]} ${WARNING_OFFENDERS[*]} | ${AVG[*]}"
+exit $WORST
diff --git a/plugins/check_sensors b/plugins/check_sensors
new file mode 100755
index 0000000..8cb1e20
--- /dev/null
+++ b/plugins/check_sensors
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Nagios/Icinga check for CPU core temperatures as printed by `sensors`.
+# USAGE: check_sensors [WARN [CRIT]]
+# Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (no core reading found).
+
+# °C
+WARN=${1:-75}
+CRIT=${2:-85}
+
+PERFDATA="|"
+IS_WARN="false"
+IS_CRIT="false"
+
+TEXT=(OK WARNING CRITICAL UNKNOWN)
+LEVEL=0
+
+# Keep only the reading of each "Core N: +xx.x..." line.
+mapfile -t DATA < <(sensors \
+  | sed -n 's|^Core\s[0-9]\+:\s\++\([0-9]\+\.[0-9]\+\).*$|\1|p')
+
+for (( i=0; i<${#DATA[@]}; i++ )); do
+  (( $(echo "${DATA[$i]} > $WARN" | bc -l ) )) && IS_WARN="true"
+  (( $(echo "${DATA[$i]} > $CRIT" | bc -l ) )) && IS_CRIT="true"
+  PERFDATA+=" core_$i=${DATA[$i]}C"
+done
+
+[ "$IS_WARN" = true ] && LEVEL=1
+[ "$IS_CRIT" = true ] && LEVEL=2
+[ ${#DATA[@]} -eq 0 ] && LEVEL=3
+
+echo "sensors ${TEXT[$LEVEL]}$PERFDATA"
+
+exit $LEVEL
diff --git a/plugins/check_snmp_if b/plugins/check_snmp_if
new file mode 100755
index 0000000..5d56ae3
--- /dev/null
+++ b/plugins/check_snmp_if
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# Nagios/Icinga check for the operational state of one network interface,
+# queried with snmpwalk (SNMP v2c community or v3 authNoPriv).
+# USAGE: check_snmp_if -H <host> -i <index> (-c <community> | -u <user> -p <password>)
+# Exit status: 0 interface up, 2 not up or not found, 3 usage or SNMP error.
+#
+# https://ixnfo.com/en/snmp-oid-and-mib-for-interfaces.html
+# https://oidref.com/1.3.6.1.2.1.2.2.1
+# https://bestmonitoringtools.com/mibdb/mibdb_search.php?mib=IF-MIB
+
+RESULTS=(OK WARNING CRITICAL UNKNOWN)
+
+readonly MIB_IFOPERSTATUS="1.3.6.1.2.1.2.2.1.8"
+readonly MIB_ALIAS="1.3.6.1.2.1.31.1.1.1.18"
+readonly MIB_NAME="1.3.6.1.2.1.31.1.1.1.1"
+readonly MIB_IN="1.3.6.1.2.1.31.1.1.1.6"
+readonly MIB_OUT="1.3.6.1.2.1.31.1.1.1.10"
+readonly MIB_ERR_IN="1.3.6.1.2.1.2.2.1.14"
+readonly MIB_ERR_OUT="1.3.6.1.2.1.2.2.1.20"
+
+# result <exit code> <message ...>: print the status line, followed by the
+# perfdata once it has been collected, and leave with that code.
+result() {
+  local RET=$1
+  shift
+  echo -n "SNMP IF ${RESULTS[$RET]}: $*"
+  perfdata
+  exit $RET
+}
+
+# walk <oid>: query the target host with the selected credentials.
+walk() {
+  snmpwalk "${AUTH[@]}" "$TARGET_HOST" "$1"
+}
+
+# Filter: reduce "<oid> = <type>: <value>" lines to the bare, unquoted value.
+extract_val() {
+  sed -n 's/.*=\(.*\)/\1/p' | sed -n 's/.*:\s*\(.*\)/\1/p' | tr -d "\"'"
+}
+
+# Finish the status line: append the counters if they have been collected and
+# always terminate the line.
+perfdata() {
+  if [ $GENPERFDATA -ne 0 ]; then
+    echo -n "| "
+    echo -n "in=$INOCTETS "
+    echo -n "out=$OUTOCTETS "
+    echo -n "in_err=$INERRORS "
+    echo -n "out_err=$OUTERRORS "
+  fi
+  echo
+}
+
+GENPERFDATA=0
+
+# The target gets its own variable: bash presets HOSTNAME to the local host
+# name, so a missing -H could not be detected through it.
+TARGET_HOST=""
+while getopts ":H:c:i:u:p:" opt; do
+  case $opt in
+    H) TARGET_HOST="$OPTARG" ;;
+    c) COMMUNITY="$OPTARG" ;;
+    i) INTERFACE="$OPTARG" ;;
+    u) SNMP_USER="$OPTARG" ;;
+    p) SNMP_PW="$OPTARG" ;;
+    :) result 3 "-$OPTARG requires an argument." ;;
+    *) result 3 "Invalid option -$OPTARG" ;;
+  esac
+done
+
+[ -z "$TARGET_HOST" ] && result 3 "No host given (-H)"
+[ -z "$INTERFACE" ] && result 3 "No interface index given (-i)"
+
+# Kept as an array so credentials containing spaces stay single arguments.
+AUTH=()
+if [ -n "$COMMUNITY" ]; then
+  AUTH=(-c "$COMMUNITY" -v 2c)
+elif [ -n "$SNMP_USER" ] && [ -n "$SNMP_PW" ]; then
+  AUTH=(-v 3 -u "$SNMP_USER" -A "$SNMP_PW" -l authNoPriv)
+else
+  result 3 "No or insufficient authentication info provided"
+fi
+
+SNMPWALK_RESULT=$(walk "$MIB_IFOPERSTATUS.$INTERFACE")
+RET=$?
+
+[ $RET -ne 0 ] && result 3 "snmpwalk failed with code $RET: $SNMPWALK_RESULT"
+[ -z "$SNMPWALK_RESULT" ] && result 2 "No matching entry found."
+
+GENPERFDATA=1
+
+NAME=$(walk "$MIB_NAME.$INTERFACE" | extract_val)
+ALIAS=$(walk "$MIB_ALIAS.$INTERFACE" | extract_val)
+
+COMPLETE_NAME="$NAME"
+if [ -n "$ALIAS" ]; then COMPLETE_NAME+=" ($ALIAS)"; fi
+
+INOCTETS=$(walk "$MIB_IN.$INTERFACE" | extract_val)
+OUTOCTETS=$(walk "$MIB_OUT.$INTERFACE" | extract_val)
+
+INERRORS=$(walk "$MIB_ERR_IN.$INTERFACE" | extract_val)
+OUTERRORS=$(walk "$MIB_ERR_OUT.$INTERFACE" | extract_val)
+
+# Accept the symbolic ("up", "up(1)") and the numeric ("1") rendering.
+case "$(extract_val <<< "$SNMPWALK_RESULT")" in
+  up* | 1) result 0 "$COMPLETE_NAME is up." ;;
+esac
+
+result 2 "$COMPLETE_NAME $INTERFACE is not up: $SNMPWALK_RESULT"
diff --git a/plugins/check_systemd b/plugins/check_systemd
new file mode 100755
index 0000000..bf8d532
--- /dev/null
+++ b/plugins/check_systemd
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Nagios/Icinga check: every unit named on the command line must be active.
+# USAGE:
+# check_systemd [UNIT ...]
+# Exit status: 0 all units active, 2 at least one is not, 3 systemctl missing.
+
+if ! command -v systemctl > /dev/null 2>&1; then
+  echo "systemd UNKNOWN: systemctl not found"
+  exit 3
+fi
+
+FAILED=()
+
+for unit in "$@"; do
+  systemctl is-active "$unit" > /dev/null || FAILED+=("$unit")
+done
+
+if [ ${#FAILED[@]} -gt 0 ]; then
+  echo "systemd CRITICAL: failed units: ${FAILED[*]}"
+  exit 2
+fi
+
+#if ! systemctl is-system-running > /dev/null; then
+# echo systemd WARNING: System state degraded.
+# exit 1
+#fi
+
+echo "systemd OK: $# units checked"
+exit 0