183 lines
4.9 KiB
Plaintext
183 lines
4.9 KiB
Plaintext
|
#!/bin/bash
|
||
|
# -*- sh -*-
|
||
|
|
||
|
: << =cut
|
||
|
|
||
|
=head1 NAME
|
||
|
|
||
|
pvc - Plugin to monitor a PVC cluster.
|
||
|
|
||
|
=head1 AUTHOR
|
||
|
|
||
|
Joshua Boniface <joshua@boniface.me>
|
||
|
|
||
|
=head1 LICENSE
|
||
|
|
||
|
GPLv3
|
||
|
|
||
|
=head1 BUGS
|
||
|
|
||
|
None known.
|
||
|
|
||
|
=head1 MAGIC MARKERS
|
||
|
|
||
|
#%# family=auto
|
||
|
#%# capabilities=autoconf
|
||
|
|
||
|
=cut
|
||
|
|
||
|
# Load the Munin shell helper library and call its is_multigraph helper.
# NOTE(review): print_warning/print_critical used by output_config are
# presumably provided by this library as well - confirm.
source "${MUNIN_LIBDIR}/plugins/plugin.sh"
is_multigraph

# Default thresholds; the *_alert fields report the states 0, 1 or 2.
critical=1.99
warning=0.99

# External commands used to collect and parse the cluster status.
JQ_CMD="/usr/bin/jq"
PVC_CMD="/usr/bin/pvc --quiet --cluster local status --format json-pretty"

# Exported so that child processes (the pvc client) can see it.
PVC_CLIENT_DIR="/run/shm/munin-pvc"
export PVC_CLIENT_DIR
|
||
|
|
||
|
#######################################
# Print a one-line description of the plugin and terminate the script.
# The argument dispatcher calls this only for unrecognised invocations
# (and follows it with `exit 1`), so the script must end with a failure
# status; exiting 0 here would make that `exit 1` unreachable.
# Outputs: one description line on stdout
# Returns: never returns; exits with status 1
#######################################
output_usage() {
    echo "This plugin outputs information about a PVC cluster and node"
    exit 1
}
|
||
|
|
||
|
#######################################
# Answer Munin's "autoconf" query: print "yes" when both the PVC status
# command and jq run successfully, otherwise "no (<reason>)".
# Globals:   PVC_CMD (read), JQ_CMD (read)
# Outputs:   a single "yes" / "no (...)" line on stdout
# Returns:   0 (status of the final echo)
#######################################
output_autoconf() {
    # Keep the probe results out of the global namespace.
    local pvc_ret jq_ret

    # Unquoted on purpose: PVC_CMD holds a command together with its arguments.
    $PVC_CMD &>/dev/null
    pvc_ret=$?
    $JQ_CMD --version &>/dev/null
    jq_ret=$?

    # A pvc failure is reported first, even when jq is missing too.
    if (( pvc_ret != 0 )); then
        echo "no (no 'pvc' command found or local cluster not usable)"
    elif (( jq_ret != 0 )); then
        echo "no (no 'jq' command found)"
    else
        echo "yes"
    fi
}
|
||
|
|
||
|
#######################################
# Emit the Munin configuration stanza for one single-field graph whose
# only field carries the same name as the graph.
# Arguments:
#   $1 - multigraph / field name
#   $2 - graph title
#   $3 - vertical axis label
#   $4 - graph info text
#   $5 - field label
#   $6 - field maximum (the minimum is always 0)
#   $7 - field info text
# Outputs:   eleven configuration lines on stdout
#######################################
_pvc_graph_config() {
    local graph=$1 title=$2 vlabel=$3 graph_info=$4
    local label=$5 max=$6 field_info=$7

    printf 'multigraph %s\n' "${graph}"
    printf 'graph_title %s\n' "${title}"
    printf 'graph_args --base 1000\n'
    printf 'graph_vlabel %s\n' "${vlabel}"
    printf 'graph_category pvc\n'
    printf 'graph_info %s\n' "${graph_info}"
    printf '%s.label %s\n' "${graph}" "${label}"
    printf '%s.type GAUGE\n' "${graph}"
    printf '%s.max %s\n' "${graph}" "${max}"
    printf '%s.min 0\n' "${graph}"
    printf '%s.info %s\n' "${graph}" "${field_info}"
}

#######################################
# Answer Munin's "config" query: describe the four graphs of this plugin
# (cluster health, cluster alert state, node health, node alert state).
# The two alert graphs additionally get their thresholds printed through
# the print_warning / print_critical helpers.
# Outputs:   graph configuration on stdout
# Returns:   never returns; exits with status 0
#######################################
output_config() {
    _pvc_graph_config pvc_cluster_health \
        'PVC Cluster Health' 'Health%' \
        'Health of the PVC cluster' \
        'Cluster Health' 100 \
        'Health of the PVC cluster in %'

    _pvc_graph_config pvc_cluster_alert \
        'PVC Cluster Alerting' 'State' \
        'Alerting state of the PVC cluster health' \
        'Cluster Health State' 2 \
        'Alerting state of the PVC cluster health'
    print_warning pvc_cluster_alert
    print_critical pvc_cluster_alert

    _pvc_graph_config pvc_node_health \
        'PVC Node Health' 'Health%' \
        'Health of the PVC node' \
        'Node Health' 100 \
        'Health of the PVC node in %'

    _pvc_graph_config pvc_node_alert \
        'PVC Node Alerting' 'State' \
        'Alerting state of the PVC node health' \
        'Node Health State' 2 \
        'Alerting state of the PVC node health'
    print_warning pvc_node_alert
    print_critical pvc_node_alert

    exit 0
}
|
||
|
|
||
|
#######################################
# Emit the current values for all four graphs: cluster health and its
# alert state, then the health and alert state of the local node.
#
# Alert states are derived from the health value: 2 when health <= 50,
# 1 when health <= 90, otherwise 0. The cluster alert is only raised
# when the maintenance flag is exactly "false"; the node alert is raised
# unless the flag is exactly "true".
#
# Globals:   PVC_CMD (read), JQ_CMD (read)
# Outputs:   multigraph value lines on stdout
#######################################
output_values() {
    local pvc_output host is_maintenance
    local cluster_health cluster_health_messages cluster_health_alert
    local node_health node_health_messages node_health_alert

    # Unquoted on purpose: PVC_CMD holds a command together with its arguments.
    pvc_output="$( $PVC_CMD )"
    host="$( hostname --short )"

    is_maintenance="$( $JQ_CMD -r '.maintenance' <<<"${pvc_output}" )"

    cluster_health="$( $JQ_CMD -r '.cluster_health.health' <<<"${pvc_output}" )"
    # @csv quotes every message; drop the quotes and space out the commas.
    cluster_health_messages="$(
        $JQ_CMD -r '.cluster_health.messages | @csv' <<<"${pvc_output}" \
            | tr -d '"' \
            | sed 's/,/, /g'
    )"
    echo 'multigraph pvc_cluster_health'
    echo "pvc_cluster_health.value ${cluster_health}"
    echo "pvc_cluster_health.extinfo ${cluster_health_messages}"

    if [[ ${cluster_health} -le 50 && ${is_maintenance} == "false" ]]; then
        cluster_health_alert=2
    elif [[ ${cluster_health} -le 90 && ${is_maintenance} == "false" ]]; then
        cluster_health_alert=1
    else
        cluster_health_alert=0
    fi
    echo 'multigraph pvc_cluster_alert'
    echo "pvc_cluster_alert.value ${cluster_health_alert}"

    # The hostname is handed to jq as a variable instead of being pasted
    # into the filter: a name such as "pvc-hv1" would otherwise be parsed
    # by jq as the expression ".node_health.pvc - hv1..." and fail.
    node_health="$(
        $JQ_CMD -r --arg host "${host}" \
            '.node_health[$host].health' <<<"${pvc_output}"
    )"
    node_health_messages="$(
        $JQ_CMD -r --arg host "${host}" \
            '.node_health[$host].messages | @csv' <<<"${pvc_output}" \
            | tr -d '"' \
            | sed 's/,/, /g'
    )"
    echo 'multigraph pvc_node_health'
    echo "pvc_node_health.value ${node_health}"
    echo "pvc_node_health.extinfo ${node_health_messages}"

    if [[ ${node_health} -le 50 && ${is_maintenance} != "true" ]]; then
        node_health_alert=2
    elif [[ ${node_health} -le 90 && ${is_maintenance} != "true" ]]; then
        node_health_alert=1
    else
        node_health_alert=0
    fi
    echo 'multigraph pvc_node_alert'
    echo "pvc_node_alert.value ${node_health_alert}"
}
|
||
|
|
||
|
# Dispatch on the argument count combined with the first argument:
#   no argument            -> emit the current values
#   exactly "autoconf"     -> report whether the plugin can run here
#   exactly "config"       -> emit the graph definitions
#   anything else, or more than one argument -> usage text
case "$#:${1-}" in
    0:)
        output_values
        ;;
    1:autoconf)
        output_autoconf
        ;;
    1:config)
        output_config
        ;;
    *)
        output_usage
        exit 1
        ;;
esac
|