pvc/monitoring/munin/pvc

183 lines
4.9 KiB
Plaintext
Raw Normal View History

#!/bin/bash
# -*- sh -*-
: << =cut
=head1 NAME
pvc - Plugin to monitor a PVC cluster.
=head1 AUTHOR
Joshua Boniface <joshua@boniface.me>
=head1 LICENSE
GPLv3
=head1 BUGS
=back
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf
=cut
# Pull in the Munin shell helper library and declare this plugin as a
# multigraph plugin. print_warning / print_critical used in output_config
# come from this library.
. "$MUNIN_LIBDIR/plugins/plugin.sh"
is_multigraph

# Default thresholds for the *_alert fields, whose values are 0, 1 or 2
# (see output_values): anything above 0.99 warns, anything above 1.99 is
# critical.
# NOTE(review): presumably picked up by print_warning/print_critical from
# plugin.sh as the fallback thresholds -- confirm against the Munin library.
warning=0.99
critical=1.99

# Exported so that the pvc CLI invoked below sees it.
# NOTE(review): presumably the directory the pvc client uses for its own
# state/config -- confirm against the pvc CLI documentation.
export PVC_CLIENT_DIR="/run/shm/munin-pvc"

# Commands used by the plugin. They are expanded unquoted on purpose so that
# PVC_CMD splits into the command and its arguments.
PVC_CMD="/usr/bin/sudo /usr/bin/pvc --quiet cluster status --format json-pretty"
JQ_CMD="/usr/bin/jq"
# Print a one-line description of the plugin on stdout.
#
# Returns 0 instead of exiting: the argument dispatcher at the bottom of this
# file calls "exit 1" right after this function for unrecognised invocations,
# and an "exit 0" here would make that failure status unreachable.
output_usage() {
    echo "This plugin outputs information about a PVC cluster and node"
    return 0
}
# Munin "autoconf" handler.
#
# Runs the PVC status command and "jq --version", discarding their output,
# and prints "yes" when both succeed or "no (<reason>)" otherwise. A failing
# PVC command takes precedence over a failing jq when reporting the reason.
output_autoconf() {
    # Only the exit statuses matter; both probes are always executed.
    pvc_ret=0
    $PVC_CMD >/dev/null 2>&1 || pvc_ret=$?
    jq_ret=0
    $JQ_CMD --version >/dev/null 2>&1 || jq_ret=$?

    # Decide on the "<pvc status>:<jq status>" pair.
    case "${pvc_ret}:${jq_ret}" in
        0:0)
            echo "yes"
            ;;
        [!0]*:*)
            echo "no (no 'pvc' command found or local cluster not usable)"
            ;;
        0:[!0]*)
            echo "no (no 'jq' command found)"
            ;;
        *)
            echo "no (generic failure)"
            ;;
    esac
}
# Munin "config" handler.
#
# Prints the graph definitions for the four multigraph sections emitted by
# this plugin and then exits the script with status 0:
#   pvc_cluster_health - cluster health percentage (0-100)
#   pvc_cluster_alert  - cluster alert state (0-2), with warning/critical
#   pvc_node_health    - node health percentage (0-100)
#   pvc_node_alert     - node alert state (0-2), with warning/critical
#
# print_warning / print_critical are provided by the sourced Munin plugin.sh.
output_config() {
    # Cluster health percentage.
    echo 'multigraph pvc_cluster_health'
    echo 'graph_title PVC Cluster Health'
    echo 'graph_args --base 1000'
    echo 'graph_vlabel Health%'
    echo 'graph_category pvc'
    echo 'graph_info Health of the PVC cluster'
    echo 'pvc_cluster_health.label Cluster Health'
    echo 'pvc_cluster_health.type GAUGE'
    echo 'pvc_cluster_health.max 100'
    echo 'pvc_cluster_health.min 0'
    echo 'pvc_cluster_health.info Health of the PVC cluster in %'

    # Cluster alert state; thresholds are attached to this field.
    echo 'multigraph pvc_cluster_alert'
    echo 'graph_title PVC Cluster Alerting'
    echo 'graph_args --base 1000'
    echo 'graph_vlabel State'
    echo 'graph_category pvc'
    echo 'graph_info Alerting state of the PVC cluster health'
    echo 'pvc_cluster_alert.label Cluster Health State'
    echo 'pvc_cluster_alert.type GAUGE'
    echo 'pvc_cluster_alert.max 2'
    echo 'pvc_cluster_alert.min 0'
    echo 'pvc_cluster_alert.info Alerting state of the PVC cluster health'
    print_warning pvc_cluster_alert
    print_critical pvc_cluster_alert

    # Node health percentage.
    echo 'multigraph pvc_node_health'
    echo 'graph_title PVC Node Health'
    echo 'graph_args --base 1000'
    echo 'graph_vlabel Health%'
    echo 'graph_category pvc'
    echo 'graph_info Health of the PVC node'
    echo 'pvc_node_health.label Node Health'
    echo 'pvc_node_health.type GAUGE'
    echo 'pvc_node_health.max 100'
    echo 'pvc_node_health.min 0'
    echo 'pvc_node_health.info Health of the PVC node in %'

    # Node alert state; thresholds are attached to this field.
    echo 'multigraph pvc_node_alert'
    echo 'graph_title PVC Node Alerting'
    echo 'graph_args --base 1000'
    echo 'graph_vlabel State'
    echo 'graph_category pvc'
    echo 'graph_info Alerting state of the PVC node health'
    echo 'pvc_node_alert.label Node Health State'
    echo 'pvc_node_alert.type GAUGE'
    echo 'pvc_node_alert.max 2'
    echo 'pvc_node_alert.min 0'
    echo 'pvc_node_alert.info Alerting state of the PVC node health'
    print_warning pvc_node_alert
    print_critical pvc_node_alert

    exit 0
}
# Munin "fetch" handler (no arguments).
#
# Runs the PVC status command once, then extracts with jq and prints:
#   pvc_cluster_health - .cluster_health.health, with the joined message
#                        texts as extinfo
#   pvc_cluster_alert  - 2 if cluster health <= 50, 1 if <= 90, else 0
#   pvc_node_health    - .node_health[<short hostname>].health, with the
#                        joined messages as extinfo
#   pvc_node_alert     - 2 if node health <= 50, 1 if <= 90, else 0
# Alerts are suppressed (reported as 0) while the cluster is in maintenance.
#
# Globals: PVC_CMD, JQ_CMD (read; expanded unquoted so they may carry
#          arguments).
output_values() {
    local pvc_output host is_maintenance
    local cluster_health cluster_health_messages cluster_health_alert
    local node_health node_health_messages node_health_alert

    pvc_output="$( $PVC_CMD )"
    host="$( hostname --short )"

    is_maintenance="$( $JQ_CMD -r '.maintenance' <<<"${pvc_output}" )"

    cluster_health="$( $JQ_CMD -r '.cluster_health.health' <<<"${pvc_output}" )"
    cluster_health_messages="$( $JQ_CMD -r '.cluster_health.messages | map(.text) | join(", ")' <<<"${pvc_output}" )"

    echo 'multigraph pvc_cluster_health'
    echo "pvc_cluster_health.value ${cluster_health}"
    echo "pvc_cluster_health.extinfo ${cluster_health_messages}"

    # NOTE(review): the cluster check requires maintenance to be exactly
    # "false", while the node check below only requires it not to be "true".
    # The two differ when the maintenance flag is missing; kept as-is.
    if [[ ${cluster_health} -le 50 && ${is_maintenance} == "false" ]]; then
        cluster_health_alert=2
    elif [[ ${cluster_health} -le 90 && ${is_maintenance} == "false" ]]; then
        cluster_health_alert=1
    else
        cluster_health_alert=0
    fi

    echo 'multigraph pvc_cluster_alert'
    echo "pvc_cluster_alert.value ${cluster_health_alert}"

    # The hostname is passed to jq as a variable and used as an object key,
    # rather than being spliced into the filter text: a name such as "hv-1"
    # would otherwise be parsed by jq as an expression, not a key.
    node_health="$( $JQ_CMD -r --arg host "${host}" '.node_health[$host].health' <<<"${pvc_output}" )"
    node_health_messages="$( $JQ_CMD -r --arg host "${host}" '.node_health[$host].messages | join(", ")' <<<"${pvc_output}" )"

    echo 'multigraph pvc_node_health'
    echo "pvc_node_health.value ${node_health}"
    echo "pvc_node_health.extinfo ${node_health_messages}"

    if [[ ${node_health} -le 50 && ${is_maintenance} != "true" ]]; then
        node_health_alert=2
    elif [[ ${node_health} -le 90 && ${is_maintenance} != "true" ]]; then
        node_health_alert=1
    else
        node_health_alert=0
    fi

    echo 'multigraph pvc_node_alert'
    echo "pvc_node_alert.value ${node_health_alert}"
}
# Entry point: choose the handler from the argument count and the first
# argument.
#   no arguments      -> print current values
#   "autoconf"        -> report whether the plugin can run here
#   "config"          -> print the graph definitions
#   anything else     -> print the usage text and fail
case "$#:${1-}" in
    0:)
        output_values
        ;;
    1:autoconf)
        output_autoconf
        ;;
    1:config)
        output_config
        ;;
    *)
        output_usage
        exit 1
        ;;
esac