Friday, July 22, 2016

Nagios custom script checklist


#cd /usr/lib64/nagios/plugins
- create a script that exits with 0/1/2 (OK/WARNING/CRITICAL), e.g. "check_raid_disks"
#nano /etc/nagios/nrpe.cfg
- define command[check_raid_disks]=/usr/bin/sudo /usr/lib64/nagios/plugins/check_raid_disks
#/etc/init.d/nrpe restart

* Please note that the command definition includes sudo. Normally NRPE scripts do not require this, but here we run a custom executable inside our script. If sudo is not used, NRPE responds with "NRPE: Unable to read output". The sudoers workaround follows:

#nano /etc/sudoers (add nrpe user rule, comment out TTY requirement)
   nrpe ALL=(ALL) NOPASSWD:/usr/lib64/nagios/plugins/check_raid_disks
   # Defaults    requiretty
#setenforce 0 (put SELinux into permissive mode; this does not persist across reboots)

Nagios server

#/usr/lib64/nagios/plugins/check_nrpe -H CLIENT_HOST -c check_raid_disks  (CLIENT_HOST = address of the monitored server)
#nano /etc/nagios/servers/awsdb.cfg  (server name file)
- define service{
        use                             generic-service
        host_name                       awsdb
        service_description             Raid Disks
        check_command                   check_nrpe!check_raid_disks
        }
#/etc/init.d/nagios restart

Example script


#!/bin/bash
# check_raid_disks - Nagios/NRPE plugin that counts the physical drives
# MegaCli reports as "Online" and compares the count with the expected total.
#
# Exit codes
#   0 - OK:       all expected physical drives are Online
#   1 - WARNING:  unexpected result (e.g. more drives Online than expected)
#   2 - CRITICAL: fewer drives Online than expected, or the check could not run

# server awsDB - virtual drives (vd's) count: 3
# server awsDB - physical drives count: 8
readonly MEGACLI=/root/bin/MegaCli64
readonly EXPECTED_DISKS=8

# Without a runnable MegaCli the pipeline below would yield 0 and be
# misreported as a disk failure, so treat it as a setup problem instead.
if [[ ! -x "$MEGACLI" ]]; then
  echo "Script error, check setup, environment or parameters"
  exit 2
fi

# Count the per-drive state lines that mention "Online".
hds=$("$MEGACLI" -PDList -aALL | grep state | grep -c Online)
# echo "$hds"

# Check for a missing or non-numeric count
if [[ ! "$hds" =~ ^[0-9]+$ ]]; then
  echo "Script error, check setup, environment or parameters"
  exit 2
fi

if [[ "$hds" -eq "$EXPECTED_DISKS" ]]; then
  echo "OK - physical disk count: $hds"
  exit 0
fi

if [[ "$hds" -lt "$EXPECTED_DISKS" ]]; then
  echo "CRITICAL - not all disks are Online: $hds/$EXPECTED_DISKS"
  exit 2
fi

# Reached only when more drives are Online than expected.
echo "WARNING - Script error"
exit 1

Nagios debug command notes

#/usr/lib64/nagios/plugins/check_nrpe -H CLIENT_HOST -c check_cpu  (CLIENT_HOST = address of the monitored server)
#nagios -v /etc/nagios/nagios.cfg

No comments: