CheckLsPath.ksh
#!/usr/bin/ksh
# CheckLsPath.ksh
# Checks:
#
# 1) For any vscsi connections that are in
# non-Enabled state.
#
# 2) That each disk has a path through
# each vscsi.
# - Jim Maher
#
function GatherAndMail
{
    # GatherAndMail
    #
    # Called by the main program once it has detected a path problem.
    # Sends two mails to root:
    #   1) an immediate report (summary counts, lspath output and the
    #      first 10 lines of errpt output), then
    #   2) after waiting 5 minutes, a follow-up report holding a fresh
    #      set of data followed by the data from the first report.
    #
    # Arguments: none.
    # Globals written: VAdapterCount, DiskCount, TotalPathCount,
    #   ActivePathCount, InactivePathCount, Subject, Date, ErrptData,
    #   LsPathResults and their New* counterparts.
    #
    # Each report takes ONE lspath snapshot and derives every count from
    # it, so the numbers always agree with the listing in the mail body.
    # A path is counted as active only when its line starts with
    # "Enabled" (the same anchored test the main code uses).

    # ---- Gather the information to report now. ----
    Date=$(date)
    LsPathResults=$(lspath)
    ErrptData=$(/usr/bin/errpt | head -10)
    VAdapterCount=$(lsdev -C | grep -c vscsi)
    DiskCount=$(lsdev -C | grep -c disk)
    # "grep -c ." counts non-empty lines, so an empty snapshot gives 0.
    TotalPathCount=$(printf '%s\n' "$LsPathResults" | grep -c .)
    ActivePathCount=$(printf '%s\n' "$LsPathResults" | grep -c '^Enabled')
    InactivePathCount=$(( TotalPathCount - ActivePathCount ))

    Subject="!!! lspath check error - inactive path: $InactivePathCount | total path: $TotalPathCount !!!"

    mail -s "$Subject" root << EOF
Errors were found when running the lspath
command. This report will run again in
5 minutes, and list the state then.
Reporting Time : $Date
----------------------
SUMMARY:
hostname : $(hostname)
virtual adapters : $VAdapterCount
disks : $DiskCount
total paths : $TotalPathCount
active paths : $ActivePathCount
inactive paths : $InactivePathCount
schedule for run : $(/usr/bin/crontab -l | grep -i CheckLsPath.ksh)
----------------------
LSPATH OUTPUT:
$LsPathResults
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$ErrptData
EOF

    # ---- Wait 5 minutes (300 seconds), then report again. ----
    sleep 300

    # Fresh snapshot for the follow-up report.
    NewDate=$(date)
    NewLsPathResults=$(lspath)
    NewErrptData=$(/usr/bin/errpt | head -10)
    NewVAdapterCount=$(lsdev -C | grep -c vscsi)
    NewDiskCount=$(lsdev -C | grep -c disk)
    NewTotalPathCount=$(printf '%s\n' "$NewLsPathResults" | grep -c .)
    NewActivePathCount=$(printf '%s\n' "$NewLsPathResults" | grep -c '^Enabled')
    NewInactivePathCount=$(( NewTotalPathCount - NewActivePathCount ))

    # The follow-up subject must describe the CURRENT state, so it uses
    # the New* counts (a recovery therefore reports 0 inactive paths).
    if (( NewInactivePathCount == 0 )); then
        Subject="+++ lspath check (recovery) - inactive path: $NewInactivePathCount | total path: $NewTotalPathCount +++"
    else
        Subject="!!! lspath check error (followup test) - inactive path: $NewInactivePathCount | total path: $NewTotalPathCount !!!"
    fi

    # Mail the current data followed by the data from the first report.
    # The old section shows the saved $Date, i.e. when that data was
    # actually collected.
    mail -s "$Subject" root << EOF
NEW DATA <<<<<<<<<<<<<<<<<<<<<
Reporting Time : $NewDate
----------------------
SUMMARY:
hostname : $(hostname)
virtual adapters : $NewVAdapterCount
disks : $NewDiskCount
total paths : $NewTotalPathCount
active paths : $NewActivePathCount
inactive paths : $NewInactivePathCount
schedule for run : $(/usr/bin/crontab -l | grep -i CheckLsPath.ksh)
----------------------
LSPATH OUTPUT:
$NewLsPathResults
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$NewErrptData
OLD DATA FROM 5 MINUTES AGO <<<<<<<<<<<<
Reporting Time : $Date
----------------------
SUMMARY:
virtual adapters : $VAdapterCount
disks : $DiskCount
total paths : $TotalPathCount
active paths : $ActivePathCount
inactive paths : $InactivePathCount
----------------------
LSPATH OUTPUT:
$LsPathResults
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$ErrptData
EOF
    # End of GatherAndMail
}
# Beginning of main code.
#
# Exit status (per the comments in this script, read by nagios):
#   0 - every path is Enabled and the path count matches
#       (physical volumes x distinct parent adapters)
#   2 - a problem was found; GatherAndMail has been called to mail
#       the details
#
# "exit" is used rather than "return": ksh treats a top-level "return"
# as "exit", but other shells reject it and would fall through to the
# next check instead of reporting the failure status.

# Count the number of connections that are not Enabled
# (status is the first column of lspath output).
NotEnabled=$(lspath | grep -vc '^Enabled')

# If there are any, then exit 2 to nagios.
if (( NotEnabled != 0 )); then
    # Some connections were not enabled.
    # Call function to gather the info, and report it.
    GatherAndMail
    exit 2
fi

# Find the number of physical volumes.
Numlspv=$(lspv | wc -l | tr -d ' ')

# Find the number of vio's: the distinct values in the third
# column (parent adapter) of lspath output.
NumVios=$(lspath | awk '{print $3}' | sort -u | wc -l | tr -d ' ')

# Find the number of paths.
NumLsPaths=$(lspath | wc -l | tr -d ' ')

# Every physical volume is expected to have one path through each
# adapter, so the path count must equal volumes x adapters.
if (( Numlspv * NumVios != NumLsPaths )); then
    # There are missing paths. Send a non-zero
    # back to nagios.
    GatherAndMail
    exit 2
fi

# If you got to this point, then everything
# checked out. Exit 0 to nagios.
exit 0