#!/usr/bin/ksh

# CheckLsPath.ksh

# Checks:
#
# 1) For any vscsi connections that are in
# non-Enabled state.
#
# 2) That each disk has a path through
# each vscsi.

# - Jim Maher
# 

function GatherAndMail
{

  # Called by the main program once it has found a path problem.
  #
  # Takes a snapshot of the adapter/disk/path counts, the raw lspath
  # output and the first 10 lines of errpt, and mails it to root.
  # It then sleeps 5 minutes, re-checks, and mails a follow-up that
  # contains the fresh state followed by the original snapshot.
  #
  # Arguments: none
  # Outputs:   two mails to root (initial report + follow-up)
  # Returns:   status of the last mail command

  # Keep the working variables local to this function.
  typeset VAdapterCount DiskCount TotalPathCount ActivePathCount
  typeset InactivePathCount Subject Date ErrptData LsPathResults
  typeset NewInactivePathCount NewTotalPathCount

  # Gather information to report now.

  VAdapterCount=$(lsdev -C | grep vscsi | wc -l | tr -d " ")
  DiskCount=$(lsdev -C | grep disk | wc -l | tr -d " ")
  TotalPathCount=$(lspath | wc -l | tr -d " ")
  ActivePathCount=$(lspath | grep Enabled | wc -l | tr -d " ")
  InactivePathCount=$(lspath | grep -v Enabled | wc -l | tr -d " ")
  Subject="!!! lspath check error - inactive path: $InactivePathCount | total path: $TotalPathCount !!!"
  Date=$(date)
  ErrptData=$(/usr/bin/errpt | head -10)

  LsPathResults=$(lspath)

  mail -s "$Subject" root << EOF
Errors were found when running the lspath
command.  This report will run again in
5 minutes, and list the state then.

Reporting Time    : $Date
----------------------
SUMMARY:
hostname          : $(hostname)
virtual adapters  : $VAdapterCount
disks             : $DiskCount
total paths       : $TotalPathCount
active paths      : $ActivePathCount
inactive paths    : $InactivePathCount
schedule for run  : $(/usr/bin/crontab -l | grep -i CheckLsPath.ksh)
----------------------
LSPATH OUTPUT:
$LsPathResults
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$ErrptData

EOF

  # sleep for 5 minutes (300 seconds), and
  # report the data again.

  sleep 300

  # Re-check the paths.  These fresh counts drive both the subject
  # line and the summary of the follow-up mail, so the two always
  # agree with each other.
  NewInactivePathCount=$(lspath | grep -v Enabled | wc -l | tr -d " ")
  NewTotalPathCount=$(lspath | wc -l | tr -d " ")

  if (( NewInactivePathCount == 0 )); then
    Subject="+++ lspath check (recovery) - inactive path: $NewInactivePathCount | total path: $NewTotalPathCount +++"
  else
    Subject="!!! lspath check error (followup test) - inactive path: $NewInactivePathCount | total path: $NewTotalPathCount !!!"
  fi

  # Mail the current data + the old data.  The OLD section uses the
  # values (including the timestamp) saved before the sleep.

  mail -s "$Subject" root << EOF
NEW DATA <<<<<<<<<<<<<<<<<<<<<
Reporting Time    : $(date)
----------------------
SUMMARY:
hostname          : $(hostname)
virtual adapters  : $(lsdev -C | grep vscsi | wc -l | tr -d " ")
disks             : $(lsdev -C | grep disk | wc -l | tr -d " ")
total paths       : $NewTotalPathCount
active paths      : $(lspath | grep Enabled | wc -l | tr -d " ")
inactive paths    : $NewInactivePathCount
schedule for run  : $(/usr/bin/crontab -l | grep -i CheckLsPath.ksh)
----------------------
LSPATH OUTPUT:
$(lspath)
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$(/usr/bin/errpt | head -10)

OLD DATA FROM 5 MINUTES AGO <<<<<<<<<<<<
Reporting Time    : $Date
----------------------
SUMMARY:
virtual adapters  : $VAdapterCount
disks             : $DiskCount
total paths       : $TotalPathCount
active paths      : $ActivePathCount
inactive paths    : $InactivePathCount
----------------------
LSPATH OUTPUT:
$LsPathResults
----------------------
ERROR REPORTER OUTPUT (last 10 lines):
$ErrptData

EOF

  # End of GatherAndMail
}



# Beginning of main code.
#
# Exit status (reported back to nagios):
#   0 - every path is Enabled and the path count matches
#       (physical volumes x parent adapters).
#   2 - at least one path is not Enabled, or paths are missing.
#
# "exit" is used rather than "return": ksh treats a top-level
# "return" like "exit", but other shells reject it and would
# simply carry on with the rest of the script.

# Count the number of connections that are not
# Enabled (status is the first column of lspath output).
NotEnabled=$(lspath | grep -c -v '^Enabled')

# If there are any, then exit 2 to nagios.
if (( NotEnabled != 0 )); then

  # Some connections were not enabled.

  # Call function to gather the info, and report it.

  GatherAndMail

  exit 2

fi

# Find the number of physical volumes.
Numlspv=$(lspv | wc -l | tr -d ' ')

# Find the number of distinct parent adapters (third column of
# lspath output), i.e. the number of vio connections.
NumVios=$(lspath | awk '{print $3}' | sort -u | wc -l | tr -d ' ')

# Find the number of paths.
NumLsPaths=$(lspath | wc -l | tr -d ' ')

# Every physical volume is expected to have one path through
# every adapter.
if (( ( Numlspv * NumVios ) != NumLsPaths )); then

  # There are missing paths.  Send a non-zero
  # back to nagios.

  GatherAndMail

  exit 2

fi

# If you got to this point, then everything
# checked out. Exit 0 to nagios.

exit 0