监控Elasticsearch的插件【check_es_system】

监控Elasticsearch的插件推荐  强大的shell脚本

#!/bin/bash
################################################################################
# Script:       check_es_system.sh                                             #
# Author:       Claudio Kuenzler www.claudiokuenzler.com                       #
# Purpose:      Monitor ElasticSearch Store (Disk) Usage                       #
# Licence:      GPLv2                                                          #
# Licence :     GNU General Public Licence (GPL) http://www.gnu.org/           #
# This program is free software; you can redistribute it and/or                #
# modify it under the terms of the GNU General Public License                  #
# as published by the Free Software Foundation; either version 2               #
# of the License, or (at your option) any later version.                       #
#                                                                              #
# This program is distributed in the hope that it will be useful,              #
# but WITHOUT ANY WARRANTY; without even the implied warranty of               #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                #
#                                                                              #
# GNU General Public License for more details.                                 #
#                                                                              #
# You should have received a copy of the GNU General Public License            #
# along with this program; if not, see <https://www.gnu.org/licenses/>.        #
#                                                                              #
# Copyright 2016,2018,2019 Claudio Kuenzler                                    #
# Copyright 2018 Tomas Barton                                                  #
#                                                                              #
# History:                                                                     #
# 20160429: Started programming plugin                                         #
# 20160601: Continued programming. Working now as it should =)                 #
# 20160906: Added memory usage check, check types option (-t)                  #
# 20160906: Renamed plugin from check_es_store to check_es_system              #
# 20160907: Change internal referenced variable name for available size        #
# 20160907: Output now contains both used and available sizes                  #
# 20161017: Add missing -t in usage output                                     #
# 20180105: Fix if statement for authentication (@deric)                       #
# 20180105: Fix authentication when wrong credentials were used                #
# 20180313: Configure max_time for Elastic to respond (@deric)                 #
# 20190219: Fix alternative subject name in ssl (issue 4), direct to auth      #
# 20190220: Added status check type                                            #
# 20190403: Check for mandatory parameter checktype, adjust help               #
# 20190403: Catch connection refused error                                     #
################################################################################
#Variables and defaults
STATE_OK=0              # define the exit code if status is OK
STATE_WARNING=1         # define the exit code if status is Warning
STATE_CRITICAL=2        # define the exit code if status is Critical
STATE_UNKNOWN=3         # define the exit code if status is Unknown
export PATH=$PATH:/usr/local/bin:/usr/bin:/bin # Set path
version=1.3
port=9200
httpscheme=http
unit=G
warning=80
critical=95
max_time=30
################################################################################
#Functions
help () {
echo -e "$0 $version (c) 2016-$(date +%Y) Claudio Kuenzler and contributers (published below GPL licence)

Usage: ./check_es_system.sh -H ESNode [-P port] [-S] [-u user] [-p pass] -t checktype [-d int] [-o unit] [-w int] [-c int] [-m int]

Options:

   * -H Hostname or ip address of ElasticSearch Node
     -P Port (defaults to 9200)
     -S Use https
     -u Username if authentication is required
     -p Password if authentication is required
   * -t Type of check (disk|mem|status)
   + -d Available size of disk or memory (ex. 20)
     -o Disk space unit (K|M|G) (defaults to G)
     -w Warning threshold in percent (default: 80)
     -c Critical threshold in percent (default: 95)
     -m Maximum time in seconds to wait for response (default: 30)
     -h Help!

*mandatory options
+mandatory options for types disk,mem

Requirements: curl, jshon, expr"
exit $STATE_UNKNOWN;
}

authlogic () {
if [[ -z $user ]] && [[ -z $pass ]]; then echo "ES SYSTEM UNKNOWN - Authentication required but missing username and password"; exit $STATE_UNKNOWN
elif [[ -n $user ]] && [[ -z $pass ]]; then echo "ES SYSTEM UNKNOWN - Authentication required but missing password"; exit $STATE_UNKNOWN
elif [[ -n $pass ]] && [[ -z $user ]]; then echo "ES SYSTEM UNKNOWN - Missing username"; exit $STATE_UNKNOWN
fi
}

unitcalc() {
# ES presents the currently used disk space in Bytes
if [[ -n $unit ]]; then
  case $unit in
    K) availsize=$(expr $available * 1024); outputsize=$(expr ${size} / 1024);;
    M) availsize=$(expr $available * 1024 * 1024); outputsize=$(expr ${size} / 1024 / 1024);;
    G) availsize=$(expr $available * 1024 * 1024 * 1024); outputsize=$(expr ${size} / 1024 / 1024 / 1024);;
  esac
  if [[ -n $warning ]] ; then
    warningsize=$(expr $warning * ${availsize} / 100)
  fi
  if [[ -n $critical ]] ; then
    criticalsize=$(expr $critical * ${availsize} / 100)
  fi
  usedpercent=$(expr $size * 100 / $availsize)
else echo "UNKNOWN - Shouldnt exit here. No units given"; exit $STATE_UNKNOWN
fi
}

availrequired() {
if [ -z ${available} ]; then echo "UNKNOWN - Missing parameter '-d'"; exit $STATE_UNKNOWN; fi
}
################################################################################
# Check requirements
for cmd in curl jshon expr; do
 if ! `which ${cmd} 1>/dev/null`; then
   echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
   exit ${STATE_UNKNOWN}
 fi
done
################################################################################
# Check for people who need help - aren't we all nice ;-)
if [ "${1}" = "--help" -o "${#}" = "0" ]; then help; exit $STATE_UNKNOWN; fi
################################################################################
# Get user-given variables
while getopts "H:P:Su:p:d:o:w:c:t:m:" Input;
do
  case ${Input} in
  H)      host=${OPTARG};;
  P)      port=${OPTARG};;
  S)      httpscheme=https;;
  u)      user=${OPTARG};;
  p)      pass=${OPTARG};;
  d)      available=${OPTARG};;
  o)      unit=${OPTARG};;
  w)      warning=${OPTARG};;
  c)      critical=${OPTARG};;
  t)      checktype=${OPTARG};;
  m)      max_time=${OPTARG};;
  *)      help;;
  esac
done

# Check for mandatory opts
if [ -z ${host} ]; then help; exit $STATE_UNKNOWN; fi
if [ -z ${checktype} ]; then help; exit $STATE_UNKNOWN; fi
################################################################################
# Retrieve information from Elasticsearch
esurl="${httpscheme}://${host}:${port}/_cluster/stats"
eshealthurl="${httpscheme}://${host}:${port}/_cluster/health"
if [[ -z $user ]]; then 
  # Without authentication
  esstatus=$(curl -k -s --max-time ${max_time} $esurl)
  if [[ $? -eq 7 ]]; then
    echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
    exit $STATE_CRITICAL
  elif [[ $? -eq 28 ]]; then
    echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
    exit $STATE_CRITICAL
  fi
  # Additionally get cluster health infos
  if [ $checktype = status ]; then
    eshealth=$(curl -k -s --max-time ${max_time} $eshealthurl)
  fi
fi

if [[ -n $user ]] || [[ -n $(echo $esstatus | grep -i authentication) ]] ; then
  # Authentication required
  authlogic
  esstatus=$(curl -k -s --max-time ${max_time} --basic -u ${user}:${pass} $esurl)
  if [[ $? -eq 7 ]]; then
    echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
    exit $STATE_CRITICAL
  elif [[ $? -eq 28 ]]; then
    echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
    exit $STATE_CRITICAL
  elif [[ -n $(echo $esstatus | grep -i "unable to authenticate") ]]; then
    echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
    exit $STATE_CRITICAL
  fi
  # Additionally get cluster health infos
  if [[ $checktype = status ]]; then
    eshealth=$(curl -k -s --max-time ${max_time} --basic -u ${user}:${pass} $eshealthurl)
  fi
fi

# Catch empty reply from server (typically happens when ssl port used with http connection)
if [[ -z $esstatus ]] || [[ $esstatus = '' ]]; then
  echo "ES SYSTEM UNKNOWN - Empty reply from server (verify ssl settings)"
  exit $STATE_UNKNOWN
fi


# Do the checks
case $checktype in
disk) # Check disk usage
  availrequired
  size=$(echo $esstatus | jshon -e indices -e store -e "size_in_bytes")
  unitcalc
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle tresholds
    if [ $size -ge $criticalsize ]; then
      echo "ES SYSTEM CRITICAL - Disk usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_disk=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_CRITICAL
    elif [ $size -ge $warningsize ]; then
      echo "ES SYSTEM WARNING - Disk usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_disk=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_WARNING
    else
      echo "ES SYSTEM OK - Disk usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_disk=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_OK
    fi
  else
    # No thresholds
    echo "ES SYSTEM OK - Disk usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_disk=${size}B;;;;"
    exit $STATE_OK
  fi
  ;;

mem) # Check memory usage
  availrequired
  size=$(echo $esstatus | jshon -e nodes -e jvm -e mem -e "heap_used_in_bytes")
  unitcalc
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle tresholds
    if [ $size -ge $criticalsize ]; then
      echo "ES SYSTEM CRITICAL - Memory usage is at ${usedpercent}% ($outputsize $unit) from $available $unit|es_memory=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_CRITICAL
    elif [ $size -ge $warningsize ]; then
      echo "ES SYSTEM WARNING - Memory usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_memory=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_WARNING
    else
      echo "ES SYSTEM OK - Memory usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_memory=${size}B;${warningsize};${criticalsize};;"
      exit $STATE_OK
    fi
  else
    # No thresholds
    echo "ES SYSTEM OK - Memory usage is at ${usedpercent}% ($outputsize $unit from $available $unit)|es_memory=${size}B;;;;"
    exit $STATE_OK
  fi
  ;;

status) # Check Elasticsearch status
  status=$(echo $esstatus | jshon -e status -u)
  shards=$(echo $esstatus | jshon -e indices -e shards -e total -u)
  docs=$(echo $esstatus | jshon -e indices -e docs -e count -u)
  nodest=$(echo $esstatus | jshon -e nodes -e count -e total -u)
  nodesd=$(echo $esstatus | jshon -e nodes -e count -e data -u)
  relocating=$(echo $eshealth | jshon -e relocating_shards -u)
  init=$(echo $eshealth | jshon -e initializing_shards -u)
  unass=$(echo $eshealth | jshon -e unassigned_shards -u)
  if [ "$status" = "green" ]; then 
    echo "ES SYSTEM OK - Elasticsearch Cluster is green (${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${docs} docs)|total_nodes=${nodest};;;; data_nodes=${nodesd};;;; total_shards=${shards};;;; relocating_shards=${relocating};;;; initializing_shards=${init};;;; unassigned_shards=${unass};;;; docs=${docs};;;;"
    exit $STATE_OK
  elif [ "$status" = "yellow" ]; then
    echo "ES SYSTEM WARNING - Elasticsearch Cluster is yellow (${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${relocating} relocating shards, ${init} initializing shards, ${unass} unassigned shards, ${docs} docs)|total_nodes=${nodest};;;; data_nodes=${nodesd};;;; total_shards=${shards};;;; relocating_shards=${relocating};;;; initializing_shards=${init};;;; unassigned_shards=${unass};;;; docs=${docs};;;;"
      exit $STATE_WARNING
  elif [ "$status" = "red" ]; then
    echo "ES SYSTEM CRITICAL - Elasticsearch Cluster is red (${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${relocating} relocating shards, ${init} initializing shards, ${unass} unassigned shards, ${docs} docs)|total_nodes=${nodest};;;; data_nodes=${nodesd};;;; total_shards=${shards};;;; relocating_shards=${relocating};;;; initializing_shards=${init};;;; unassigned_shards=${unass};;;; docs=${docs};;;;"
      exit $STATE_CRITICAL
  fi  
  ;;

*) help
esac

转 

https://www.claudiokuenzler.com/monitoring-plugins/check_es_system.php

原文地址:https://www.cnblogs.com/uglyliu/p/10668909.html