telegraf 学习三 telegra inputs.net_response + smtp2http+ grafana 进行tcp服务状态监控

以下演示一个简单的使用telegra inputs.net_response 进行tcp 服务状态的监控,统计集成grafana 的alert
为了方便使用了一个smtp2http 的服务,对于http 的webhook 使用benthos

环境准备

项目使用docker-compose 运行,配置通过本地数据卷挂载方式,运行的服务有点多,但是关于influxdb 的基本都包含了

  • docker-compose 文件
 
version: "3"
services:
    grafana:
     image: grafana/grafana
     ports:
     - "3000:3000"
     volumes: 
     - "./defaults.ini:/usr/share/grafana/conf/defaults.ini"
    web:
      image: openresty/openresty:alpine
      ports: 
      - "80:80"
    influxdb:
      image: influxdb
      ports: 
      - "8086:8086"
    telegraf:
      image: telegraf
      volumes: 
      - "./telegraf.conf:/etc/telegraf/telegraf.conf"
    chronograf:
      image: chronograf
      ports:
      - "8888:8888"
      command: --influxdb-url=http://influxdb:8086
    kapacitor:
      image: kapacitor
      volumes: 
      - "./kapacitor.conf:/etc/kapacitor/kapacitor.conf"
      environment:
      - "KAPACITOR_INFLUXDB_0_URLS_0=http://influxdb:8086"
      ports:
      - "9092:9092"
    benthos:
      image: jeffail/benthos
      volumes:
      - "./conf/webhook.yaml:/benthos.yaml"
      ports:
      - "4195:4195"
    smtp2http:
      image: dalongrong/smtp2http
      command: --listen=:25 --webhook=http://benthos:4195/ --strict=false
  • telegraf 配置文件
    telegraf.conf
 
[global_tags]
[agent]
  ## Default data collection interval for all inputs
  interval = "10s"
  round_interval = true
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  collection_jitter = "0s"
  flush_interval = "10s"
  flush_jitter = "0s"
  precision = ""
  hostname = ""
  ## If set to true, do no set the "host" tag in the telegraf agent.
  omit_hostname = false
# Configuration for sending metrics to InfluxDB
[[outputs.influxdb]]
  urls = ["http://influxdb:8086"]
# Read metrics about cpu usage
[[inputs.cpu]]
  ## Whether to report per-cpu stats or not
  percpu = true
  ## Whether to report total system cpu stats or not
  totalcpu = true
  ## If true, collect raw CPU time metrics.
  collect_cpu_time = false
  ## If true, compute and report the sum of all non-idle CPU states.
  report_active = false
# Read metrics about memory usage
[[inputs.mem]]
  # no configuration
[[inputs.net_response]]
  protocol = "tcp"
  # 本次的核心,使用net_response 进行web 服务的状态检测,这个是基于tcp 的,实际上还有基于http_response 的input 插件
  address = "web:80"
  • grafana 配置

    主要添加了关于smtp 的配置,使用的是smtp2http 的服务
    defaults.ini 文件,配置挺多,但是只需要关注smtp 的

 
##################### Grafana Configuration Defaults #####################
#
# Do not modify this file in grafana installs
#
# possible values : production, development
app_mode = production
# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
instance_name = ${HOSTNAME}
#################################### Paths ###############################
[paths]
# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
data = data
# Temporary files in `data` directory older than given duration will be removed
temp_data_lifetime = 24h
# Directory where grafana can store logs
logs = data/log
# Directory where grafana will automatically scan and look for plugins
plugins = data/plugins
# folder that contains provisioning config files that grafana will apply on startup and while running.
provisioning = conf/provisioning
#################################### Server ##############################
[server]
# Protocol (http, https, socket)
protocol = http
# The ip address to bind to, empty will bind to all interfaces
http_addr =
# The http port to use
http_port = 3000
# The public facing domain name used to access grafana from a browser
domain = localhost
# Redirect to correct domain if host header does not match domain
# Prevents DNS rebinding attacks
enforce_domain = false
# The full public facing url
root_url = %(protocol)s://%(domain)s:%(http_port)s/
# Log web requests
router_logging = false
# the path relative working path
static_root_path = public
# enable gzip
enable_gzip = false
# https certs & key file
cert_file =
cert_key =
# Unix socket path
socket = /tmp/grafana.sock
#################################### Database ############################
[database]
# You can configure the database connection by specifying type, host, name, user and password
# as separate properties or as on string using the url property.
# Either "mysql", "postgres" or "sqlite3", it's your choice
type = sqlite3
host = 127.0.0.1:3306
name = grafana
user = root
# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
password =
# Use either URL or the previous fields to configure the database
# Example: mysql://user:secret@host:port/database
url =
# Max idle conn setting default is 2
max_idle_conn = 2
# Max conn setting default is 0 (mean not set)
max_open_conn =
# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours)
conn_max_lifetime = 14400
# Set to true to log the sql calls and execution times.
log_queries =
# For "postgres", use either "disable", "require" or "verify-full"
# For "mysql", use either "true", "false", or "skip-verify".
ssl_mode = disable
ca_cert_path =
client_key_path =
client_cert_path =
server_cert_name =
# For "sqlite3" only, path relative to data_path setting
path = grafana.db
# For "sqlite3" only. cache mode setting used for connecting to the database
cache_mode = private
#################################### Cache server #############################
[remote_cache]
# Either "redis", "memcached" or "database" default is "database"
type = database
# cache connectionstring options
# database: will use Grafana primary database.
# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=0`. Only addr is required.
# memcache: 127.0.0.1:11211
connstr =
#################################### Data proxy ###########################
[dataproxy]
# This enables data proxy logging, default is false
logging = false
# How long the data proxy should wait before timing out default is 30 (seconds)
timeout = 30
# If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request, default is false.
send_user_header = false
#################################### Analytics ###########################
[analytics]
# Server reporting, sends usage counters to stats.grafana.org every 24 hours.
# No ip addresses are being tracked, only simple counters to track
# running instances, dashboard and error counts. It is very helpful to us.
# Change this option to false to disable reporting.
reporting_enabled = true
# Set to false to disable all checks to https://grafana.com
# for new versions (grafana itself and plugins), check is used
# in some UI views to notify that grafana or plugin update exists
# This option does not cause any auto updates, nor send any information
# only a GET request to https://grafana.com to get latest versions
check_for_updates = true
# Google Analytics universal tracking code, only enabled if you specify an id here
google_analytics_ua_id =
# Google Tag Manager ID, only enabled if you specify an id here
google_tag_manager_id =
#################################### Security ############################
[security]
# default admin user, created on startup
admin_user = admin
# default admin password, can be changed before first start of grafana, or in profile settings
admin_password = admin
# used for signing
secret_key = SW2YcwTIb9zpOOhoPsMm
# disable gravatar profile images
disable_gravatar = false
# data source proxy whitelist (ip_or_domain:port separated by spaces)
data_source_proxy_whitelist =
# disable protection against brute force login attempts
disable_brute_force_login_protection = false
# set to true if you host Grafana behind HTTPS. default is false.
cookie_secure = false
# set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict" and "none"
cookie_samesite = lax
# set to true if you want to allow browsers to render Grafana in a <frame>, <iframe>, <embed> or <object>. default is false.
allow_embedding = false
# Set to true if you want to enable http strict transport security (HSTS) response header.
# This is only sent when HTTPS is enabled in this configuration.
# HSTS tells browsers that the site should only be accessed using HTTPS.
# The default will change to true in the next minor release, 6.3.
strict_transport_security = false
# Sets how long a browser should cache HSTS. Only applied if strict_transport_security is enabled.
strict_transport_security_max_age_seconds = 86400
# Set to true if to enable HSTS preloading option. Only applied if strict_transport_security is enabled.
strict_transport_security_preload = false
# Set to true if to enable the HSTS includeSubDomains option. Only applied if strict_transport_security is enabled.
strict_transport_security_subdomains = false
# Set to true to enable the X-Content-Type-Options response header.
# The X-Content-Type-Options response HTTP header is a marker used by the server to indicate that the MIME types advertised
# in the Content-Type headers should not be changed and be followed. The default will change to true in the next minor release, 6.3.
x_content_type_options = false
# Set to true to enable the X-XSS-Protection header, which tells browsers to stop pages from loading
# when they detect reflected cross-site scripting (XSS) attacks. The default will change to true in the next minor release, 6.3.
x_xss_protection = false
#################################### Snapshots ###########################
[snapshots]
# snapshot sharing options
external_enabled = true
external_snapshot_url = https://snapshots-origin.raintank.io
external_snapshot_name = Publish to snapshot.raintank.io
# remove expired snapshot
snapshot_remove_expired = true
#################################### Dashboards ##################
[dashboards]
# Number dashboard versions to keep (per dashboard). Default: 20, Minimum: 1
versions_to_keep = 20
#################################### Users ###############################
[users]
# disable user signup / registration
allow_sign_up = false
# Allow non admin users to create organizations
allow_org_create = false
# Set to true to automatically assign new users to the default organization (id 1)
auto_assign_org = true
# Set this value to automatically add new users to the provided organization (if auto_assign_org above is set to true)
auto_assign_org_id = 1
# Default role new users will be automatically assigned (if auto_assign_org above is set to true)
auto_assign_org_role = Viewer
# Require email validation before sign up completes
verify_email_enabled = false
# Background text for the user field on the login page
login_hint = email or username
password_hint = password
# Default UI theme ("dark" or "light")
default_theme = dark
# External user management
external_manage_link_url =
external_manage_link_name =
external_manage_info =
# Viewers can edit/inspect dashboard settings in the browser. But not save the dashboard.
viewers_can_edit = false
# Editors can administrate dashboard, folders and teams they create
editors_can_admin = false
[auth]
# Login cookie name
login_cookie_name = grafana_session
# The lifetime (days) an authenticated user can be inactive before being required to login at next visit. Default is 7 days.
login_maximum_inactive_lifetime_days = 7
# The maximum lifetime (days) an authenticated user can be logged in since login time before being required to login. Default is 30 days.
login_maximum_lifetime_days = 30
# How often should auth tokens be rotated for authenticated users when being active. The default is each 10 minutes.
token_rotation_interval_minutes = 10
# Set to true to disable (hide) the login form, useful if you use OAuth
disable_login_form = false
# Set to true to disable the signout link in the side menu. useful if you use auth.proxy
disable_signout_menu = false
# URL to redirect the user to after sign out
signout_redirect_url =
# Set to true to attempt login with OAuth automatically, skipping the login screen.
# This setting is ignored if multiple OAuth providers are configured.
oauth_auto_login = false
#################################### Anonymous Auth ######################
[auth.anonymous]
# enable anonymous access
enabled = false
# specify organization name that should be used for unauthenticated users
org_name = Main Org.
# specify role for unauthenticated users
org_role = Viewer
#################################### Github Auth #########################
[auth.github]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret = some_secret
scopes = user:email,read:org
auth_url = https://github.com/login/oauth/authorize
token_url = https://github.com/login/oauth/access_token
api_url = https://api.github.com/user
team_ids =
allowed_organizations =
#################################### GitLab Auth #########################
[auth.gitlab]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret = some_secret
scopes = api
auth_url = https://gitlab.com/oauth/authorize
token_url = https://gitlab.com/oauth/token
api_url = https://gitlab.com/api/v4
allowed_groups =
#################################### Google Auth #########################
[auth.google]
enabled = false
allow_sign_up = true
client_id = some_client_id
client_secret = some_client_secret
scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
auth_url = https://accounts.google.com/o/oauth2/auth
token_url = https://accounts.google.com/o/oauth2/token
api_url = https://www.googleapis.com/oauth2/v1/userinfo
allowed_domains =
hosted_domain =
#################################### Grafana.com Auth ####################
# legacy key names (so they work in env variables)
[auth.grafananet]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret = some_secret
scopes = user:email
allowed_organizations =
[auth.grafana_com]
enabled = false
allow_sign_up = true
client_id = some_id
client_secret = some_secret
scopes = user:email
allowed_organizations =
#################################### Generic OAuth #######################
[auth.generic_oauth]
name = OAuth
enabled = false
allow_sign_up = true
client_id = some_id
client_secret = some_secret
scopes = user:email
email_attribute_name = email:primary
auth_url =
token_url =
api_url =
team_ids =
allowed_organizations =
tls_skip_verify_insecure = false
tls_client_cert =
tls_client_key =
tls_client_ca =
send_client_credentials_via_post = false
#################################### Basic Auth ##########################
[auth.basic]
enabled = true
#################################### Auth Proxy ##########################
[auth.proxy]
enabled = false
header_name = X-WEBAUTH-USER
header_property = username
auto_sign_up = true
ldap_sync_ttl = 60
whitelist =
headers =
#################################### Auth LDAP ###########################
[auth.ldap]
enabled = false
config_file = /etc/grafana/ldap.toml
allow_sign_up = true
# LDAP backround sync (Enterprise only)
sync_cron = @hourly
active_sync_enabled = false
#################################### SMTP / Emailing #####################
[smtp]
enabled = true
host = smtp2http:25
user = dalong@qq.com
# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
password = dalongdemo
cert_file =
key_file =
skip_verify = true
from_address = dalong@qq.com
ehlo_identity =
[emails]
welcome_email_on_sign_up = false
templates_pattern = emails/*.html
#################################### Logging ##########################
[log]
# Either "console", "file", "syslog". Default is console and file
# Use space to separate multiple modes, e.g. "console file"
mode = console file
# Either "debug", "info", "warn", "error", "critical", default is "info"
level = info
# optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug
filters =
# For "console" mode only
[log.console]
level =
# log line format, valid options are text, console and json
format = console
# For "file" mode only
[log.file]
level =
# log line format, valid options are text, console and json
format = text
# This enables automated log rotate(switch of following options), default is true
log_rotate = true
# Max line number of single file, default is 1000000
max_lines = 1000000
# Max size shift of single file, default is 28 means 1 << 28, 256MB
max_size_shift = 28
# Segment log daily, default is true
daily_rotate = true
# Expired days of log file(delete after max days), default is 7
max_days = 7
[log.syslog]
level =
# log line format, valid options are text, console and json
format = text
# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used.
network =
address =
# Syslog facility. user, daemon and local0 through local7 are valid.
facility =
# Syslog tag. By default, the process' argv[0] is used.
tag =
#################################### Usage Quotas ########################
[quota]
enabled = false
#### set quotas to -1 to make unlimited. ####
# limit number of users per Org.
org_user = 10
# limit number of dashboards per Org.
org_dashboard = 100
# limit number of data_sources per Org.
org_data_source = 10
# limit number of api_keys per Org.
org_api_key = 10
# limit number of orgs a user can create.
user_org = 10
# Global limit of users.
global_user = -1
# global limit of orgs.
global_org = -1
# global limit of dashboards
global_dashboard = -1
# global limit of api_keys
global_api_key = -1
# global limit on number of logged in users.
global_session = -1
#################################### Alerting ############################
[alerting]
# Disable alerting engine & UI features
enabled = true
# Makes it possible to turn off alert rule execution but alerting UI is visible
execute_alerts = true
# Default setting for new alert rules. Defaults to categorize error and timeouts as alerting. (alerting, keep_state)
error_or_timeout = alerting
# Default setting for how Grafana handles nodata or null values in alerting. (alerting, no_data, keep_state, ok)
nodata_or_nullvalues = no_data
# Alert notifications can include images, but rendering many images at the same time can overload the server
# This limit will protect the server from render overloading and make sure notifications are sent out quickly
concurrent_render_limit = 5
# Default setting for alert calculation timeout. Default value is 30
evaluation_timeout_seconds = 30
# Default setting for alert notification timeout. Default value is 30
notification_timeout_seconds = 30
# Default setting for max attempts to sending alert notifications. Default value is 3
max_attempts = 3
#################################### Explore #############################
[explore]
# Enable the Explore section
enabled = true
#################################### Internal Grafana Metrics ############
# Metrics available at HTTP API Url /metrics
[metrics]
enabled           = true
interval_seconds  = 10
#If both are set, basic auth will be required for the metrics endpoint.
basic_auth_username =
basic_auth_password =
# Send internal Grafana metrics to graphite
[metrics.graphite]
# Enable by setting the address setting (ex localhost:2003)
address =
prefix = prod.grafana.%(instance_name)s.
[grafana_net]
url = https://grafana.com
[grafana_com]
url = https://grafana.com
#################################### Distributed tracing ############
[tracing.jaeger]
# jaeger destination (ex localhost:6831)
address =
# tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2)
always_included_tag =
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
sampler_type = const
# jaeger samplerconfig param
# for "const" sampler, 0 or 1 for always false/true respectively
# for "probabilistic" sampler, a probability between 0 and 1
# for "rateLimiting" sampler, the number of spans per second
# for "remote" sampler, param is the same as for "probabilistic"
# and indicates the initial sampling rate before the actual one
# is received from the mothership
sampler_param = 1
#################################### External Image Storage ##############
[external_image_storage]
# You can choose between (s3, webdav, gcs, azure_blob, local)
provider =
[external_image_storage.s3]
bucket_url =
bucket =
region =
path =
access_key =
secret_key =
[external_image_storage.webdav]
url =
username =
password =
public_url =
[external_image_storage.gcs]
key_file =
bucket =
path =
[external_image_storage.azure_blob]
account_name =
account_key =
container_name =
[external_image_storage.local]
# does not require any configuration
[rendering]
# Options to configure external image rendering server like https://github.com/grafana/grafana-image-renderer
server_url =
callback_url =
[panels]
# here for to support old env variables, can remove after a few months
enable_alpha = false
disable_sanitize_html = false
[plugins]
enable_alpha = false
app_tls_skip_verify_insecure = false
[enterprise]
license_path =
 
  • benthos webhook
input:
  type: broker
  broker:
    inputs:
      - type: http_server
        http_server:
          path: /
        processors:
          - type: text
            text:
              operator: prepend
              value: "get email message: "
output:
  type: stdout
 

启动&&测试

  • 启动
 
docker-compose up -d
  • 添加grafanna influxdb

  • 报警通道配置

  • 倒入dashboard json 数据

  • dashboard 界面

  • alert 配置

  • 测试报警
    停止docker-compose 中web的nginx 服务
 
docker-compose stop  web

查看smtp2http 配置webhook 信息

docker-compose logs -f benthos

效果


重新启动web 服务

 
docker-compose start web
 

说明

以上是一个简单集成,实际上我们可以基于不通的input 插件,做好多方便的事情,项目里也包含了chronograf,kapacitor 集成,可以方便测试

参考资料

https://github.com/rongfengliang/influxdb_kapacitor_chronograf
https://github.com/influxdata/telegraf/blob/release-1.11/plugins/inputs/http_response/README.md
https://github.com/influxdata/telegraf/blob/release-1.11/plugins/inputs/net_response/README.md

原文地址:https://www.cnblogs.com/rongfengliang/p/11263608.html