prometheus(docker)安装和报警 -- nginx域名监控

软件组件:
prometheus
alertmanager
prometheus-webhook-dingtalk

nginx-vts-exporter
nginx

(###--add-module=../nginx-module-vts/)


1、安装prometheus
2、安装alertmanager
3、nginx-vts-exporter

#cat docker-compose.yml
# Compose stack: Alertmanager, Prometheus and the nginx VTS exporter.
# Every service uses host networking, so each process binds directly on the
# host and the services reach one another through 127.0.0.1.
version: '2'
services:
  alertmanager:
    image: docker.io/prom/alertmanager
    container_name: alertmanager
    # Published ports are discarded under `network_mode: host`; the entry is
    # kept only to record the port the service listens on.
    ports:
      - '9093'
    volumes:
      # Mounted where the image's default --config.file looks for its
      # configuration; mounting under /etc/prometheus/ would leave the
      # built-in sample config in effect.
      - /etc/prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    network_mode: host

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    ports:
      - '9090'
    volumes:
      - /etc/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      # NOTE(review): the image stores its TSDB under /prometheus by default,
      # so this mount is presumably unused unless --storage.tsdb.path is
      # overridden -- confirm before relying on it for persistence.
      - /data/prometheus-data:/prometheus-data
      - /etc/prometheus/rules.yml:/etc/prometheus/rules.yml
    network_mode: host

  nginx-vts-exporter:
    image: sophos/nginx-vts-exporter
    container_name: nginx-vts-exporter
    ports:
      - '9913'
    environment:
      # JSON endpoint exposed by the nginx status server on port 11111
      - NGINX_STATUS=http://127.0.0.1:11111/vt-status/format/json
    network_mode: host

##cat /etc/prometheus/prometheus.yml

# Global defaults applied to every scrape job and rule group.
global:
  scrape_interval: 15s  # Scrape targets every 15 seconds (default: 1 minute).
  evaluation_interval: 15s  # Evaluate rules every 15 seconds (default: 1 minute).
  # scrape_timeout is left at the global default (10s).

# Alertmanager instances that receive fired alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['127.0.0.1:9093']

# Rule files are loaded once and then evaluated every 'evaluation_interval'.
rule_files:
  - '/etc/prometheus/rules.yml'
  # - "second_rules.yml"

# Scrape jobs. The job name is attached to every scraped series as the
# label `job=<job_name>`.
scrape_configs:
  # Prometheus scraping its own metrics.
  # metrics_path defaults to '/metrics'; scheme defaults to 'http'.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # nginx-vts-exporter running on the same host.
  - job_name: 'prometheus_nginx'
    static_configs:
      - targets: ['127.0.0.1:9913']

  - job_name: 'docker'
    static_configs:
      - targets: ['192.168.0.200:9999']

#cat /etc/prometheus/rules.yml
# Alerting rules loaded by Prometheus through `rule_files`.
groups:
  - name: test-rule
    rules:
      # Fires when the root filesystem has been more than 80% full for 2 minutes.
      - alert: NodeFilesystemUsage
        expr: >-
          (node_filesystem_size{device="rootfs"} - node_filesystem_free{device="rootfs"})
          / node_filesystem_size{device="rootfs"} * 100 > 80
        for: 2m
        labels:
          team: node
        annotations:
          summary: "{{$labels.instance}}: High Filesystem usage detected"
          description: "{{$labels.instance}}: Filesystem usage is above 80% (current value is: {{ $value }})"

      # NOTE(review): the alert name and `team` label mention node memory, but
      # the expression watches nginx_server_bytes. The name is kept so existing
      # silences and routes keep matching -- consider renaming.
      - alert: NodeMemoryUsage
        expr: nginx_server_bytes > 100
        for: 2m
        labels:
          team: node
        annotations:
          summary: "nginx_server_bytes is above 100"
          description: "{{$labels.instance}}: nginx_server_bytes (current value is: {{ $value }})"

#cat /etc/prometheus/alertmanager.yml
# Alertmanager configuration: forward every alert to the local
# prometheus-webhook-dingtalk bridge.
global:
  resolve_timeout: 5m

# Single top-level route: alerts are grouped by alert name and all of them go
# to the DingTalk webhook receiver.
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 5m
  receiver: 'send_to_dingding_webhook1'

receivers:
  - name: 'send_to_dingding_webhook1'
    webhook_configs:
      # send_resolved also notifies when an alert clears.
      - send_resolved: true
        url: 'http://127.0.0.1:8060/dingtalk/webhook1/send'

# Suppress 'warning' alerts while a 'critical' alert with the same
# alertname/dev/instance labels is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']


#
# Start the DingTalk webhook bridge on port 8060 in the background.
# The robot access token is a secret: export DINGTALK_ACCESS_TOKEN beforehand
# instead of writing it into the command line; the ${VAR:?} expansion aborts
# with an error when the variable is unset or empty.
nohup ./prometheus-webhook-dingtalk --web.listen-address=":8060" --ding.profile="webhook1=https://oapi.dingtalk.com/robot/send?access_token=${DINGTALK_ACCESS_TOKEN:?export DINGTALK_ACCESS_TOKEN first}" &

## Start all services defined in docker-compose.yml in detached mode
docker-compose up -d

4、安装nginx (略)
--user=nginx --group=nginx --prefix=/opt/app/nginx --with-http_v2_module --with-http_ssl_module --with-http_sub_module --with-http_stub_status_module --with-http_gzip_static_module --with-pcre --add-module=../nginx-module-vts/ --with-http_image_filter_module=dynamic

配置nginx.conf
# http-level settings required by nginx-module-vts.
http {
    # Enable the traffic-status module's shared memory zone.
    vhost_traffic_status_zone;

    # Key the statistics on the request's Host header.
    vhost_traffic_status_filter_by_host on;
    # ... (remaining http-level directives omitted)
}

# Status server on port 11111, polled by nginx-vts-exporter.
# This server block belongs inside the http context.
server {
    listen 11111;
    server_name 127.0.0.1;

    # Basic stub_status page. It needs its own path: declaring
    # `location /vt-status` twice makes nginx refuse to start with a
    # "duplicate location" error.
    location /stub-status {
        stub_status on;
        access_log off;
    }

    # VTS status page; the exporter reads /vt-status/format/json.
    location /vt-status {
        vhost_traffic_status_display;
        vhost_traffic_status_display_format html;
    }
}

原文地址:https://www.cnblogs.com/Qing-840/p/9263487.html