Prometheus+Grafana+Alertmanager监控告警(三)

Prometheus配置,对接k8s,自动发现集群资源, prometheus.yml

配置文件如下: 自己调整下标签匹配就行了,已在生产跑起来,没问题

global:
  scrape_interval:     15s # Scrape targets every 15 seconds. Default is every 1 minute.
  scrape_timeout:      15s # Per-scrape timeout, set explicitly to 15s (overrides the 10s global default).
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# Rule files to load, selected by glob pattern.
rule_files:
  - '/prometheus_rules/*.rules'
scrape_configs:
# Prometheus scrapes its own metrics endpoint.
- job_name: 'prometheus'
  static_configs:
  - targets: ['localhost:9090']
# Hosts discovered from JSON target files on disk (file-based service discovery).
- job_name: "host_discovery"
  file_sd_configs:
  # Re-read the matching files every 3 seconds.
  - refresh_interval: 3s
    files: ['/host_discovery_data/*.json']

# kube-state-metrics, scraped through the Kubernetes API server's pod proxy.
- job_name: 'kube-state-metrics'
  scheme: https
  # Token taken from the API server authorization step, stored as a file.
  bearer_token_file: /data/xn-secret
  tls_config:
    insecure_skip_verify: true
  # Kubernetes service discovery settings.
  kubernetes_sd_configs:
  # Discover targets at the endpoints level.
  - api_server: 'https://10.3.218.10:16443'
    role: endpoints
    bearer_token_file: /data/xn-secret
    tls_config:
      insecure_skip_verify: true
  relabel_configs:
  # Keep only targets whose service name matches the regex; all others are dropped.
  - action: keep
    source_labels:
    - __meta_kubernetes_service_name
    # Alternative service name (disabled):
    # regex: '^(kube-state-metrics)$'
    regex: '^(prometheus-operator-kube-state-metrics)$'
  # Alternative filter (disabled): keep only services annotated with prometheus.io/scrape=true.
  # - action: keep
  #   source_labels:
  #   - __meta_kubernetes_service_annotation_prometheus_io_scrape
  #   regex: true
  # Save the discovered address as the instance label before __address__ is overwritten.
  - action: replace
    source_labels:
    - __address__
    target_label: instance
  # Overwrite __address__ so every scrape is sent to the API server.
  - replacement: '10.3.218.10:16443'
    target_label: __address__
  # Build the API server pod-proxy URL from namespace, pod name and container port
  # and use it as the metrics path.
  - source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_pod_name
    - __meta_kubernetes_pod_container_port_number
    regex: '([^;]+);([^;]+);([^;]+)'
    replacement: '/api/v1/namespaces/${1}/pods/http:${2}:${3}/proxy/metrics'
    target_label: __metrics_path__
  # Copy every Kubernetes service label onto the target.
  - regex: '__meta_kubernetes_service_label_(.+)'
    action: labelmap
  # Expose __meta_kubernetes_namespace as kubernetes_namespace.
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: kubernetes_namespace
  # Expose __meta_kubernetes_service_name as service_name.
  - action: replace
    source_labels:
    - __meta_kubernetes_service_name
    target_label: service_name

# node-exporter, scraped through the Kubernetes API server's node proxy.
- job_name: 'kube-node-exporter'
  scheme: https
  # Token taken from the API server authorization step, stored as a file.
  bearer_token_file: /data/xn-secret
  tls_config:
    insecure_skip_verify: true
  # Kubernetes service discovery settings.
  kubernetes_sd_configs:
  # Discover targets at the endpoints level.
  - api_server: 'https://10.3.218.10:16443'
    role: endpoints
    bearer_token_file: /data/xn-secret
    tls_config:
      insecure_skip_verify: true
  relabel_configs:
  # Keep only targets whose service name matches the regex; all others are dropped.
  - action: keep
    source_labels:
    - __meta_kubernetes_service_name
    regex: '^(prometheus-operator-prometheus-node-exporter)$'
  # Save the discovered address as the instance label before __address__ is overwritten.
  - action: replace
    source_labels:
    - __address__
    target_label: instance
  # Overwrite __address__ so every scrape is sent to the API server.
  - replacement: '10.3.218.10:16443'
    target_label: __address__
  # Build the API server node-proxy URL (port 9100) from the endpoint's node name
  # and use it as the metrics path.
  - source_labels:
    - __meta_kubernetes_endpoint_node_name
    regex: '(.+)'
    replacement: '/api/v1/nodes/${1}:9100/proxy/metrics'
    target_label: __metrics_path__
  # Copy every Kubernetes service label onto the target.
  - regex: '__meta_kubernetes_service_label_(.+)'
    action: labelmap
  # Expose __meta_kubernetes_namespace as kubernetes_namespace.
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: kubernetes_namespace
  # Expose __meta_kubernetes_service_name as service_name.
  - action: replace
    source_labels:
    - __meta_kubernetes_service_name
    target_label: service_name

# pods (this job is disabled; every line below is commented out)
#- job_name: "kube-pods"
#  scheme: https
#  tls_config:
#    insecure_skip_verify: true
#  # Token taken from the API server authorization step, stored as a file.
#  bearer_token_file: /data/xn-secret
#  # Kubernetes service discovery settings.
#  kubernetes_sd_configs:
#  # Discover targets at the pod level.
#  - role: pod
#    api_server: "https://10.3.218.10:16443"
#    tls_config:
#      insecure_skip_verify: true
#    bearer_token_file: /data/xn-secret
#  relabel_configs:
#  - source_labels: [__address__]
#    action: replace
#    target_label: instance
#  - target_label: __address__
#    # Overwrite __address__ with the replacement value.
#    replacement: 10.3.218.10:16443
#  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name, __meta_kubernetes_pod_container_port_number]
#    # Regex applied to the joined source label values.
#    regex: ([^;]+);([^;]+);([^;]+)
#    # Overwrite __metrics_path__ with the replacement value.
#    target_label: __metrics_path__
#    # Hand-built API server proxy URL.
#    replacement: /api/v1/namespaces/${1}/pods/http:${2}:${3}/proxy/metrics
#  - action: labelmap
#    regex: __meta_kubernetes_service_label_(.+)
#  - source_labels: [__meta_kubernetes_namespace]
#    action: replace
#    # Expose __meta_kubernetes_namespace as kubernetes_namespace.
#    target_label: kubernetes_namespace
#  - source_labels: [__meta_kubernetes_service_name]
#    action: replace
#    # Expose __meta_kubernetes_service_name as service_name.
#    target_label: service_name


# kubelet metrics, scraped per node through the Kubernetes API server's node proxy.
- job_name: "kube-node-kubelet"
  scheme: https
  tls_config:
    insecure_skip_verify: true
  bearer_token_file: /data/xn-secret
  kubernetes_sd_configs:
  # Discover targets at the node level.
  - role: node
    api_server: "https://10.3.218.10:16443"
    tls_config:
      insecure_skip_verify: true
    bearer_token_file: /data/xn-secret
  relabel_configs:
  # Overwrite __address__ so every scrape is sent to the API server.
  - target_label: __address__
    replacement: 10.3.218.10:16443
  # Build the API server node-proxy URL (port 10250) from the node name
  # and use it as the metrics path.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}:10250/proxy/metrics
  # Copy Kubernetes node labels onto the target. The node role only provides
  # __meta_kubernetes_node_* meta labels; service and namespace meta labels are
  # not set for it, so relabel rules based on them never have any effect.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Expose the node's InternalIP address as the IP label.
  - source_labels: [__meta_kubernetes_node_address_InternalIP]
    separator: ;
    regex: (.*)
    target_label: IP
    replacement: $1
    action: replace

# cAdvisor metrics, scraped per node through the Kubernetes API server's node proxy.
- job_name: "kube-node-cadvisor"
  scheme: https
  tls_config:
    insecure_skip_verify: true
  bearer_token_file: /data/xn-secret
  kubernetes_sd_configs:
  # Discover targets at the node level.
  - role: node
    api_server: "https://10.3.218.10:16443"
    tls_config:
      insecure_skip_verify: true
    bearer_token_file: /data/xn-secret
  relabel_configs:
  # Overwrite __address__ so every scrape is sent to the API server.
  - target_label: __address__
    replacement: 10.3.218.10:16443
  # Build the API server node-proxy URL for the kubelet's /metrics/cadvisor
  # endpoint (port 10250) from the node name and use it as the metrics path.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}:10250/proxy/metrics/cadvisor
  # Copy Kubernetes node labels onto the target. The node role only provides
  # __meta_kubernetes_node_* meta labels; service and namespace meta labels are
  # not set for it, so relabel rules based on them never have any effect.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)

# Alertmanager instance that receives alerts from this Prometheus server.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - '10.5.250.10:9093'
View Code

apiserver授权

# 1. Create the service account xn-prometheus.
kubectl create sa xn-prometheus
# 2. Create a cluster role binding that grants cluster-admin to default:xn-prometheus.
# NOTE(review): cluster-admin gives full access to the cluster; presumably a narrower
# read-only ClusterRole would be enough for scraping — confirm before production use.
kubectl create clusterrolebinding xn-prometheus --clusterrole cluster-admin --serviceaccount=default:xn-prometheus

kubectl集群配置和token;获取到server信息和token信息

[root@zy-master01-218011 ~]# kubectl config view
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: DATA+OMITTED
    server: https://10.3.218.10:16443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: kubernetes-admin
  name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
  user:
    client-certificate-data: REDACTED
    client-key-data: REDACTED

获取token(这个获取的是default的token,根据需要调整)

# Service account whose token secret is looked up. Defaults to "default";
# override it (e.g. SA_NAME=xn-prometheus) to read another account's token.
SA_NAME="${SA_NAME:-default}"
# API server URL: first "server" entry printed by kubectl config view.
APISERVER=$(kubectl config view | grep server | head -n 1 | cut -f 2- -d ":" | tr -d " ")
# Name of the first secret whose listing line contains the service account name.
SECRET=$(kubectl get secrets | grep "$SA_NAME" | head -n 1 | cut -f1 -d ' ')
# Token value taken from the "token:" line of the secret description.
TOKEN=$(kubectl describe secret "$SECRET" | grep -E '^token' | cut -f2 -d':' | tr -d ' ')
# Smoke test: call the API server with the token (--insecure skips TLS verification).
curl "$APISERVER/api" --header "Authorization: Bearer $TOKEN" --insecure

将以上获取到的token保存到文件中供Prometheus使用,本文保存到了/data/xn-secret

relabel的action类型

replace: 对标签和标签值进行替换。
keep: 满足特定条件的实例进行采集,其他的不采集。
drop: 满足特定条件的实例不采集,其他的采集。
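hashmod: 将source_labels拼接后的值取哈希并对modulus取模,结果写入target_label,常用于多个Prometheus实例之间对采集目标进行分片。
labelmap: 用regex匹配所有标签名,把匹配到的标签的值复制到由replacement(默认$1)生成的新标签名上,常用于把__meta_开头的元标签映射为普通标签。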
labeldrop: 对抓取的实例特定标签进行删除。
labelkeep:  对抓取的实例特定标签进行保留,其他标签删除。

参考:https://www.jianshu.com/p/c21d399c140a

原文地址:https://www.cnblogs.com/litzhiai/p/15429783.html