Kubernetes集群资源监控

k8s监控方案

1、cadvisor+heapster+influxdb+grafana

cAdvisor(k8s node节点的kubelet已经集成了,暴露一个端口) <--容器中拿数据-- Heapster --汇总到--> InfluxDB <--拿数据展示--Grafana
缺点:只能支持监控容器资源,无法支持业务监控,扩展性较差

2、cadvisor/exporter+prometheus+grafana

总体流程: 数据采集-->汇总-->处理-->存储-->展示
1. pod监控:prometheus使用cadvisor采集容器监控指标,cadvisor集成在k8s的kubelet中--->通过prometheus进程存储--->使用grafana进行展现
2. node物理节点的监控:通过node_exporter采集当前主机的资源--->通过prometheus进程存储--->使用grafana进行展现
3. master物理节点的监控:通过kube-state-metrics插件从k8s的apiserver中获取资源对象的相关数据--->通过prometheus进程存储--->使用grafana进行展现


kubernetes监控指标

集群监控:节点资源利用率、节点数、运行Pods
Pod监控:Kubernetes指标、容器指标、应用程序

1. kubernetes自身的监控
node的资源利用率-node节点上的cpu、内存、硬盘、网络连接
node的数量-node数量与资源利用率、业务负载的比例情况、成本、资源扩展的评估
pod的数量-当负载到一定程度时,node与pod的数量,评估负载到哪个阶段,大约需要多少服务器,每个pod的资源占用率如何,进行整体评估
资源对象状态-k8s在运行过程中,会创建很多pod,控制器,任务,这些内容都是由k8s中的资源对象进行维护,需要进行对资源对象的监控,获取资源对象的状态

2. pod监控
每个项目中pod的数量-正常的pod数量,有问题的pod数量
容器资源利用率-统计当前pod的资源利用率,统计pod中的容器资源利用率,cpu、网络、内存评估
应用程序-项目中的程序的自身情况,如并发,请求响应,项目用户数量,订单数等

官方文档
https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config

 

Heapster+InfluxDB+Grafana

#部署监控前 必须先部署DNS服务(上次已经部署了coredns)
[root@master01 Monitor]# kubectl get all -n kube-system
NAME                                        READY   STATUS    RESTARTS   AGE
pod/coredns-5c5d76fdbb-lnhfq                1/1     Running   0          8d
pod/kubernetes-dashboard-587699746d-4njgl   1/1     Running   0          8d

NAME                           TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)         AGE
service/kube-dns               ClusterIP   10.0.0.2     <none>        53/UDP,53/TCP   8d
service/kubernetes-dashboard   NodePort    10.0.0.153   <none>        443:30001/TCP   8d

NAME                                   READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/coredns                1/1     1            1           8d
deployment.apps/kubernetes-dashboard   1/1     1            1           8d

NAME                                              DESIRED   CURRENT   READY   AGE
replicaset.apps/coredns-5c5d76fdbb                1         1         1       8d
replicaset.apps/kubernetes-dashboard-587699746d   1         1         1       8d
[root@master01 Monitor]# kubectl apply -f 
grafana.yaml                          influxdb.yaml                         kubernetes-pod-statistics_rev1.json   
heapster.yaml                         kubernetes-node-statistics_rev1.json  
#部署heapster
[root@master01 Monitor]# cat heapster.yaml 
# heapster.yaml: ServiceAccount + ClusterRoleBinding + Deployment + Service for Heapster.
# All namespaced objects live in kube-system.

# ServiceAccount the Heapster pod runs under (referenced by serviceAccountName in the Deployment below).
apiVersion: v1
kind: ServiceAccount  # gives Heapster an identity with permission to access the apiserver
metadata:
  name: heapster
  namespace: kube-system

---

# Binds the heapster ServiceAccount to the cluster-admin ClusterRole.
# NOTE(review): cluster-admin is much broader than a metrics collector normally needs;
# a narrower role (e.g. system:heapster) may be sufficient — confirm before using outside a lab.
# NOTE(review): rbac.authorization.k8s.io/v1beta1 is a beta API version; newer clusters
# expect rbac.authorization.k8s.io/v1 — confirm against the target cluster version.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: heapster
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
subjects:        # accounts the cluster role is bound to
  - kind: ServiceAccount
    name: heapster
    namespace: kube-system

---

# Single-replica Heapster Deployment.
# NOTE(review): extensions/v1beta1 is a legacy API group for Deployments; newer clusters
# require apps/v1 together with an explicit spec.selector — confirm against the cluster version.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: heapster
  namespace: kube-system
spec:
  replicas: 1
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: heapster   # label matched by the Service selector below
    spec:
      serviceAccountName: heapster   # run with the ServiceAccount defined above
      containers:
      - name: heapster
        # Image is pulled from a private registry at 10.192.27.111.
        image: 10.192.27.111/library/heapster-amd64:v1.4.2 
        imagePullPolicy: IfNotPresent
        command:
        - /heapster
        - --source=kubernetes:https://kubernetes.default # kube-apiserver address
        - --sink=influxdb:http://monitoring-influxdb:8086 # InfluxDB address; this service name must be resolvable through cluster DNS

---

# Service exposing Heapster inside the cluster: port 80 forwards to container port 8082.
apiVersion: v1
kind: Service
metadata:
  labels:
    task: monitoring
    kubernetes.io/cluster-service: 'true'
    kubernetes.io/name: Heapster
  name: heapster
  namespace: kube-system
spec:
  ports:
  - port: 80
    targetPort: 8082
  selector:
    k8s-app: heapster   # selects the pods created by the heapster Deployment
[root@master01 Monitor]#                   
[root@master01 Monitor]# kubectl apply -f heapster.yaml 
serviceaccount/heapster created
clusterrolebinding.rbac.authorization.k8s.io/heapster created
deployment.extensions/heapster created
service/heapster created
#部署influxdb
[root@master01 Monitor]# cat influxdb.yaml 
# influxdb.yaml: Deployment + Service for the InfluxDB instance that Heapster writes to.

# Single-replica InfluxDB Deployment in kube-system.
# NOTE(review): extensions/v1beta1 is a legacy API group for Deployments; newer clusters
# require apps/v1 together with an explicit spec.selector — confirm against the cluster version.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: monitoring-influxdb
  namespace: kube-system
spec:
  replicas: 1
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: influxdb   # label matched by the Service selector below
    spec:
      containers:
      - name: influxdb
        # Image is pulled from a private registry at 10.192.27.111.
        image: 10.192.27.111/library/heapster-influxdb-amd64:v1.1.1 
        volumeMounts:
        - mountPath: /data  # data could be stored persistently; here it is only temporary storage
          name: influxdb-storage
      volumes:
      - name: influxdb-storage
        # emptyDir is tied to the pod's lifetime: stored metrics are lost when the pod is removed.
        emptyDir: {}

---

# Service named monitoring-influxdb on port 8086; this is the host:port used in Heapster's --sink URL.
apiVersion: v1
kind: Service
metadata:
  labels:
    task: monitoring
    kubernetes.io/cluster-service: 'true'
    kubernetes.io/name: monitoring-influxdb
  name: monitoring-influxdb
  namespace: kube-system
spec:
  ports:
  - port: 8086
    targetPort: 8086
  selector:
    k8s-app: influxdb   # selects the pods created by the monitoring-influxdb Deployment
[root@master01 Monitor]# 
[root@master01 Monitor]# kubectl apply -f influxdb.yaml 
deployment.extensions/monitoring-influxdb created
service/monitoring-influxdb created
#部署grafana
[root@master01 Monitor]# cat grafana.yaml 
# grafana.yaml: Deployment + NodePort Service for the Grafana UI.

# Single-replica Grafana Deployment in kube-system.
# NOTE(review): extensions/v1beta1 is a legacy API group for Deployments; newer clusters
# require apps/v1 together with an explicit spec.selector — confirm against the cluster version.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  replicas: 1
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: grafana   # label matched by the Service selector below
    spec:
      containers:
      - name: grafana
        # Image is pulled from a private registry at 10.192.27.111.
        image: 10.192.27.111/library/heapster-grafana-amd64:v4.4.1 
        ports:
          - containerPort: 3000
            protocol: TCP
        volumeMounts:
        # emptyDir mounted over /var: anything written there is lost when the pod is removed.
        - mountPath: /var
          name: grafana-storage
        env:
        - name: INFLUXDB_HOST # environment variable holding the DNS name of the InfluxDB service
          value: monitoring-influxdb
        # Basic auth is disabled and anonymous access is enabled with the Admin role.
        # NOTE(review): every visitor effectively becomes an administrator — acceptable
        # for a demo only; confirm before exposing this NodePort more widely.
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          value: /
      volumes:
      - name: grafana-storage
        emptyDir: {}

---

# NodePort Service: service port 80 forwards to Grafana's container port 3000.
# No nodePort value is specified in this manifest, so the external port is not fixed here.
apiVersion: v1
kind: Service
metadata:
  labels:
    kubernetes.io/cluster-service: 'true'
    kubernetes.io/name: monitoring-grafana
  name: monitoring-grafana
  namespace: kube-system
spec:
  type: NodePort
  ports:
  - port : 80
    targetPort: 3000
  selector:
    k8s-app: grafana   # selects the pods created by the monitoring-grafana Deployment
[root@master01 Monitor]#
[root@master01 Monitor]# kubectl apply -f grafana.yaml 
deployment.extensions/monitoring-grafana created
service/monitoring-grafana created
#查看结果
[root@master01 Monitor]# kubectl get all -n kube-system
NAME                                        READY   STATUS    RESTARTS   AGE
pod/coredns-5c5d76fdbb-lnhfq                1/1     Running   0          8d
pod/heapster-6567dc64f4-44j29               1/1     Running   0          19s
pod/kubernetes-dashboard-587699746d-4njgl   1/1     Running   0          8d
pod/monitoring-grafana-6d7b7f5fd8-9xftz     1/1     Running   0          5s
pod/monitoring-influxdb-7875d7469c-8gxfn    1/1     Running   0          10s

NAME                           TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)         AGE
service/heapster               ClusterIP   10.0.0.189   <none>        80/TCP          19s
service/kube-dns               ClusterIP   10.0.0.2     <none>        53/UDP,53/TCP   8d
service/kubernetes-dashboard   NodePort    10.0.0.153   <none>        443:30001/TCP   8d
service/monitoring-grafana     NodePort    10.0.0.17    <none>        80:49268/TCP    5s  #访问端口
service/monitoring-influxdb    ClusterIP   10.0.0.233   <none>        8086/TCP        10s

NAME                                   READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/coredns                1/1     1            1           8d
deployment.apps/heapster               1/1     1            1           19s
deployment.apps/kubernetes-dashboard   1/1     1            1           8d
deployment.apps/monitoring-grafana     1/1     1            1           5s
deployment.apps/monitoring-influxdb    1/1     1            1           10s

NAME                                              DESIRED   CURRENT   READY   AGE
replicaset.apps/coredns-5c5d76fdbb                1         1         1       8d
replicaset.apps/heapster-6567dc64f4               1         1         1       19s
replicaset.apps/kubernetes-dashboard-587699746d   1         1         1       8d
replicaset.apps/monitoring-grafana-6d7b7f5fd8     1         1         1       5s
replicaset.apps/monitoring-influxdb-7875d7469c    1         1         1       10s
[root@master01 Monitor]# 
访问地址:http://10.192.27.115:49268

导入自己设计好的仪表盘模板(即前面目录中的 kubernetes-node-statistics_rev1.json 和 kubernetes-pod-statistics_rev1.json)

 

 

 

上面的面板好像看不了每个pod的详情,这是由于它的过滤规则有问题,需要将该过滤规则清除掉

 

 

 

原文地址:https://www.cnblogs.com/linux985/p/11846860.html