现有k8s 1.8集群无法使用top命令,定位异常资源服务困难,需要安装metrics-server,经测试由于版本过老,集群本身kubelet未开启聚合功能,无法部署metrics-server,故部署弃用组件heapster,部署文件如下:
-
heapster-deployment.yaml
apiVersion: extensions/v1beta1 kind: Deployment metadata: name: heapster namespace: kube-system spec: replicas: 1 template: metadata: labels: task: monitoring k8s-app: heapster spec: serviceAccountName: heapster # 生产只有master可以访问kubelet服务,故将heapster调度到master运行 # 忽略masster污点 nodeSelector: "node-role.kubernetes.io/master": "true" tolerations: - key: node-role.kubernetes.io/master operator: Exists effect: NoSchedule containers: - name: heapster image: k8s.gcr.io/heapster-amd64:v1.4.3 imagePullPolicy: IfNotPresent command: - /heapster # kubelet未开启https 10255端口,手动添加采集源 - --source=kubernetes.summary_api:https://kubernetes.default.svc?kubeletHttps=true&kubeletPort=10250&insecure=true # 指定influxdb - --sink=influxdb:http://monitoring-influxdb-heapster:8086
-
heapster-rbac.yaml
apiVersion: v1 kind: ServiceAccount metadata: name: heapster namespace: kube-system --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1beta1 metadata: name: heapster subjects: - kind: ServiceAccount name: heapster namespace: kube-system roleRef: kind: ClusterRole name: cluster-admin apiGroup: rbac.authorization.k8s.io
-
heapster-service.yaml
apiVersion: v1 kind: Service metadata: labels: task: monitoring # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) # If you are NOT using this as an addon, you should comment out this line. kubernetes.io/cluster-service: 'true' kubernetes.io/name: Heapster name: heapster namespace: kube-system spec: ports: - port: 80 targetPort: 8082 selector: k8s-app: heapster
-
influxdb-cm.yaml
apiVersion: v1 kind: ConfigMap metadata: name: influxdb-config-heapster namespace: kube-system data: config.toml: | reporting-disabled = true bind-address = ":8088" [meta] dir = "/data/meta" retention-autocreate = true logging-enabled = true [data] dir = "/data/data" wal-dir = "/data/wal" query-log-enabled = true cache-max-memory-size = 1073741824 cache-snapshot-memory-size = 26214400 cache-snapshot-write-cold-duration = "10m0s" compact-full-write-cold-duration = "4h0m0s" max-series-per-database = 1000000 max-values-per-tag = 100000 trace-logging-enabled = false [coordinator] write-timeout = "10s" max-concurrent-queries = 0 query-timeout = "0s" log-queries-after = "0s" max-select-point = 0 max-select-series = 0 max-select-buckets = 0 [retention] enabled = true check-interval = "30m0s" [admin] enabled = true bind-address = ":8083" https-enabled = false https-certificate = "/etc/ssl/influxdb.pem" [shard-precreation] enabled = true check-interval = "10m0s" advance-period = "30m0s" [monitor] store-enabled = true store-database = "_internal" store-interval = "10s" [subscriber] enabled = true http-timeout = "30s" insecure-skip-verify = false ca-certs = "" write-concurrency = 40 write-buffer-size = 1000 [http] enabled = true bind-address = ":8086" auth-enabled = false log-enabled = true write-tracing = false pprof-enabled = false https-enabled = false https-certificate = "/etc/ssl/influxdb.pem" https-private-key = "" max-row-limit = 10000 max-connection-limit = 0 shared-secret = "" realm = "InfluxDB" unix-socket-enabled = false bind-socket = "/var/run/influxdb.sock" [[graphite]] enabled = false bind-address = ":2003" database = "graphite" retention-policy = "" protocol = "tcp" batch-size = 5000 batch-pending = 10 batch-timeout = "1s" consistency-level = "one" separator = "." udp-read-buffer = 0 [[collectd]] enabled = false bind-address = ":25826" database = "collectd" retention-policy = "" batch-size = 5000 batch-pending = 10 batch-timeout = "10s" read-buffer = 0 typesdb = "/usr/share/collectd/types.db" [[opentsdb]] enabled = false bind-address = ":4242" database = "opentsdb" retention-policy = "" consistency-level = "one" tls-enabled = false certificate = "/etc/ssl/influxdb.pem" batch-size = 1000 batch-pending = 5 batch-timeout = "1s" log-point-errors = true [[udp]] enabled = false bind-address = ":8089" database = "udp" retention-policy = "" batch-size = 5000 batch-pending = 10 read-buffer = 0 batch-timeout = "1s" precision = "" [continuous_queries] log-enabled = true enabled = true run-interval = "1s"
-
influxdb-deployment.yaml
apiVersion: extensions/v1beta1 kind: Deployment metadata: name: monitoring-influxdb-heapster namespace: kube-system spec: replicas: 1 template: metadata: labels: task: monitoring k8s-app: influxdb spec: containers: - name: influxdb image: if-harbor.in.za/devops/heapster-influxdb-amd64:v1.1.1 volumeMounts: - mountPath: /data name: influxdb-storage - mountPath: /etc/ name: influxdb-config volumes: - name: influxdb-storage emptyDir: {} - name: influxdb-config configMap: name: influxdb-config-heapster
-
influxdb-service.yaml
apiVersion: v1 kind: Service metadata: labels: task: monitoring # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) # If you are NOT using this as an addon, you should comment out this line. kubernetes.io/cluster-service: 'true' kubernetes.io/name: monitoring-influxdb-heapster name: monitoring-influxdb-heapster namespace: kube-system spec: type: NodePort ports: - port: 8086 targetPort: 8086 name: http - port: 8083 targetPort: 8083 name: admin selector: k8s-app: influxdb