利用Prometheus+Grafana可以轻松的搭建K8S集群监控系统,这套系统事实上也作为K8S监控的标准,本文借鉴了网上大量的文章资料并在他们的基础上总结所成。
说明: 文章中所使用的均为CentOS 7 操作系统
一. NFS文件服务器准备,用以存储Promethues和Grafana数据
1.安装NFS服务器(192.168.0.2)
yum -y install nfs-utils
systemctl enable nfs #开启自启动
systemctl start nfs #启动nfs服务
2.创建共享目录
mkdir -p /data/nfs/{promethues,grafana,grafana}
chown -R 65534.65534 /data/nfs/promethues #这里非常重要,否则promethues无法启动,即便是设置了777权限也会报错
chmod 777 /data/nfs/{promethues,grafana,grafana} #这里需要打开所有用户可读写
3.设置NFS共享
echo "/data/nfs *(rw,no_root_squash,async,insecure)">>/etc/exports
systemctl restart nfs
4.查看共享
showmount -e 127.0.0.1
[root@nfsserv ~]# showmount -e 127.0.0.1
Export list for 127.0.0.1:
/data/nfs *
二.安装 node-exporter
1. node-exporter.yaml
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: kube-system
labels:
k8s-app: node-exporter
spec:
selector:
matchLabels:
k8s-app: node-exporter
template:
metadata:
labels:
k8s-app: node-exporter
spec:
hostNetwork: true
hostPID: true
hostIPC: true
tolerations:
- key: "disk"
operator: "Equal"
value: "ssd"
effect: "NoSchedule"
- key: "node-role.kubernetes.io/master"
operator: "Equal"
value: "true"
effect: "NoSchedule"
- key: "esnode"
operator: "Equal"
value: "es"
effect: "NoSchedule"
containers:
- image: prom/node-exporter
name: node-exporter
resources:
requests:
cpu: 128m
securityContext:
privileged: true
args:
- --path.procfs
- /host/proc
- --path.sysfs
- /host/sys
- --collector.filesystem.ignored-mount-points
- '"^/(sys|proc|dev|host|etc)($|/)"'
volumeMounts:
- name: dev
mountPath: /host/dev
- name: proc
mountPath: /host/proc
- name: sys
mountPath: /host/sys
- name: rootfs
mountPath: /host/rootfs
ports:
- containerPort: 9100
protocol: TCP
name: http
volumes:
- name: proc
hostPath:
path: /proc
- name: dev
hostPath:
path: /dev
- name: sys
hostPath:
path: /sys
- name: rootfs
hostPath:
path: /
kubectl apply -f node-exporter.yaml
三.安装 Promethues
1.创建 promethues-pvc.yaml
apiVersion: v1
kind: Namespace
metadata:
name: kube-system
---
apiVersion: v1
kind: PersistentVolume
metadata:
name: nfs-pv-prometheus
labels:
pv: nfs-pv-prometheus
annotations:
volume.beta.kubernetes.io/mount-options: "noatime,nodiratime,noresvport,nolock,proto=udp,rsize=1048576,wsize=1048576,hard"
spec:
capacity:
storage: 100Gi
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: /data/nfs/prometheus
server: 192.168.0.2
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: nfs-pvc-prometheus
namespace: kube-system
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi
selector:
matchLabels:
pv: nfs-pv-prometheus
2. 创建promethues-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: kube-system
data:
prometheus.yml: |
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: prometheus
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 15s
scheme: http
metrics_path: /metrics
static_configs:
- targets:
- localhost:9090
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-nodes'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- job_name: 'kubernetes-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kubernetes-services'
kubernetes_sd_configs:
- role: service
metrics_path: /probe
params:
module: [http_2xx]
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- job_name: 'kubernetes-ingresses'
kubernetes_sd_configs:
- role: ingress
relabel_configs:
- source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
regex: (.+);(.+);(.+)
replacement: ${1}://${2}${3}
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_ingress_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_ingress_name]
target_label: kubernetes_name
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: 'k8s-node'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
3.创建promethues-rbac.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
- apiGroups:
- extensions
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: kube-system
4.创建promethues-deploy.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
name: prometheus-deployment
name: prometheus
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
containers:
- image: prom/prometheus:v2.27.1
name: prometheus
command:
- "/bin/prometheus"
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--storage.tsdb.retention=24h"
- "--web.enable-admin-api"
- "--web.enable-lifecycle"
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: "/prometheus"
name: data
- mountPath: "/etc/prometheus"
name: config-volume
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 500m
memory: 1024Mi
serviceAccountName: prometheus
volumes:
- name: data
persistentVolumeClaim:
claimName: nfs-pvc-prometheus
#需要chown 65534.65534 目录
- name: config-volume
configMap:
name: prometheus-config
5.创建promethues-svc.yaml
---
kind: Service
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus
namespace: kube-system
spec:
type: NodePort
ports:
- port: 9090
targetPort: 9090
nodePort: 30003
selector:
app: prometheus
四 安装 Grafana
1.创建grafana存储以及插件的存储grafana-pvc.yaml
apiVersion: v1
kind: Namespace
metadata:
name: kube-system
---
apiVersion: v1
kind: PersistentVolume
metadata:
name: nfs-pv-grafana
labels:
pv: nfs-pv-grafana
annotations:
volume.beta.kubernetes.io/mount-options: "noatime,nodiratime,noresvport,nolock,proto=udp,rsize=1048576,wsize=1048576,hard"
spec:
capacity:
storage: 100Gi
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: /data/nfs/grafana
server: 192.168.0.2
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: nfs-pvc-grafana
namespace: kube-system
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi
selector:
matchLabels:
pv: nfs-pv-grafana
---
apiVersion: v1
kind: PersistentVolume
metadata:
name: nfs-pv-grafana-plugin
labels:
pv: nfs-pv-grafana-plugin
annotations:
volume.beta.kubernetes.io/mount-options: "noatime,nodiratime,noresvport,nolock,proto=udp,rsize=1048576,wsize=1048576,hard"
spec:
capacity:
storage: 100Gi
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: /data/nfs/grafana-plugin
server: 192.168.0.2
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: nfs-pvc-grafana-plugin
namespace: kube-system
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi
selector:
matchLabels:
pv: nfs-pv-grafana-plugin
2.创建 grafana-configmap.yaml (这里整合了grafana的datasource,dashboard)
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-datasources
namespace: kube-system
data:
data_source.yaml: |
{
"apiVersion": 1,
"datasources": [
{
"access": "proxy",
"editable": false,
"name": "Prometheus",
"orgId": 1,
"type": "prometheus",
"url": "http://prometheus:9090",
"version": 1
}
]
}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboards
namespace: kube-system
data:
dashboards.yaml: |-
{
"apiVersion": 1,
"providers": [
{
"folder": "Default",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
},
"orgId": 1,
"type": "file"
}
]
}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-k8s-pod-resource
namespace: kube-system
data:
Kubernetes-Pod-Resources.json: |
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Shows resource usage of Kubernetes pods.",
"editable": true,
"gnetId": 737,
"graphTooltip": 0,
"id": 1,
"iteration": 1624950781810,
"links": [],
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 35,
"panels": [],
"title": "节点资源统计",
"type": "row"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(50, 172, 45, 0.97)",
"value": null
},
{
"color": "rgba(237, 129, 40, 0.89)",
"value": 65
},
{
"color": "rgba(245, 54, 54, 0.9)",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 8,
"x": 0,
"y": 1
},
"id": 4,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum (container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100",
"interval": "",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 2
}
],
"timeFrom": "1m",
"timeShift": null,
"title": "内存使用量",
"type": "gauge"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(50, 172, 45, 0.97)",
"value": null
},
{
"color": "rgba(237, 129, 40, 0.89)",
"value": 65
},
{
"color": "rgba(245, 54, 54, 0.9)",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 8,
"x": 8,
"y": 1
},
"id": 6,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"timeShift": null,
"title": "Cpu 使用量",
"type": "gauge"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(50, 172, 45, 0.97)",
"value": null
},
{
"color": "rgba(237, 129, 40, 0.89)",
"value": 65
},
{
"color": "rgba(245, 54, 54, 0.9)",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 8,
"x": 16,
"y": 1
},
"id": 7,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
"metric": "",
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"timeShift": null,
"title": "磁盘使用量",
"type": "gauge"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 6
},
"hideTimeOverride": true,
"id": 9,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"title": "Used",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 4,
"y": 6
},
"hideTimeOverride": true,
"id": 10,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"title": "Total",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 8,
"y": 6
},
"hideTimeOverride": true,
"id": 11,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"timeShift": null,
"title": "Used",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 12,
"y": 6
},
"hideTimeOverride": true,
"id": 12,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"title": "Total",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 16,
"y": 6
},
"hideTimeOverride": true,
"id": 13,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"title": "Used",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 20,
"y": 6
},
"hideTimeOverride": true,
"id": 14,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"timeFrom": "1m",
"title": "Total",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 9
},
"height": "200px",
"hiddenSeries": false,
"id": 32,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
"interval": "",
"intervalFactor": 2,
"legendFormat": "receive",
"metric": "network",
"refId": "A",
"step": 240
},
{
"expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
"interval": "",
"intervalFactor": 2,
"legendFormat": "transmit",
"metric": "network",
"refId": "B",
"step": 240
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "网络",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "transmit / receive",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"id": 36,
"panels": [],
"title": "Pod 资源统计",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 3,
"editable": true,
"error": false,
"fill": 0,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 15
},
"height": "",
"hiddenSeries": false,
"id": 17,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"hideEmpty": true,
"hideZero": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod }}",
"metric": "container_cpu",
"refId": "A",
"step": 240
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Cpu 使用量",
"tooltip": {
"msResolution": true,
"shared": false,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "none",
"label": "cores",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 22
},
"hiddenSeries": false,
"id": 33,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"hideEmpty": true,
"hideZero": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum (container_memory_working_set_bytes{image!=\"\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod }}",
"metric": "",
"refId": "A",
"step": 240
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "内存使用量",
"tooltip": {
"msResolution": false,
"shared": false,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "used",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 29
},
"hiddenSeries": false,
"id": 16,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"hideEmpty": true,
"hideZero": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod }} < in",
"metric": "network",
"refId": "A",
"step": 240
},
{
"exemplar": true,
"expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod }} > out",
"metric": "network",
"refId": "B",
"step": 240
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "网络",
"tooltip": {
"msResolution": false,
"shared": false,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "transmit / receive",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 36
},
"hiddenSeries": false,
"id": 34,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"hideEmpty": true,
"hideZero": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(container_fs_usage_bytes{image!=\"\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod }}",
"metric": "network",
"refId": "A",
"step": 240
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "磁盘使用量",
"tooltip": {
"msResolution": false,
"shared": false,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": "used",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
"schemaVersion": 30,
"style": "dark",
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"allValue": ".*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "Prometheus",
"definition": "",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [],
"query": {
"query": "label_values(instance)",
"refId": "Prometheus-instance-Variable-Query"
},
"refresh": 1,
"regex": "master|node.*",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "Prometheus",
"definition": "",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": "Namespace",
"multi": true,
"name": "namespace",
"options": [],
"query": {
"query": "label_values(namespace)",
"refId": "Prometheus-namespace-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Kubernetes Pod Resources",
"uid": "0Mu1btknk",
"version": 4
}
3.创建promethues-deploy.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: grafana-core
namespace: kube-system
labels:
app: grafana
component: core
spec:
selector:
matchLabels:
app: grafana
component: core
replicas: 1
template:
metadata:
labels:
app: grafana
component: core
spec:
containers:
- image: grafana/grafana:7.5.6
name: grafana-core
imagePullPolicy: IfNotPresent
# env:
resources:
# keep request = limit to keep this container in guaranteed class
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 100m
memory: 100Mi
env:
# The following env variables set up basic auth twith the default admin user and admin password.
- name: GF_AUTH_BASIC_ENABLED
value: "true"
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "false"
# - name: GF_AUTH_ANONYMOUS_ORG_ROLE
# value: Admin
# does not really work, because of template variables in exported dashboards:
# - name: GF_DASHBOARDS_JSON_ENABLED
# value: "true"
readinessProbe:
httpGet:
path: /login
port: 3000
# initialDelaySeconds: 30
# timeoutSeconds: 1
volumeMounts:
- name: grafana-persistent-storage
mountPath: /var
- name: grafana-plugin-storage
mountPath: /var/lib/grafana
- mountPath: /etc/grafana/provisioning/datasources
name: grafana-datasources
readOnly: false
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/podresource
name: grafana-k8s-pod-resource
readOnly: false
volumes:
- name: grafana-persistent-storage
persistentVolumeClaim:
claimName: nfs-pvc-grafana
- name: grafana-plugin-storage
persistentVolumeClaim:
claimName: nfs-pvc-grafana-plugin
- name: grafana-datasources
configMap:
name: grafana-datasources
- configMap:
name: grafana-dashboards
name: grafana-dashboards
- configMap:
name: grafana-k8s-pod-resource
name: grafana-k8s-pod-resource
4.创建grafana-svc.yaml
apiVersion: v1
kind: Service
metadata:
name: grafana
namespace: kube-system
labels:
app: grafana
component: core
spec:
type: NodePort
ports:
- port: 3000
selector:
app: grafana
component: core
五 效果展示
©著作权归作者所有:来自51CTO博客作者运维记事的原创作品,请联系作者获取转载授权,否则将追究法律责任
K8S部署Prometheus+Grafana监控集群
https://blog.51cto.com/fengwan/5190009
作者:Jeebiz 创建时间:2023-02-20 09:40
最后编辑:Jeebiz 更新时间:2024-08-02 14:28
最后编辑:Jeebiz 更新时间:2024-08-02 14:28