|
2 | 2 | // Deploys a dashboard showing information about support resources
|
3 | 3 | local grafonnet = import 'grafonnet/main.libsonnet';
|
4 | 4 | local dashboard = grafonnet.dashboard;
|
5 |
| -local graphPanel = grafonnet.graphPanel; |
6 |
| -local prometheus = grafonnet.prometheus; |
7 |
| -local row = grafonnet.row; |
| 5 | +local ts = grafonnet.panel.timeSeries; |
| 6 | +local prometheus = grafonnet.query.prometheus; |
| 7 | +local row = grafonnet.panel.row; |
8 | 8 |
|
9 | 9 | local common = import './common.libsonnet';
|
10 | 10 |
|
11 | 11 | // NFS Stats
|
12 |
| -local userNodesNFSOps = graphPanel.new( |
13 |
| - 'User Nodes NFS Ops', |
14 |
| - decimals=0, |
15 |
| - min=0, |
16 |
| - datasource='$PROMETHEUS_DS' |
17 |
| -).addTargets([ |
18 |
| - prometheus.target( |
19 |
| - 'sum(rate(node_nfs_requests_total[5m])) by (node) > 0', |
20 |
| - legendFormat='{{ node }}' |
21 |
| - ), |
22 |
| -]); |
// Time series panel: NFS request rate, one series per node.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local userNodesNFSOps =
  // 5m request rate summed per node; the trailing `> 0` filters out
  // series whose value is not above zero.
  local opsPerNode =
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(node_nfs_requests_total[5m])) by (node) > 0
      |||
    )
    + prometheus.withLegendFormat('{{ node }}');

  common.tsOptions
  + ts.new('User Nodes NFS Ops')
  + ts.standardOptions.withDecimals(0)
  + ts.queryOptions.withTargets([opsPerNode]);
23 | 25 |
|
24 |
| -local userNodesIOWait = graphPanel.new( |
25 |
| - 'iowait % on each node', |
26 |
| - decimals=0, |
27 |
| - min=0, |
28 |
| - datasource='$PROMETHEUS_DS' |
29 |
| -).addTargets([ |
30 |
| - prometheus.target( |
31 |
| - 'sum(rate(node_nfs_requests_total[5m])) by (node)', |
32 |
| - legendFormat='{{ node }}' |
33 |
| - ), |
34 |
| -]); |
// Time series panel: percentage of CPU time each node spends in iowait.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
//
// The previous query was the NFS request rate (identical to the
// "User Nodes NFS Ops" panel apart from the `> 0` filter), which did not
// match this panel's title. It now charts iowait CPU time instead.
// NOTE(review): assumes node_cpu_seconds_total carries the same `node` label
// as node_nfs_requests_total does in this deployment - confirm against the
// Prometheus scrape/relabel config.
local userNodesIOWait =
  common.tsOptions
  + ts.new('iowait % on each node')
  + ts.standardOptions.withUnit('percent')
  + ts.standardOptions.withDecimals(0)
  + ts.queryOptions.withTargets([
    prometheus.new(
      '$PROMETHEUS_DS',
      // Per-CPU iowait rate (seconds per second), averaged over each node's
      // CPUs and scaled by 100 so the value reads as a 0-100 percentage.
      |||
        avg(rate(node_cpu_seconds_total{mode="iowait"}[5m])) by (node) * 100
      |||
    )
    + prometheus.withLegendFormat('{{ node }}'),
  ]);
35 | 39 |
|
36 |
| -local userNodesHighNFSOps = graphPanel.new( |
37 |
| - 'NFS Operation Types on user nodes', |
38 |
| - decimals=0, |
39 |
| - min=0, |
40 |
| - datasource='$PROMETHEUS_DS' |
41 |
| -).addTargets([ |
42 |
| - prometheus.target( |
43 |
| - 'sum(rate(node_nfs_requests_total[5m])) by (method) > 0', |
44 |
| - legendFormat='{{method}}' |
45 |
| - ), |
46 |
| -]); |
// Time series panel: NFS request rate broken down by NFS method.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local userNodesHighNFSOps =
  // 5m request rate summed per method; `> 0` filters out series whose
  // value is not above zero.
  local opsPerMethod =
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(node_nfs_requests_total[5m])) by (method) > 0
      |||
    )
    + prometheus.withLegendFormat('{{ method }}');

  common.tsOptions
  + ts.new('NFS Operation Types on user nodes')
  + ts.standardOptions.withDecimals(0)
  + ts.queryOptions.withTargets([opsPerMethod]);
47 | 53 |
|
48 |
| -local nfsServerCPU = graphPanel.new( |
49 |
| - 'NFS Server CPU', |
50 |
| - min=0, |
51 |
| - datasource='$PROMETHEUS_DS' |
52 |
| -).addTargets([ |
53 |
| - prometheus.target( |
54 |
| - 'avg(rate(node_cpu_seconds_total{job="prometheus-nfsd-server", mode!="idle"}[2m])) by (mode)', |
55 |
| - legendFormat='{{mode}}' |
56 |
| - ), |
57 |
| -]); |
// Time series panel: CPU usage of the NFS server, one series per CPU mode.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local nfsServerCPU =
  // Average 2m CPU-seconds rate for the prometheus-nfsd-server job,
  // grouped by mode, with the idle mode excluded.
  local cpuByMode =
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        avg(rate(node_cpu_seconds_total{job="prometheus-nfsd-server", mode!="idle"}[2m])) by (mode)
      |||
    )
    + prometheus.withLegendFormat('{{ mode }}');

  common.tsOptions
  + ts.new('NFS Server CPU')
  + ts.queryOptions.withTargets([cpuByMode]);
58 | 66 |
|
59 |
| -local nfsServerIOPS = graphPanel.new( |
60 |
| - 'NFS Server Disk ops', |
61 |
| - decimals=0, |
62 |
| - min=0, |
63 |
| - datasource='$PROMETHEUS_DS' |
64 |
| -).addTargets([ |
65 |
| - prometheus.target( |
66 |
| - 'sum(rate(node_nfsd_disk_bytes_read_total[5m]))', |
67 |
| - legendFormat='Read' |
68 |
| - ), |
69 |
| - prometheus.target( |
70 |
| - 'sum(rate(node_nfsd_disk_bytes_written_total[5m]))', |
71 |
| - legendFormat='Write' |
72 |
| - ), |
73 |
| -]); |
// Time series panel: NFS server disk read and write activity.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
// Both queries are 5m rates of the nfsd disk *bytes* counters.
local nfsServerIOPS =
  local readTarget =
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(node_nfsd_disk_bytes_read_total[5m]))
      |||
    )
    + prometheus.withLegendFormat('Read');

  local writeTarget =
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(node_nfsd_disk_bytes_written_total[5m]))
      |||
    )
    + prometheus.withLegendFormat('Write');

  common.tsOptions
  + ts.new('NFS Server Disk ops')
  + ts.standardOptions.withDecimals(0)
  // Target order is preserved: read first, then write.
  + ts.queryOptions.withTargets([readTarget, writeTarget]);
74 | 87 |
|
75 |
| -local nfsServerWriteLatency = graphPanel.new( |
76 |
| - 'NFS Server disk write latency', |
77 |
| - min=0, |
78 |
| - datasource='$PROMETHEUS_DS' |
79 |
| -).addTargets([ |
80 |
| - prometheus.target( |
81 |
| - 'sum(rate(node_disk_write_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_writes_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)', |
82 |
| - legendFormat='{{device}}' |
83 |
| - ), |
84 |
| -]); |
// Time series panel: average disk write latency on the NFS server, per device.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local nfsServerWriteLatency =
  common.tsOptions
  + ts.new('NFS Server disk write latency')
  + ts.queryOptions.withTargets([
    prometheus.new(
      '$PROMETHEUS_DS',
      // Seconds spent writing divided by writes completed, both as 5m rates,
      // giving the mean time per write for each device.
      |||
        sum(rate(node_disk_write_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_writes_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
      |||
    )
    // The legend mixin must be added to the target returned by
    // prometheus.new(), i.e. after its closing parenthesis. Inside the
    // argument list it would be added to the query string instead.
    + prometheus.withLegendFormat('{{ device }}'),
  ]);
85 | 100 |
|
86 |
| -local nfsServerReadLatency = graphPanel.new( |
87 |
| - 'NFS Server disk read latency', |
88 |
| - min=0, |
89 |
| - datasource='$PROMETHEUS_DS' |
90 |
| -).addTargets([ |
91 |
| - prometheus.target( |
92 |
| - 'sum(rate(node_disk_read_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_reads_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)', |
93 |
| - legendFormat='{{device}}' |
94 |
| - ), |
95 |
| -]); |
// Time series panel: average disk read latency on the NFS server, per device.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local nfsServerReadLatency =
  common.tsOptions
  + ts.new('NFS Server disk read latency')
  + ts.queryOptions.withTargets([
    prometheus.new(
      '$PROMETHEUS_DS',
      // Seconds spent reading divided by reads completed, both as 5m rates,
      // giving the mean time per read for each device.
      |||
        sum(rate(node_disk_read_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_reads_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
      |||
    )
    // The legend mixin must be added to the target returned by
    // prometheus.new(), i.e. after its closing parenthesis. Inside the
    // argument list it would be added to the query string instead.
    + prometheus.withLegendFormat('{{ device }}'),
  ]);
96 | 113 |
|
97 | 114 | // Support Metrics
|
98 |
| -local prometheusMemory = graphPanel.new( |
99 |
| - 'Prometheus Memory (Working Set)', |
100 |
| - formatY1='bytes', |
101 |
| - min=0, |
102 |
| - datasource='$PROMETHEUS_DS' |
103 |
| -).addTargets([ |
104 |
| - prometheus.target( |
105 |
| - 'sum(container_memory_working_set_bytes{pod=~"support-prometheus-server-.*", namespace="support"})' |
106 |
| - ), |
107 |
| -]); |
// Time series panel: working-set memory of the Prometheus server pod(s)
// in the "support" namespace, displayed in bytes.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local prometheusMemory =
  local workingSetExpr =
    |||
      sum(container_memory_working_set_bytes{pod=~"support-prometheus-server-.*", namespace="support"})
    |||;

  common.tsOptions
  + ts.new('Prometheus Memory (Working Set)')
  + ts.standardOptions.withUnit('bytes')
  + ts.queryOptions.withTargets([
    prometheus.new('$PROMETHEUS_DS', workingSetExpr),
  ]);
108 | 127 |
|
109 |
| -local prometheusCPU = graphPanel.new( |
110 |
| - 'Prometheus CPU', |
111 |
| - min=0, |
112 |
| - datasource='$PROMETHEUS_DS' |
113 |
| -).addTargets([ |
114 |
| - prometheus.target( |
115 |
| - 'sum(rate(container_cpu_usage_seconds_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))' |
116 |
| - ), |
117 |
| -]); |
// Time series panel: CPU usage of the Prometheus server pod(s) in the
// "support" namespace, as a 5m rate of consumed CPU seconds.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local prometheusCPU =
  local cpuRateExpr =
    |||
      sum(rate(container_cpu_usage_seconds_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
    |||;

  common.tsOptions
  + ts.new('Prometheus CPU')
  + ts.queryOptions.withTargets([
    prometheus.new('$PROMETHEUS_DS', cpuRateExpr),
  ]);
118 | 139 |
|
119 |
| -local prometheusDiskSpace = graphPanel.new( |
120 |
| - 'Prometheus Free Disk space', |
121 |
| - formatY1='bytes', |
122 |
| - min=0, |
123 |
| - datasource='$PROMETHEUS_DS' |
124 |
| -).addTargets([ |
125 |
| - prometheus.target( |
126 |
| - 'sum(kubelet_volume_stats_available_bytes{namespace="support",persistentvolumeclaim="support-prometheus-server"})' |
127 |
| - ), |
128 |
| -]); |
// Time series panel: bytes still available on the Prometheus server's
// persistent volume claim in the "support" namespace.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local prometheusDiskSpace =
  local freeBytesExpr =
    |||
      sum(kubelet_volume_stats_available_bytes{namespace="support",persistentvolumeclaim="support-prometheus-server"})
    |||;

  common.tsOptions
  + ts.new('Prometheus Free Disk space')
  + ts.standardOptions.withUnit('bytes')
  + ts.queryOptions.withTargets([
    prometheus.new('$PROMETHEUS_DS', freeBytesExpr),
  ]);
129 | 152 |
|
130 |
| -local prometheusNetwork = graphPanel.new( |
131 |
| - 'Prometheus Network Usage', |
132 |
| - formatY1='bytes', |
133 |
| - decimals=0, |
134 |
| - min=0, |
135 |
| - datasource='$PROMETHEUS_DS' |
136 |
| -).addTargets([ |
137 |
| - prometheus.target( |
138 |
| - 'sum(rate(container_network_receive_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))', |
139 |
| - legendFormat='receive' |
140 |
| - ), |
141 |
| - prometheus.target( |
142 |
| - 'sum(rate(container_network_send_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))', |
143 |
| - legendFormat='send' |
144 |
| - ), |
145 |
| -]); |
// Time series panel: network receive and send throughput of the Prometheus
// server pod(s) in the "support" namespace, displayed in bytes.
// Built on top of the shared common.tsOptions (defined in common.libsonnet).
local prometheusNetwork =
  common.tsOptions
  + ts.new('Prometheus Network Usage')
  + ts.standardOptions.withUnit('bytes')
  + ts.standardOptions.withDecimals(0)
  + ts.queryOptions.withTargets([
    // For both targets the legend mixin is added to the target returned by
    // prometheus.new(), i.e. after its closing parenthesis. Inside the
    // argument list it would be added to the query string instead.
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(container_network_receive_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
      |||
    )
    + prometheus.withLegendFormat('receive'),
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(rate(container_network_send_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
      |||
    )
    + prometheus.withLegendFormat('send'),
  ]);
146 | 174 |
|
147 | 175 | dashboard.new('NFS and Support Information')
|
148 | 176 | + dashboard.withTags(['support', 'kubernetes'])
|
|
0 commit comments