Skip to content

Commit 562c01d

Browse files
authored
[connector/servicegraph] Fix histogram metrics miss unit (#34511)
I found the servicegraph histogram metrics missing unit. This PR will add a proper unit to metrics like [spanmetrics connector does.]( metric.SetUnit(p.config.Histogram.Unit.String())) Signed-off-by: Murphy Chen <[email protected]>
1 parent 0bcac4b commit 562c01d

7 files changed

+102
-41
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: breaking
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: servicegraphconnector
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Fix histogram metrics miss unit
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [34511]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
All metrics will remove the suffix `_seconds`. It will not introduce breaking change if users use
20+
| `prometheusexporter` or `prometheusremotewriteexporter` to exporter metrics in pipeline.
21+
| In some cases, like using `clickhouseexporter`(save data in native OTLP format), it will be a breaking change.
22+
| Users can use `transformprocessor` to add back this suffix.
23+
24+
25+
# If your change doesn't affect end users or the exported elements of any package,
26+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
27+
# Optional: The change log or logs in which this entry should be included.
28+
# e.g. '[user]' or '[user, api]'
29+
# Include 'user' if the change is relevant to end users.
30+
# Include 'api' if there is a change to a library API.
31+
# Default: '[user]'
32+
change_logs: []

connector/servicegraphconnector/connector.go

+13-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ const (
3131
clientKind = "client"
3232
serverKind = "server"
3333
virtualNodeLabel = "virtual_node"
34+
millisecondsUnit = "ms"
35+
secondsUnit = "s"
3436
)
3537

3638
var (
@@ -522,10 +524,10 @@ func (p *serviceGraphConnector) collectCountMetrics(ilm pmetric.ScopeMetrics) er
522524
func (p *serviceGraphConnector) collectLatencyMetrics(ilm pmetric.ScopeMetrics) error {
523525
// TODO: Remove this once legacy metric names are removed
524526
if legacyMetricNamesFeatureGate.IsEnabled() {
525-
return p.collectServerLatencyMetrics(ilm, "traces_service_graph_request_duration_seconds")
527+
return p.collectServerLatencyMetrics(ilm, "traces_service_graph_request_duration")
526528
}
527529

528-
if err := p.collectServerLatencyMetrics(ilm, "traces_service_graph_request_server_seconds"); err != nil {
530+
if err := p.collectServerLatencyMetrics(ilm, "traces_service_graph_request_server"); err != nil {
529531
return err
530532
}
531533

@@ -535,7 +537,11 @@ func (p *serviceGraphConnector) collectLatencyMetrics(ilm pmetric.ScopeMetrics)
535537
func (p *serviceGraphConnector) collectClientLatencyMetrics(ilm pmetric.ScopeMetrics) error {
536538
if len(p.reqServerDurationSecondsCount) > 0 {
537539
mDuration := ilm.Metrics().AppendEmpty()
538-
mDuration.SetName("traces_service_graph_request_client_seconds")
540+
mDuration.SetName("traces_service_graph_request_client")
541+
mDuration.SetUnit(secondsUnit)
542+
if legacyLatencyUnitMsFeatureGate.IsEnabled() {
543+
mDuration.SetUnit(millisecondsUnit)
544+
}
539545
// TODO: Support other aggregation temporalities
540546
mDuration.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
541547
timestamp := pcommon.NewTimestampFromTime(time.Now())
@@ -565,6 +571,10 @@ func (p *serviceGraphConnector) collectServerLatencyMetrics(ilm pmetric.ScopeMet
565571
if len(p.reqServerDurationSecondsCount) > 0 {
566572
mDuration := ilm.Metrics().AppendEmpty()
567573
mDuration.SetName(mName)
574+
mDuration.SetUnit(secondsUnit)
575+
if legacyLatencyUnitMsFeatureGate.IsEnabled() {
576+
mDuration.SetUnit(millisecondsUnit)
577+
}
568578
// TODO: Support other aggregation temporalities
569579
mDuration.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
570580
timestamp := pcommon.NewTimestampFromTime(time.Now())

connector/servicegraphconnector/connector_test.go

+14-3
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ func TestConnectorConsume(t *testing.T) {
163163
},
164164
sampleTraces: buildSampleTrace(t, "val"),
165165
gates: []*featuregate.Gate{legacyLatencyUnitMsFeatureGate},
166-
verifyMetrics: verifyHappyCaseMetricsWithDuration(2000, 1000),
166+
verifyMetrics: verifyHappyCaseLatencyMetrics(),
167167
},
168168
} {
169169
t.Run(tc.name, func(t *testing.T) {
@@ -226,15 +226,22 @@ func verifyHappyCaseMetricsWithDuration(serverDurationSum, clientDurationSum flo
226226
verifyCount(t, mCount)
227227

228228
mServerDuration := ms.At(1)
229-
assert.Equal(t, "traces_service_graph_request_server_seconds", mServerDuration.Name())
229+
assert.Equal(t, "traces_service_graph_request_server", mServerDuration.Name())
230230
verifyDuration(t, mServerDuration, serverDurationSum, []uint64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0})
231231

232232
mClientDuration := ms.At(2)
233-
assert.Equal(t, "traces_service_graph_request_client_seconds", mClientDuration.Name())
233+
assert.Equal(t, "traces_service_graph_request_client", mClientDuration.Name())
234234
verifyDuration(t, mClientDuration, clientDurationSum, []uint64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0})
235235
}
236236
}
237237

238+
func verifyHappyCaseLatencyMetrics() func(t *testing.T, md pmetric.Metrics) {
239+
return func(t *testing.T, md pmetric.Metrics) {
240+
verifyHappyCaseMetricsWithDuration(2000, 1000)(t, md)
241+
verifyUnit(t, md.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(1).Unit(), millisecondsUnit)
242+
}
243+
}
244+
238245
func verifyCount(t *testing.T, m pmetric.Metric) {
239246
assert.Equal(t, "traces_service_graph_request_total", m.Name())
240247

@@ -281,6 +288,10 @@ func verifyAttr(t *testing.T, attrs pcommon.Map, k, expected string) {
281288
assert.Equal(t, expected, v.AsString())
282289
}
283290

291+
func verifyUnit(t *testing.T, expected, actual string) {
292+
assert.Equal(t, expected, actual)
293+
}
294+
284295
func buildSampleTrace(t *testing.T, attrValue string) ptrace.Traces {
285296
tStart := time.Date(2022, 1, 2, 3, 4, 5, 6, time.UTC)
286297
// client: 1s

connector/servicegraphconnector/testdata/extra-dimensions-queue-db-expected-metrics.yaml

+15-13
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ resourceMetrics:
1111
- key: client
1212
value:
1313
stringValue: foo-server
14+
- key: client_messaging.system
15+
value:
16+
stringValue: kafka
1417
- key: connection_type
1518
value:
1619
stringValue: ""
@@ -20,9 +23,6 @@ resourceMetrics:
2023
- key: server
2124
value:
2225
stringValue: bar-requester
23-
- key: client_messaging.system
24-
value:
25-
stringValue: kafka
2626
- key: server_db.system
2727
value:
2828
stringValue: postgresql
@@ -36,6 +36,9 @@ resourceMetrics:
3636
- key: client
3737
value:
3838
stringValue: foo-server
39+
- key: client_messaging.system
40+
value:
41+
stringValue: kafka
3942
- key: connection_type
4043
value:
4144
stringValue: ""
@@ -45,9 +48,6 @@ resourceMetrics:
4548
- key: server
4649
value:
4750
stringValue: bar-requester
48-
- key: client_messaging.system
49-
value:
50-
stringValue: kafka
5151
- key: server_db.system
5252
value:
5353
stringValue: postgresql
@@ -62,16 +62,20 @@ resourceMetrics:
6262
- 1
6363
- 10
6464
startTimeUnixNano: "1000000"
65-
sum: 0.000001
65+
sum: 1e-06
6666
timeUnixNano: "2000000"
67-
name: traces_service_graph_request_server_seconds
67+
name: traces_service_graph_request_server
68+
unit: s
6869
- histogram:
6970
aggregationTemporality: 2
7071
dataPoints:
7172
- attributes:
7273
- key: client
7374
value:
7475
stringValue: foo-server
76+
- key: client_messaging.system
77+
value:
78+
stringValue: kafka
7579
- key: connection_type
7680
value:
7781
stringValue: ""
@@ -81,9 +85,6 @@ resourceMetrics:
8185
- key: server
8286
value:
8387
stringValue: bar-requester
84-
- key: client_messaging.system
85-
value:
86-
stringValue: kafka
8788
- key: server_db.system
8889
value:
8990
stringValue: postgresql
@@ -98,8 +99,9 @@ resourceMetrics:
9899
- 1
99100
- 10
100101
startTimeUnixNano: "1000000"
101-
sum: 0.000001
102+
sum: 1e-06
102103
timeUnixNano: "2000000"
103-
name: traces_service_graph_request_client_seconds
104+
name: traces_service_graph_request_client
105+
unit: s
104106
scope:
105107
name: traces_service_graph

connector/servicegraphconnector/testdata/failed-label-not-work-expect-metrics.yaml

+20-18
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ resourceMetrics:
7272
stringValue: ""
7373
- key: failed
7474
value:
75-
boolValue: true
75+
boolValue: false
7676
- key: server
7777
value:
7878
stringValue: bar
@@ -87,14 +87,14 @@ resourceMetrics:
8787
- "0"
8888
- "0"
8989
- "0"
90-
- "1"
90+
- "2"
9191
- "0"
9292
- "0"
9393
- "0"
9494
- "0"
9595
- "0"
9696
- "0"
97-
count: "1"
97+
count: "2"
9898
explicitBounds:
9999
- 0.002
100100
- 0.004
@@ -113,7 +113,7 @@ resourceMetrics:
113113
- 10
114114
- 15
115115
startTimeUnixNano: "1000000"
116-
sum: 1
116+
sum: 2
117117
timeUnixNano: "2000000"
118118
- attributes:
119119
- key: client
@@ -124,7 +124,7 @@ resourceMetrics:
124124
stringValue: ""
125125
- key: failed
126126
value:
127-
boolValue: false
127+
boolValue: true
128128
- key: server
129129
value:
130130
stringValue: bar
@@ -139,14 +139,14 @@ resourceMetrics:
139139
- "0"
140140
- "0"
141141
- "0"
142-
- "2"
142+
- "1"
143143
- "0"
144144
- "0"
145145
- "0"
146146
- "0"
147147
- "0"
148148
- "0"
149-
count: "2"
149+
count: "1"
150150
explicitBounds:
151151
- 0.002
152152
- 0.004
@@ -165,9 +165,10 @@ resourceMetrics:
165165
- 10
166166
- 15
167167
startTimeUnixNano: "1000000"
168-
sum: 2
168+
sum: 1
169169
timeUnixNano: "2000000"
170-
name: traces_service_graph_request_server_seconds
170+
name: traces_service_graph_request_server
171+
unit: s
171172
- histogram:
172173
aggregationTemporality: 2
173174
dataPoints:
@@ -180,7 +181,7 @@ resourceMetrics:
180181
stringValue: ""
181182
- key: failed
182183
value:
183-
boolValue: true
184+
boolValue: false
184185
- key: server
185186
value:
186187
stringValue: bar
@@ -195,14 +196,14 @@ resourceMetrics:
195196
- "0"
196197
- "0"
197198
- "0"
198-
- "1"
199+
- "2"
199200
- "0"
200201
- "0"
201202
- "0"
202203
- "0"
203204
- "0"
204205
- "0"
205-
count: "1"
206+
count: "2"
206207
explicitBounds:
207208
- 0.002
208209
- 0.004
@@ -221,7 +222,7 @@ resourceMetrics:
221222
- 10
222223
- 15
223224
startTimeUnixNano: "1000000"
224-
sum: 1
225+
sum: 2
225226
timeUnixNano: "2000000"
226227
- attributes:
227228
- key: client
@@ -232,7 +233,7 @@ resourceMetrics:
232233
stringValue: ""
233234
- key: failed
234235
value:
235-
boolValue: false
236+
boolValue: true
236237
- key: server
237238
value:
238239
stringValue: bar
@@ -247,14 +248,14 @@ resourceMetrics:
247248
- "0"
248249
- "0"
249250
- "0"
250-
- "2"
251+
- "1"
251252
- "0"
252253
- "0"
253254
- "0"
254255
- "0"
255256
- "0"
256257
- "0"
257-
count: "2"
258+
count: "1"
258259
explicitBounds:
259260
- 0.002
260261
- 0.004
@@ -273,8 +274,9 @@ resourceMetrics:
273274
- 10
274275
- 15
275276
startTimeUnixNano: "1000000"
276-
sum: 2
277+
sum: 1
277278
timeUnixNano: "2000000"
278-
name: traces_service_graph_request_client_seconds
279+
name: traces_service_graph_request_client
280+
unit: s
279281
scope:
280282
name: traces_service_graph

connector/servicegraphconnector/testdata/virtual-node-label-client-expected-metrics.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ resourceMetrics:
6464
startTimeUnixNano: "1000000"
6565
sum: 1e-06
6666
timeUnixNano: "2000000"
67-
name: traces_service_graph_request_server_seconds
67+
name: traces_service_graph_request_server
68+
unit: s
6869
- histogram:
6970
aggregationTemporality: 2
7071
dataPoints:
@@ -100,6 +101,7 @@ resourceMetrics:
100101
startTimeUnixNano: "1000000"
101102
sum: 0
102103
timeUnixNano: "2000000"
103-
name: traces_service_graph_request_client_seconds
104+
name: traces_service_graph_request_client
105+
unit: s
104106
scope:
105107
name: traces_service_graph

connector/servicegraphconnector/testdata/virtual-node-label-server-expected-metrics.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ resourceMetrics:
5858
startTimeUnixNano: "1000000"
5959
sum: 0
6060
timeUnixNano: "2000000"
61-
name: traces_service_graph_request_server_seconds
61+
name: traces_service_graph_request_server
62+
unit: s
6263
- histogram:
6364
aggregationTemporality: 2
6465
dataPoints:
@@ -91,6 +92,7 @@ resourceMetrics:
9192
startTimeUnixNano: "1000000"
9293
sum: 1e-06
9394
timeUnixNano: "2000000"
94-
name: traces_service_graph_request_client_seconds
95+
name: traces_service_graph_request_client
96+
unit: s
9597
scope:
9698
name: traces_service_graph

0 commit comments

Comments
 (0)