Skip to content

Commit 45ffc03

Browse files
Alex Greenbank, carrieedwards, jpkrohling
authored
[chore] [receiver/datadog] Add support for Service Checks (#34474)
Description: This PR adds support for Datadog Service Checks. Follow-up to #33631, #33957, and #34180. The full version of the code can be found in the cedwards/datadog-metrics-receiver-full branch, or in Grafana Alloy: https://github.com/grafana/alloy/tree/main/internal/etc/datadogreceiver Link to tracking issue: #18278 Testing: Unit tests, as well as an end-to-end test, have been added. --------- Signed-off-by: alexgreenbank <[email protected]> Co-authored-by: Carrie Edwards <[email protected]> Co-authored-by: Juraci Paixão Kröhling <[email protected]>
1 parent cc5889d commit 45ffc03

File tree

4 files changed

+461
-3
lines changed

4 files changed

+461
-3
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package translator // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver/internal/translator"
5+
6+
import (
7+
"time"
8+
9+
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
10+
"go.opentelemetry.io/collector/pdata/pcommon"
11+
"go.opentelemetry.io/collector/pdata/pmetric"
12+
13+
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics/identity"
14+
)
15+
16+
type ServiceCheck struct {
17+
Check string `json:"check"`
18+
HostName string `json:"host_name"`
19+
Status datadogV1.ServiceCheckStatus `json:"status"`
20+
Timestamp int64 `json:"timestamp,omitempty"`
21+
Tags []string `json:"tags,omitempty"`
22+
}
23+
24+
// More information on Datadog service checks: https://docs.datadoghq.com/api/latest/service-checks/
25+
func (mt *MetricsTranslator) TranslateServices(services []ServiceCheck) pmetric.Metrics {
26+
bt := newBatcher()
27+
bt.Metrics = pmetric.NewMetrics()
28+
29+
for _, service := range services {
30+
metricProperties := parseSeriesProperties("service_check", "service_check", service.Tags, service.HostName, mt.buildInfo.Version, mt.stringPool)
31+
metric, metricID := bt.Lookup(metricProperties) // TODO(alexg): proper name
32+
33+
dps := metric.Gauge().DataPoints()
34+
dps.EnsureCapacity(1)
35+
36+
dp := dps.AppendEmpty()
37+
dp.SetTimestamp(pcommon.Timestamp(service.Timestamp * time.Second.Nanoseconds())) // OTel uses nanoseconds, while Datadog uses seconds
38+
metricProperties.dpAttrs.CopyTo(dp.Attributes())
39+
dp.SetIntValue(int64(service.Status))
40+
41+
// TODO(alexg): Do this stream thing for service check metrics?
42+
stream := identity.OfStream(metricID, dp)
43+
ts, ok := mt.streamHasTimestamp(stream)
44+
if ok {
45+
dp.SetStartTimestamp(ts)
46+
}
47+
mt.updateLastTsForStream(stream, dp.Timestamp())
48+
}
49+
return bt.Metrics
50+
}
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package translator
5+
6+
import (
7+
"encoding/json"
8+
"testing"
9+
10+
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
11+
"github.com/stretchr/testify/assert"
12+
"github.com/stretchr/testify/require"
13+
"go.opentelemetry.io/collector/component"
14+
"go.opentelemetry.io/collector/pdata/pcommon"
15+
"go.opentelemetry.io/collector/pdata/pmetric"
16+
)
17+
18+
// testTimestamp is the fixed timestamp value used by the service check test
// cases that set ServiceCheck.Timestamp.
var testTimestamp int64 = 1700000000
21+
22+
func TestHandleStructureParsing(t *testing.T) {
23+
tests := []struct {
24+
name string
25+
checkRunPayload []byte
26+
expectedServices []ServiceCheck
27+
}{
28+
{
29+
name: "happy",
30+
checkRunPayload: []byte(`[
31+
{
32+
"check": "datadog.agent.check_status",
33+
"host_name": "hosta",
34+
"status": 0,
35+
"message": "",
36+
"tags": [
37+
"check:container"
38+
]
39+
},
40+
{
41+
"check": "app.working",
42+
"host_name": "hosta",
43+
"timestamp": 1700000000,
44+
"status": 0,
45+
"message": "",
46+
"tags": null
47+
},
48+
{
49+
"check": "env.test",
50+
"host_name": "hosta",
51+
"status": 0,
52+
"message": "",
53+
"tags": [
54+
"env:argle", "foo:bargle"
55+
]
56+
}
57+
]`),
58+
expectedServices: []ServiceCheck{
59+
{
60+
Check: "datadog.agent.check_status",
61+
HostName: "hosta",
62+
Status: 0,
63+
Tags: []string{"check:container"},
64+
},
65+
{
66+
Check: "app.working",
67+
HostName: "hosta",
68+
Status: 0,
69+
Timestamp: 1700000000,
70+
},
71+
{
72+
Check: "env.test",
73+
HostName: "hosta",
74+
Status: 0,
75+
Tags: []string{"env:argle", "foo:bargle"},
76+
},
77+
},
78+
},
79+
{
80+
name: "happy no tags",
81+
checkRunPayload: []byte(`[
82+
{
83+
"check": "app.working",
84+
"host_name": "hosta",
85+
"timestamp": 1700000000,
86+
"status": 0,
87+
"message": "",
88+
"tags": null
89+
}
90+
]`),
91+
expectedServices: []ServiceCheck{
92+
{
93+
Check: "app.working",
94+
HostName: "hosta",
95+
Status: 0,
96+
Timestamp: 1700000000,
97+
},
98+
},
99+
},
100+
{
101+
name: "happy no timestamp",
102+
checkRunPayload: []byte(`[
103+
{
104+
"check": "env.test",
105+
"host_name": "hosta",
106+
"status": 0,
107+
"message": "",
108+
"tags": [
109+
"env:argle", "foo:bargle"
110+
]
111+
}
112+
]`),
113+
expectedServices: []ServiceCheck{
114+
{
115+
Check: "env.test",
116+
HostName: "hosta",
117+
Status: 0,
118+
Tags: []string{"env:argle", "foo:bargle"},
119+
},
120+
},
121+
},
122+
{
123+
name: "empty",
124+
checkRunPayload: []byte(`[]`),
125+
expectedServices: []ServiceCheck{},
126+
},
127+
{
128+
name: "happy no hostname",
129+
checkRunPayload: []byte(`[
130+
{
131+
"check": "env.test",
132+
"status": 0,
133+
"message": "",
134+
"tags": [
135+
"env:argle", "foo:bargle"
136+
]
137+
}
138+
]`),
139+
expectedServices: []ServiceCheck{
140+
{
141+
Check: "env.test",
142+
Status: 0,
143+
Tags: []string{"env:argle", "foo:bargle"},
144+
},
145+
},
146+
},
147+
{
148+
name: "empty",
149+
checkRunPayload: []byte(`[]`),
150+
expectedServices: []ServiceCheck{},
151+
},
152+
}
153+
154+
for _, tt := range tests {
155+
t.Run(tt.name, func(t *testing.T) {
156+
var services []ServiceCheck
157+
err := json.Unmarshal(tt.checkRunPayload, &services)
158+
require.NoError(t, err, "Failed to unmarshal service payload JSON")
159+
assert.Equal(t, tt.expectedServices, services, "Parsed series does not match expected series")
160+
})
161+
}
162+
}
163+
164+
func TestTranslateCheckRun(t *testing.T) {
165+
tests := []struct {
166+
name string
167+
services []ServiceCheck
168+
expect func(t *testing.T, result pmetric.Metrics)
169+
}{
170+
{
171+
name: "OK status, with TS, no tags, no hostname",
172+
services: []ServiceCheck{
173+
{
174+
Check: "app.working",
175+
Timestamp: 1700000000,
176+
Status: datadogV1.SERVICECHECKSTATUS_OK,
177+
Tags: []string{},
178+
},
179+
},
180+
expect: func(t *testing.T, result pmetric.Metrics) {
181+
expectedAttrs := tagsToAttributes([]string{}, "", newStringPool())
182+
require.Equal(t, 1, result.ResourceMetrics().Len())
183+
requireResourceAttributes(t, result.ResourceMetrics().At(0).Resource().Attributes(), expectedAttrs.resource)
184+
require.Equal(t, 1, result.MetricCount())
185+
require.Equal(t, 1, result.DataPointCount())
186+
187+
requireScope(t, result, expectedAttrs.scope, component.NewDefaultBuildInfo().Version)
188+
189+
metric := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0)
190+
requireGauge(t, metric, "service_check", 1)
191+
192+
dp := metric.Gauge().DataPoints().At(0)
193+
requireDp(t, dp, expectedAttrs.dp, 1700000000, 0)
194+
},
195+
},
196+
{
197+
name: "OK status, no TS",
198+
services: []ServiceCheck{
199+
{
200+
Check: "app.working",
201+
HostName: "foo",
202+
Status: datadogV1.SERVICECHECKSTATUS_OK,
203+
Tags: []string{"env:tag1", "version:tag2"},
204+
},
205+
},
206+
expect: func(t *testing.T, result pmetric.Metrics) {
207+
expectedAttrs := tagsToAttributes([]string{"env:tag1", "version:tag2"}, "foo", newStringPool())
208+
require.Equal(t, 1, result.ResourceMetrics().Len())
209+
requireResourceAttributes(t, result.ResourceMetrics().At(0).Resource().Attributes(), expectedAttrs.resource)
210+
require.Equal(t, 1, result.MetricCount())
211+
require.Equal(t, 1, result.DataPointCount())
212+
213+
requireScope(t, result, expectedAttrs.scope, component.NewDefaultBuildInfo().Version)
214+
215+
metric := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0)
216+
requireGauge(t, metric, "service_check", 1)
217+
218+
dp := metric.Gauge().DataPoints().At(0)
219+
requireDp(t, dp, expectedAttrs.dp, 0, 0)
220+
},
221+
},
222+
}
223+
224+
for _, tt := range tests {
225+
t.Run(tt.name, func(t *testing.T) {
226+
mt := createMetricsTranslator()
227+
mt.buildInfo = component.BuildInfo{
228+
Command: "otelcol",
229+
Description: "OpenTelemetry Collector",
230+
Version: "latest",
231+
}
232+
result := mt.TranslateServices(tt.services)
233+
234+
tt.expect(t, result)
235+
})
236+
}
237+
}
238+
239+
func TestTranslateCheckRunStatuses(t *testing.T) {
240+
tests := []struct {
241+
name string
242+
services []ServiceCheck
243+
expectedStatus int64
244+
}{
245+
{
246+
name: "OK status, no TS",
247+
services: []ServiceCheck{
248+
{
249+
Check: "app.working",
250+
HostName: "foo",
251+
Status: datadogV1.SERVICECHECKSTATUS_OK,
252+
Tags: []string{"env:tag1", "version:tag2"},
253+
},
254+
},
255+
expectedStatus: 0,
256+
},
257+
{
258+
name: "Warning status",
259+
services: []ServiceCheck{
260+
{
261+
Check: "app.warning",
262+
HostName: "foo",
263+
Status: datadogV1.SERVICECHECKSTATUS_WARNING,
264+
Tags: []string{"env:tag1", "version:tag2"},
265+
Timestamp: testTimestamp,
266+
},
267+
},
268+
expectedStatus: 1,
269+
},
270+
{
271+
name: "Critical status",
272+
services: []ServiceCheck{
273+
{
274+
Check: "app.critical",
275+
HostName: "foo",
276+
Status: datadogV1.SERVICECHECKSTATUS_CRITICAL,
277+
Tags: []string{"env:tag1", "version:tag2"},
278+
Timestamp: testTimestamp,
279+
},
280+
},
281+
expectedStatus: 2,
282+
},
283+
{
284+
name: "Unknown status",
285+
services: []ServiceCheck{
286+
{
287+
Check: "app.unknown",
288+
HostName: "foo",
289+
Status: datadogV1.SERVICECHECKSTATUS_UNKNOWN,
290+
Tags: []string{"env:tag1", "version:tag2"},
291+
Timestamp: testTimestamp,
292+
},
293+
},
294+
expectedStatus: 3,
295+
},
296+
}
297+
298+
for _, tt := range tests {
299+
t.Run(tt.name, func(t *testing.T) {
300+
mt := createMetricsTranslator()
301+
mt.buildInfo = component.BuildInfo{
302+
Command: "otelcol",
303+
Description: "OpenTelemetry Collector",
304+
Version: "latest",
305+
}
306+
result := mt.TranslateServices(tt.services)
307+
308+
require.Equal(t, 1, result.MetricCount())
309+
require.Equal(t, 1, result.DataPointCount())
310+
311+
requireScopeMetrics(t, result, 1, 1)
312+
313+
requireScope(t, result, pcommon.NewMap(), component.NewDefaultBuildInfo().Version)
314+
315+
metrics := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics()
316+
for i := 0; i < metrics.Len(); i++ {
317+
metric := metrics.At(i)
318+
assert.Equal(t, tt.expectedStatus, metric.Gauge().DataPoints().At(0).IntValue())
319+
}
320+
})
321+
}
322+
}

0 commit comments

Comments
 (0)