Skip to content

Commit da20704

Browse files
authored
feat: add smart sampling for OTel tracing (#2343)
1 parent 049205b commit da20704

File tree

4 files changed

+132
-4
lines changed

4 files changed

+132
-4
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
'use strict';
2+
3+
const { BatchSpanProcessor } = require('@opentelemetry/sdk-trace-base');
4+
const { diag } = require('@opentelemetry/api');
5+
6+
/**
 * Batch span processor that implements "smart sampling": spans that do not
 * come from the Playwright engine are grouped by trace and held until the
 * trace's root span ends. The whole trace is then forwarded to the regular
 * batch pipeline only if at least one of its request spans was flagged as an
 * outlier; otherwise the trace is dropped.
 */
class OutlierDetectionBatchSpanProcessor extends BatchSpanProcessor {
  /**
   * @param {object} exporter - Span exporter, passed through to BatchSpanProcessor.
   * @param {object} [config] - Batch processor options, passed through unchanged.
   */
  constructor(exporter, config) {
    super(exporter, config);
    // traceId -> { spans, hasOutlier } for traces whose root span has not ended yet.
    this._traces = new Map();
  }

  /**
   * Called for every ended span.
   *
   * @param {object} span - The ended span.
   */
  onEnd(span) {
    // Playwright engine spans are never filtered; they go straight to the batch buffer.
    if (span.instrumentationLibrary.name === 'artillery-playwright') {
      super.onEnd(span);
      return;
    }

    const traceId = span.spanContext().traceId;

    // When an outlier span is recognised the whole trace it belongs to is
    // exported, so all the spans of a trace are grouped and held until the
    // trace finishes.
    let traceData = this._traces.get(traceId);
    if (!traceData) {
      traceData = { spans: [], hasOutlier: false };
      this._traces.set(traceId, traceData);
    }
    traceData.spans.push(span);

    // Only request level spans (those carrying an 'http.url' attribute) are
    // screened for outliers.
    if (span.attributes['http.url'] && this._isOutlier(span)) {
      traceData.hasOutlier = true;
    }

    // The trace ends when its root span (no parent) ends. Traces without an
    // outlier span are dropped, the rest is sent to the buffer for export.
    if (!span.parentSpanId) {
      this._exportIfOutlier(traceData);
      this._traces.delete(traceId);
    }
  }

  /**
   * Flushes held outlier traces into the batch buffer and then runs the
   * regular shutdown sequence.
   *
   * The flush happens here, before delegating, because in
   * @opentelemetry/sdk-trace-base 1.x the parent onEnd() returns early once
   * shutdown() has been called, and that flag is already set by the time
   * onShutdown() runs. Spans handed over only from onShutdown() would then be
   * silently discarded. NOTE(review): confirm against the installed SDK
   * version; flushing early is harmless if the guard does not exist.
   *
   * @returns {Promise<void>} The promise returned by the parent shutdown().
   */
  shutdown() {
    this._exportHeldOutlierTraces();
    return super.shutdown();
  }

  /**
   * Shutdown hook: exports only outlier traces for the HTTP engine as well.
   * Normally shutdown() has already emptied the held traces; this is kept so
   * that a direct call still processes whatever is left.
   */
  onShutdown() {
    this._exportHeldOutlierTraces();
    // By here all the available HTTP engine traces are processed; the parent
    // hook covers possible Playwright engine traces and the shutdown process.
    super.onShutdown();
  }

  /**
   * Outlier check for a single span: true when the span carries a truthy
   * 'outlier' attribute.
   *
   * @param {object} span - The span to inspect.
   * @returns {boolean}
   */
  _isOutlier(span) {
    return Boolean(span.attributes.outlier);
  }

  /**
   * Forwards every span of a trace to the parent onEnd() when the trace has
   * been marked as containing an outlier; does nothing otherwise.
   *
   * @param {{spans: object[], hasOutlier: boolean}} traceData
   */
  _exportIfOutlier(traceData) {
    if (!traceData.hasOutlier) {
      return;
    }
    for (const heldSpan of traceData.spans) {
      super.onEnd(heldSpan);
    }
  }

  /**
   * Forwards all currently held outlier traces to the parent onEnd() and
   * forgets every held trace (outlier or not).
   */
  _exportHeldOutlierTraces() {
    for (const traceData of this._traces.values()) {
      this._exportIfOutlier(traceData);
    }
    this._traces.clear();
  }
}
60+
61+
module.exports = { OutlierDetectionBatchSpanProcessor };

packages/artillery-plugin-publish-metrics/lib/open-telemetry/tracing/base.js

+8-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
const debug = require('debug')('plugin:publish-metrics:open-telemetry');
44
const grpc = require('@grpc/grpc-js');
55
const { traceExporters, validateExporter } = require('../exporters');
6+
const {
7+
OutlierDetectionBatchSpanProcessor
8+
} = require('../outlier-detection-processor');
69

710
const { SemanticAttributes } = require('@opentelemetry/semantic-conventions');
811
const {
@@ -57,8 +60,12 @@ class OTelTraceConfig {
5760
this.exporterOpts
5861
);
5962

63+
const Processor = this.config.smartSampling
64+
? OutlierDetectionBatchSpanProcessor
65+
: BatchSpanProcessor;
66+
6067
this.tracerProvider.addSpanProcessor(
61-
new BatchSpanProcessor(this.exporter, {
68+
new Processor(this.exporter, {
6269
scheduledDelayMillis: 1000
6370
})
6471
);

packages/artillery-plugin-publish-metrics/lib/open-telemetry/tracing/http.js

+56-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ const {
1515
class OTelHTTPTraceReporter extends OTelTraceBase {
1616
constructor(config, script) {
1717
super(config, script);
18+
this.outlierCriteria = config.smartSampling;
19+
this.statusAsErrorThreshold = 400;
1820
}
1921
run() {
2022
this.setTracer('http');
@@ -86,10 +88,14 @@ class OTelHTTPTraceReporter extends OTelTraceBase {
8688
if (!userContext.vars['__otlpHTTPRequestSpan']) {
8789
return done();
8890
}
89-
9091
const span = userContext.vars['__otlpHTTPRequestSpan'];
9192
let endTime;
9293

94+
const scenarioSpan = userContext.vars['__httpScenarioSpan'];
95+
if (this.config.smartSampling) {
96+
this.tagResponseOutliers(span, scenarioSpan, res, this.outlierCriteria);
97+
}
98+
9399
if (res.timings && res.timings.phases) {
94100
span.setAttribute('response.time.ms', res.timings.phases.firstByte);
95101

@@ -134,7 +140,7 @@ class OTelHTTPTraceReporter extends OTelTraceBase {
134140
res.request.options.headers['user-agent']
135141
});
136142

137-
if (res.statusCode >= 400) {
143+
if (res.statusCode >= this.statusAsErrorThreshold) {
138144
span.setStatus({
139145
code: SpanStatusCode.ERROR,
140146
message: res.statusMessage
@@ -161,6 +167,14 @@ class OTelHTTPTraceReporter extends OTelTraceBase {
161167
code: SpanStatusCode.ERROR,
162168
message: err.message || err
163169
});
170+
171+
if (this.config.smartSampling) {
172+
requestSpan.setAttributes({
173+
outlier: 'true',
174+
'outlier.type.error': true
175+
});
176+
}
177+
164178
requestSpan.end();
165179
this.pendingRequestSpans--;
166180
} else {
@@ -171,10 +185,50 @@ class OTelHTTPTraceReporter extends OTelTraceBase {
171185
code: SpanStatusCode.ERROR,
172186
message: err.message || err
173187
});
188+
189+
if (this.config.smartSampling) {
190+
scenarioSpan.setAttributes({
191+
outlier: 'true',
192+
'outlier.type.error': true
193+
});
194+
}
195+
174196
scenarioSpan.end();
175197
this.pendingScenarioSpans--;
176198
return done();
177199
}
200+
201+
tagResponseOutliers(span, scenarioSpan, res, criteria) {
202+
const types = {};
203+
const details = [];
204+
if (res.statusCode >= this.statusAsErrorThreshold) {
205+
types['outlier.type.status_code'] = true;
206+
details.push(`HTTP Status Code >= ${this.statusAsErrorThreshold}`);
207+
}
208+
if (criteria.thresholds && res.timings?.phases) {
209+
Object.entries(criteria.thresholds).forEach(([name, value]) => {
210+
if (res.timings.phases[name] >= value) {
211+
types[`outlier.type.${name}`] = true;
212+
details.push(`'${name}' >= ${value}`);
213+
}
214+
});
215+
}
216+
217+
if (!details.length) {
218+
return;
219+
}
220+
221+
span.setAttributes({
222+
outlier: 'true',
223+
'outlier.details': details.join(', '),
224+
...types
225+
});
226+
227+
scenarioSpan.setAttributes({
228+
outlier: 'true',
229+
...types
230+
});
231+
}
178232
}
179233

180234
module.exports = {

packages/types/schema/plugins/publish-metrics.js

+7-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,13 @@ const OpenTelemetryReporterSchema = Joi.object({
181181
.default('otlp-http'),
182182
sampleRate: artilleryNumberOrString,
183183
useRequestNames: artilleryBooleanOrString,
184-
attributes: Joi.object().unknown()
184+
attributes: Joi.object().unknown(),
185+
smartSampling: Joi.object({
186+
thresholds: Joi.object({
187+
firstByte: artilleryNumberOrString,
188+
total: artilleryNumberOrString
189+
})
190+
})
185191
})
186192
})
187193
.unknown(false)

0 commit comments

Comments
 (0)