8
8
import static io .airbyte .metrics .lib .ApmTraceConstants .ACTIVITY_TRACE_OPERATION_NAME ;
9
9
import static io .airbyte .metrics .lib .ApmTraceConstants .Tags .ATTEMPT_NUMBER_KEY ;
10
10
import static io .airbyte .metrics .lib .ApmTraceConstants .Tags .CONNECTION_ID_KEY ;
11
+ import static io .airbyte .metrics .lib .ApmTraceConstants .Tags .FAILURE_ORIGINS_KEY ;
11
12
import static io .airbyte .metrics .lib .ApmTraceConstants .Tags .JOB_ID_KEY ;
12
13
import static io .airbyte .persistence .job .models .AttemptStatus .FAILED ;
13
14
22
23
import io .airbyte .config .Configs .WorkerEnvironment ;
23
24
import io .airbyte .config .DestinationConnection ;
24
25
import io .airbyte .config .FailureReason ;
26
+ import io .airbyte .config .FailureReason .FailureOrigin ;
25
27
import io .airbyte .config .JobConfig ;
26
28
import io .airbyte .config .JobOutput ;
27
29
import io .airbyte .config .JobSyncConfig ;
56
58
import io .airbyte .workers .run .TemporalWorkerRunFactory ;
57
59
import io .airbyte .workers .run .WorkerRun ;
58
60
import io .micronaut .context .annotation .Requires ;
61
+ import io .micronaut .core .util .CollectionUtils ;
59
62
import jakarta .inject .Singleton ;
60
63
import java .io .IOException ;
61
64
import java .nio .file .Path ;
67
70
import java .util .OptionalLong ;
68
71
import java .util .Set ;
69
72
import java .util .UUID ;
73
+ import java .util .stream .Collectors ;
70
74
import lombok .extern .slf4j .Slf4j ;
71
75
72
76
@ Slf4j
@@ -179,9 +183,8 @@ private void emitSrcIdDstIdToReleaseStagesMetric(final UUID srcId, final UUID ds
179
183
@ Override
180
184
public AttemptCreationOutput createNewAttempt (final AttemptCreationInput input ) throws RetryableException {
181
185
try {
182
- ApmTraceUtils .addTagsToTrace (Map .of (JOB_ID_KEY , input .getJobId ()));
183
-
184
186
final long jobId = input .getJobId ();
187
+ ApmTraceUtils .addTagsToTrace (Map .of (JOB_ID_KEY , jobId ));
185
188
final Job createdJob = jobPersistence .getJob (jobId );
186
189
187
190
final WorkerRun workerRun = temporalWorkerRunFactory .create (createdJob );
@@ -200,9 +203,8 @@ public AttemptCreationOutput createNewAttempt(final AttemptCreationInput input)
200
203
@ Override
201
204
public AttemptNumberCreationOutput createNewAttemptNumber (final AttemptCreationInput input ) throws RetryableException {
202
205
try {
203
- ApmTraceUtils .addTagsToTrace (Map .of (JOB_ID_KEY , input .getJobId ()));
204
-
205
206
final long jobId = input .getJobId ();
207
+ ApmTraceUtils .addTagsToTrace (Map .of (JOB_ID_KEY , jobId ));
206
208
final Job createdJob = jobPersistence .getJob (jobId );
207
209
208
210
final WorkerRun workerRun = temporalWorkerRunFactory .create (createdJob );
@@ -221,10 +223,9 @@ public AttemptNumberCreationOutput createNewAttemptNumber(final AttemptCreationI
221
223
@ Override
222
224
public void jobSuccess (final JobSuccessInput input ) {
223
225
try {
224
- ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , input .getAttemptId (), JOB_ID_KEY , input .getJobId ()));
225
-
226
226
final long jobId = input .getJobId ();
227
227
final int attemptId = input .getAttemptId ();
228
+ ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , attemptId , JOB_ID_KEY , jobId ));
228
229
229
230
if (input .getStandardSyncOutput () != null ) {
230
231
final JobOutput jobOutput = new JobOutput ().withSync (input .getStandardSyncOutput ());
@@ -287,12 +288,13 @@ public void jobFailure(final JobFailureInput input) {
287
288
@ Override
288
289
public void attemptFailure (final AttemptFailureInput input ) {
289
290
try {
290
- ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , input .getAttemptId (), JOB_ID_KEY , input .getJobId ()));
291
-
292
291
final int attemptId = input .getAttemptId ();
293
292
final long jobId = input .getJobId ();
294
293
final AttemptFailureSummary failureSummary = input .getAttemptFailureSummary ();
295
294
295
+ ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , attemptId , JOB_ID_KEY , jobId ));
296
+ traceFailures (failureSummary );
297
+
296
298
jobPersistence .failAttempt (jobId , attemptId );
297
299
jobPersistence .writeAttemptFailureSummary (jobId , attemptId , failureSummary );
298
300
@@ -302,11 +304,7 @@ public void attemptFailure(final AttemptFailureInput input) {
302
304
}
303
305
304
306
emitJobIdToReleaseStagesMetric (OssMetricsRegistry .ATTEMPT_FAILED_BY_RELEASE_STAGE , jobId );
305
- for (final FailureReason reason : failureSummary .getFailures ()) {
306
- MetricClientFactory .getMetricClient ().count (OssMetricsRegistry .ATTEMPT_FAILED_BY_FAILURE_ORIGIN , 1 ,
307
- new MetricAttribute (MetricTags .FAILURE_ORIGIN , MetricTags .getFailureOrigin (reason .getFailureOrigin ())));
308
- }
309
-
307
+ trackFailures (failureSummary );
310
308
} catch (final IOException e ) {
311
309
throw new RetryableException (e );
312
310
}
@@ -329,10 +327,9 @@ public void attemptFailureWithAttemptNumber(final AttemptNumberFailureInput inpu
329
327
@ Override
330
328
public void jobCancelled (final JobCancelledInput input ) {
331
329
try {
332
- ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , input .getAttemptId (), JOB_ID_KEY , input .getJobId ()));
333
-
334
330
final long jobId = input .getJobId ();
335
331
final int attemptId = input .getAttemptId ();
332
+ ApmTraceUtils .addTagsToTrace (Map .of (ATTEMPT_NUMBER_KEY , attemptId , JOB_ID_KEY , jobId ));
336
333
jobPersistence .failAttempt (jobId , attemptId );
337
334
jobPersistence .writeAttemptFailureSummary (jobId , attemptId , input .getAttemptFailureSummary ());
338
335
jobPersistence .cancelJob (jobId );
@@ -487,4 +484,37 @@ private void trackCompletionForInternalFailure(final Long jobId,
487
484
jobTracker .trackSyncForInternalFailure (jobId , connectionId , attemptId , Enums .convertTo (status , JobState .class ), e );
488
485
}
489
486
487
+ /**
488
+ * Adds the failure origins to the APM trace.
489
+ *
490
+ * @param failureSummary The {@link AttemptFailureSummary} containing the failure reason(s).
491
+ */
492
+ private void traceFailures (final AttemptFailureSummary failureSummary ) {
493
+ if (failureSummary != null ) {
494
+ if (CollectionUtils .isNotEmpty (failureSummary .getFailures ())) {
495
+ ApmTraceUtils .addTagsToTrace (Map .of (FAILURE_ORIGINS_KEY , failureSummary .getFailures ().stream ().map (FailureReason ::getFailureOrigin ).map (
496
+ FailureOrigin ::name ).collect (Collectors .joining ("," ))));
497
+ }
498
+ } else {
499
+ ApmTraceUtils .addTagsToTrace (Map .of (FAILURE_ORIGINS_KEY , FailureOrigin .UNKNOWN .value ()));
500
+ }
501
+ }
502
+
503
+ /**
504
+ * Records a metric for each failure reason.
505
+ *
506
+ * @param failureSummary The {@link AttemptFailureSummary} containing the failure reason(s).
507
+ */
508
+ private void trackFailures (final AttemptFailureSummary failureSummary ) {
509
+ if (failureSummary != null ) {
510
+ for (final FailureReason reason : failureSummary .getFailures ()) {
511
+ MetricClientFactory .getMetricClient ().count (OssMetricsRegistry .ATTEMPT_FAILED_BY_FAILURE_ORIGIN , 1 ,
512
+ new MetricAttribute (MetricTags .FAILURE_ORIGIN , MetricTags .getFailureOrigin (reason .getFailureOrigin ())));
513
+ }
514
+ } else {
515
+ MetricClientFactory .getMetricClient ().count (OssMetricsRegistry .ATTEMPT_FAILED_BY_FAILURE_ORIGIN , 1 ,
516
+ new MetricAttribute (MetricTags .FAILURE_ORIGIN , FailureOrigin .UNKNOWN .value ()));
517
+ }
518
+ }
519
+
490
520
}
0 commit comments