@@ -305,10 +305,7 @@ TraceManager::Trace::~Trace()
305
305
setting_->WriteTrace (streams_);
306
306
} else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
307
307
#ifndef _WIN32
308
- auto root_span = opentelemetry::nostd::get<
309
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
310
- this ->otel_context_ .GetValue (kRootSpan ));
311
- EndSpanNow (root_span);
308
+ EndSpan (kRootSpan );
312
309
#else
313
310
LOG_ERROR << " Unsupported trace mode: "
314
311
<< TraceManager::InferenceTraceModeString (setting_->mode_ );
@@ -340,12 +337,7 @@ TraceManager::Trace::CaptureTimestamp(
340
337
<< " {\" name\" :\" " << name << " \" ,\" ns\" :" << timestamp_ns << " }]}" ;
341
338
} else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
342
339
#ifndef _WIN32
343
- auto root_span = opentelemetry::nostd::get<
344
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
345
- this ->otel_context_ .GetValue (kRootSpan ));
346
- root_span->AddEvent (
347
- name, otel_common::SystemTimestamp{
348
- time_offset_ + std::chrono::nanoseconds{timestamp_ns}});
340
+ AddEvent (kRootSpan , name, timestamp_ns);
349
341
#else
350
342
LOG_ERROR << " Unsupported trace mode: "
351
343
<< TraceManager::InferenceTraceModeString (setting_->mode_ );
@@ -379,26 +371,22 @@ TraceManager::Trace::InitTracer(
379
371
std::chrono::duration_cast<std::chrono::nanoseconds>(
380
372
std::chrono::steady_clock::now ().time_since_epoch ())
381
373
.count ();
382
- auto otel_start_timestamp = otel_common::SystemTimestamp{
383
- time_offset_ + std::chrono::nanoseconds{steady_timestamp_ns}};
384
- auto root_span = InitSpan (
385
- " InferRequest" , otel_start_timestamp, steady_timestamp_ns,
386
- true /* is_root_span*/ );
374
+ auto root_span =
375
+ StartSpan (" InferRequest" , steady_timestamp_ns, true /* is_root_span*/ );
387
376
// Initializing OTel context and storing "InferRequest" span as a root span
388
377
// to keep it alive for the duration of the request.
389
378
otel_context_ = opentelemetry::context::Context ({kRootSpan , root_span});
390
379
}
391
380
392
381
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>
393
- TraceManager::Trace::InitSpan (
394
- std::string name, const otel_common::SystemTimestamp& timestamp_ns,
395
- const uint64_t & raw_timestamp_ns, bool is_root_span,
396
- std::string parent_span_key)
382
+ TraceManager::Trace::StartSpan (
383
+ std::string display_name, const uint64_t & raw_timestamp_ns,
384
+ bool is_root_span, std::string parent_span_key)
397
385
{
398
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span{nullptr };
399
386
otel_trace_api::StartSpanOptions options;
400
387
options.kind = otel_trace_api::SpanKind::kServer ;
401
- options.start_system_time = timestamp_ns;
388
+ options.start_system_time =
389
+ time_offset_ + std::chrono::nanoseconds{raw_timestamp_ns};
402
390
options.start_steady_time =
403
391
otel_common::SteadyTimestamp{std::chrono::nanoseconds{raw_timestamp_ns}};
404
392
@@ -410,29 +398,32 @@ TraceManager::Trace::InitSpan(
410
398
otel_context_.GetValue (parent_span_key));
411
399
options.parent = parent_span->GetContext ();
412
400
}
413
- span = provider_->GetTracer (kTritonTracer )->StartSpan (name, options);
414
- return span;
401
+ return provider_->GetTracer (kTritonTracer )->StartSpan (display_name, options);
415
402
}
416
403
417
404
void
418
- TraceManager::Trace::EndSpanNow (
419
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span)
405
+ TraceManager::Trace::EndSpan (std::string span_key)
420
406
{
421
- if (span != nullptr ) {
422
- auto timestamp_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
423
- std::chrono::steady_clock::now ().time_since_epoch ())
424
- .count ();
425
- EndSpan (span, timestamp_ns);
426
- }
407
+ auto timestamp_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
408
+ std::chrono::steady_clock::now ().time_since_epoch ())
409
+ .count ();
410
+ EndSpan (span_key, timestamp_ns);
427
411
}
428
412
429
413
430
414
void
431
415
TraceManager::Trace::EndSpan (
432
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span,
433
- const uint64_t & raw_timestamp_ns)
416
+ std::string span_key, const uint64_t & raw_timestamp_ns)
434
417
{
435
- if (span != nullptr ) {
418
+ if (otel_context_.HasKey (span_key)) {
419
+ auto span = opentelemetry::nostd::get<
420
+ opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
421
+ otel_context_.GetValue (span_key));
422
+
423
+ if (span == nullptr ) {
424
+ return ;
425
+ }
426
+
436
427
otel_trace_api::EndSpanOptions end_options;
437
428
end_options.end_steady_time = otel_common::SteadyTimestamp{
438
429
std::chrono::nanoseconds{raw_timestamp_ns}};
@@ -453,106 +444,16 @@ TraceManager::Trace::ReportToOpenTelemetry(
453
444
LOG_TRITONSERVER_ERROR (
454
445
TRITONSERVER_InferenceTraceId (trace, &id), " getting trace id" );
455
446
current_span_key = current_span_key + std::to_string (id);
456
- switch (activity) {
457
- case TRITONSERVER_TRACE_REQUEST_START: {
458
- const char * model_name;
459
- int64_t model_version;
460
- uint64_t parent_id;
461
- const char * request_id;
462
-
463
- LOG_TRITONSERVER_ERROR (
464
- TRITONSERVER_InferenceTraceModelName (trace, &model_name),
465
- " getting model name" );
466
- LOG_TRITONSERVER_ERROR (
467
- TRITONSERVER_InferenceTraceModelVersion (trace, &model_version),
468
- " getting model version" );
469
- LOG_TRITONSERVER_ERROR (
470
- TRITONSERVER_InferenceTraceParentId (trace, &parent_id),
471
- " getting trace parent id" );
472
- LOG_TRITONSERVER_ERROR (
473
- TRITONSERVER_InferenceTraceRequestId (trace, &request_id),
474
- " getting request id" );
475
-
476
- // Currently, only 2 types of sub-spans are supported:
477
- // request span and compute span. Compute span is a leaf span
478
- // and can not be a parent of any sub-span. If parent_id=0,
479
- // then current model is either a standalone model, or an ensemble model.
480
- // In both of the above cases, the parent of the new request sub-span
481
- // is the kRootSpan.
482
- std::string parent_span_key = kRootSpan ;
483
- if (parent_id != 0 ) {
484
- // If parent_id > 0, then this is a child trace, spawned from
485
- // the ensamble's main request. For this instance, the parent
486
- // span is the ensembles's request span.
487
- parent_span_key = kRequestSpan + std::to_string (parent_id);
488
- }
489
- auto request_span = this ->InitSpan (
490
- model_name,
491
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns},
492
- timestamp_ns, false /* is_root_span*/ , parent_span_key);
493
- request_span->SetAttribute (" triton.model_name" , model_name);
494
- request_span->SetAttribute (" triton.model_version" , model_version);
495
- request_span->SetAttribute (" triton.trace_id" , id);
496
- request_span->SetAttribute (" triton.trace_parent_id" , parent_id);
497
- request_span->AddEvent (
498
- TRITONSERVER_InferenceTraceActivityString (activity),
499
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
500
- // Adding span to the OpenTelemetry context, to keep it alive and active
501
- // until we end it
502
- this ->otel_context_ =
503
- this ->otel_context_ .SetValue (current_span_key, request_span);
504
- break ;
505
- }
506
- case TRITONSERVER_TRACE_COMPUTE_START: {
507
- // Any compute span is a child of a request span with the same id.
508
- auto compute_span = this ->InitSpan (
509
- " compute" ,
510
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns},
511
- timestamp_ns, false /* is_root_span*/ ,
512
- kRequestSpan + std::to_string (id));
513
- compute_span->AddEvent (
514
- TRITONSERVER_InferenceTraceActivityString (activity),
515
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
516
- // Adding span to the OpenTelemetry context, to keep it alive and active
517
- // until we end it
518
- this ->otel_context_ =
519
- this ->otel_context_ .SetValue (current_span_key, compute_span);
520
- break ;
521
- }
522
- case TRITONSERVER_TRACE_QUEUE_START:
523
- case TRITONSERVER_TRACE_COMPUTE_INPUT_END:
524
- case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START: {
525
- if (this ->otel_context_ .HasKey (current_span_key)) {
526
- auto span = opentelemetry::nostd::get<
527
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
528
- this ->otel_context_ .GetValue (current_span_key));
529
- span->AddEvent (
530
- TRITONSERVER_InferenceTraceActivityString (activity),
531
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
532
- }
533
- break ;
534
- }
535
- case TRITONSERVER_TRACE_COMPUTE_END:
536
- case TRITONSERVER_TRACE_REQUEST_END: {
537
- if (this ->otel_context_ .HasKey (current_span_key)) {
538
- auto span = opentelemetry::nostd::get<
539
- opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
540
- this ->otel_context_ .GetValue (current_span_key));
541
- span->AddEvent (
542
- TRITONSERVER_InferenceTraceActivityString (activity),
543
- this ->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
544
- this ->EndSpan (span, timestamp_ns);
545
- }
546
- break ;
547
- }
548
- case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT:
549
- case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT:
550
- case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT:
551
- default : {
552
- LOG_ERROR << " Unsupported activity: "
553
- << TRITONSERVER_InferenceTraceActivityString (activity);
554
- break ;
555
- }
447
+
448
+ MaybeStartSpan (current_span_key, trace, activity, timestamp_ns, id);
449
+
450
+ AddEvent (
451
+ current_span_key, TRITONSERVER_InferenceTraceActivityString (activity),
452
+ timestamp_ns);
453
+
454
+ if (activity == TRITONSERVER_TRACE_REQUEST_END ||
455
+ activity == TRITONSERVER_TRACE_COMPUTE_END) {
456
+ EndSpan (current_span_key, timestamp_ns);
556
457
}
557
458
}
558
459
@@ -590,6 +491,69 @@ TraceManager::Trace::GetSpanNameForActivity(
590
491
return span_name;
591
492
}
592
493
494
+ void
495
+ TraceManager::Trace::AddEvent (
496
+ std::string span_key, std::string event, uint64_t timestamp)
497
+ {
498
+ if (otel_context_.HasKey (span_key)) {
499
+ auto span = opentelemetry::nostd::get<
500
+ opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
501
+ otel_context_.GetValue (span_key));
502
+ span->AddEvent (event, time_offset_ + std::chrono::nanoseconds{timestamp});
503
+ }
504
+ }
505
+
506
+ void
507
+ TraceManager::Trace::MaybeStartSpan (
508
+ std::string span_key, TRITONSERVER_InferenceTrace* trace,
509
+ TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
510
+ uint64_t id)
511
+ {
512
+ if (activity != TRITONSERVER_TRACE_REQUEST_START &&
513
+ activity != TRITONSERVER_TRACE_COMPUTE_START) {
514
+ return ;
515
+ }
516
+
517
+ // Currently, only 2 types of sub-spans are supported:
518
+ // request span and compute span. Compute span is a leaf span
519
+ // and can not be a parent of any sub-span. If parent_id==0,
520
+ // then current model is either a standalone model, or an ensemble model.
521
+ // In both cases, the parent of the new request sub-span is the kRootSpan.
522
+ // If parent_id > 0, then this is a child trace, spawned from
523
+ // the ensamble's main request. For this instance, the parent
524
+ // span is the ensembles's request span.
525
+ uint64_t parent_id;
526
+ LOG_TRITONSERVER_ERROR (
527
+ TRITONSERVER_InferenceTraceParentId (trace, &parent_id),
528
+ " getting trace parent id" );
529
+ std::string parent_span_key =
530
+ (parent_id != 0 ) ? kRequestSpan + std::to_string (parent_id) : kRootSpan ;
531
+
532
+ std::string display_name = " compute" ;
533
+ const char * model_name;
534
+ if (activity == TRITONSERVER_TRACE_REQUEST_START) {
535
+ LOG_TRITONSERVER_ERROR (
536
+ TRITONSERVER_InferenceTraceModelName (trace, &model_name),
537
+ " getting model name" );
538
+ display_name = model_name;
539
+ }
540
+
541
+ auto span = StartSpan (
542
+ display_name, timestamp_ns, false /* is_root_span*/ , parent_span_key);
543
+
544
+ if (activity == TRITONSERVER_TRACE_REQUEST_START) {
545
+ int64_t model_version;
546
+ LOG_TRITONSERVER_ERROR (
547
+ TRITONSERVER_InferenceTraceModelVersion (trace, &model_version),
548
+ " getting model version" );
549
+ span->SetAttribute (" triton.model_name" , model_name);
550
+ span->SetAttribute (" triton.model_version" , model_version);
551
+ span->SetAttribute (" triton.trace_id" , id);
552
+ span->SetAttribute (" triton.trace_parent_id" , parent_id);
553
+ }
554
+
555
+ otel_context_ = otel_context_.SetValue (span_key, span);
556
+ }
593
557
#endif
594
558
595
559
void
@@ -650,17 +614,15 @@ TraceManager::TraceActivity(
650
614
651
615
std::stringstream* ss = nullptr ;
652
616
{
653
- if (ts->setting_ ->mode_ == TRACE_MODE_TRITON) {
654
- if (ts->streams_ .find (id) == ts->streams_ .end ()) {
655
- std::unique_ptr<std::stringstream> stream (new std::stringstream ());
656
- ss = stream.get ();
657
- ts->streams_ .emplace (id, std::move (stream));
658
- } else {
659
- ss = ts->streams_ [id].get ();
660
- // If the string stream is not newly created, add "," as there is
661
- // already content in the string stream
662
- *ss << " ," ;
663
- }
617
+ if (ts->streams_ .find (id) == ts->streams_ .end ()) {
618
+ std::unique_ptr<std::stringstream> stream (new std::stringstream ());
619
+ ss = stream.get ();
620
+ ts->streams_ .emplace (id, std::move (stream));
621
+ } else {
622
+ ss = ts->streams_ [id].get ();
623
+ // If the string stream is not newly created, add "," as there is
624
+ // already content in the string stream
625
+ *ss << " ," ;
664
626
}
665
627
}
666
628
// If 'activity' is TRITONSERVER_TRACE_REQUEST_START then collect
@@ -684,26 +646,22 @@ TraceManager::TraceActivity(
684
646
TRITONSERVER_InferenceTraceRequestId (trace, &request_id),
685
647
" getting request id" );
686
648
687
- if (ts->setting_ ->mode_ == TRACE_MODE_TRITON) {
688
- *ss << " {\" id\" :" << id << " ,\" model_name\" :\" " << model_name
689
- << " \" ,\" model_version\" :" << model_version;
649
+ *ss << " {\" id\" :" << id << " ,\" model_name\" :\" " << model_name
650
+ << " \" ,\" model_version\" :" << model_version;
690
651
691
- if (std::string (request_id) != " " ) {
692
- *ss << " ,\" request_id\" :\" " << request_id << " \" " ;
693
- }
652
+ if (std::string (request_id) != " " ) {
653
+ *ss << " ,\" request_id\" :\" " << request_id << " \" " ;
654
+ }
694
655
695
- if (parent_id != 0 ) {
696
- *ss << " ,\" parent_id\" :" << parent_id;
697
- }
698
- *ss << " }," ;
656
+ if (parent_id != 0 ) {
657
+ *ss << " ,\" parent_id\" :" << parent_id;
699
658
}
659
+ *ss << " }," ;
700
660
}
701
661
702
- if (ts->setting_ ->mode_ == TRACE_MODE_TRITON) {
703
- *ss << " {\" id\" :" << id << " ,\" timestamps\" :["
704
- << " {\" name\" :\" " << TRITONSERVER_InferenceTraceActivityString (activity)
705
- << " \" ,\" ns\" :" << timestamp_ns << " }]}" ;
706
- }
662
+ *ss << " {\" id\" :" << id << " ,\" timestamps\" :["
663
+ << " {\" name\" :\" " << TRITONSERVER_InferenceTraceActivityString (activity)
664
+ << " \" ,\" ns\" :" << timestamp_ns << " }]}" ;
707
665
}
708
666
709
667
void
0 commit comments