Skip to content

Commit eaf9871

Browse files
committed
Revision 3
1 parent 0f3d51a commit eaf9871

File tree

2 files changed

+158
-189
lines changed

2 files changed

+158
-189
lines changed

src/tracer.cc

+118-160
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,7 @@ TraceManager::Trace::~Trace()
305305
setting_->WriteTrace(streams_);
306306
} else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
307307
#ifndef _WIN32
308-
auto root_span = opentelemetry::nostd::get<
309-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
310-
this->otel_context_.GetValue(kRootSpan));
311-
EndSpanNow(root_span);
308+
EndSpan(kRootSpan);
312309
#else
313310
LOG_ERROR << "Unsupported trace mode: "
314311
<< TraceManager::InferenceTraceModeString(setting_->mode_);
@@ -340,12 +337,7 @@ TraceManager::Trace::CaptureTimestamp(
340337
<< "{\"name\":\"" << name << "\",\"ns\":" << timestamp_ns << "}]}";
341338
} else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) {
342339
#ifndef _WIN32
343-
auto root_span = opentelemetry::nostd::get<
344-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
345-
this->otel_context_.GetValue(kRootSpan));
346-
root_span->AddEvent(
347-
name, otel_common::SystemTimestamp{
348-
time_offset_ + std::chrono::nanoseconds{timestamp_ns}});
340+
AddEvent(kRootSpan, name, timestamp_ns);
349341
#else
350342
LOG_ERROR << "Unsupported trace mode: "
351343
<< TraceManager::InferenceTraceModeString(setting_->mode_);
@@ -379,26 +371,22 @@ TraceManager::Trace::InitTracer(
379371
std::chrono::duration_cast<std::chrono::nanoseconds>(
380372
std::chrono::steady_clock::now().time_since_epoch())
381373
.count();
382-
auto otel_start_timestamp = otel_common::SystemTimestamp{
383-
time_offset_ + std::chrono::nanoseconds{steady_timestamp_ns}};
384-
auto root_span = InitSpan(
385-
"InferRequest", otel_start_timestamp, steady_timestamp_ns,
386-
true /*is_root_span*/);
374+
auto root_span =
375+
StartSpan("InferRequest", steady_timestamp_ns, true /*is_root_span*/);
387376
// Initializing OTel context and storring "InferRequest" span as a root span
388377
// to keep it alive for the duration of the request.
389378
otel_context_ = opentelemetry::context::Context({kRootSpan, root_span});
390379
}
391380

392381
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>
393-
TraceManager::Trace::InitSpan(
394-
std::string name, const otel_common::SystemTimestamp& timestamp_ns,
395-
const uint64_t& raw_timestamp_ns, bool is_root_span,
396-
std::string parent_span_key)
382+
TraceManager::Trace::StartSpan(
383+
std::string display_name, const uint64_t& raw_timestamp_ns,
384+
bool is_root_span, std::string parent_span_key)
397385
{
398-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span{nullptr};
399386
otel_trace_api::StartSpanOptions options;
400387
options.kind = otel_trace_api::SpanKind::kServer;
401-
options.start_system_time = timestamp_ns;
388+
options.start_system_time =
389+
time_offset_ + std::chrono::nanoseconds{raw_timestamp_ns};
402390
options.start_steady_time =
403391
otel_common::SteadyTimestamp{std::chrono::nanoseconds{raw_timestamp_ns}};
404392

@@ -410,29 +398,32 @@ TraceManager::Trace::InitSpan(
410398
otel_context_.GetValue(parent_span_key));
411399
options.parent = parent_span->GetContext();
412400
}
413-
span = provider_->GetTracer(kTritonTracer)->StartSpan(name, options);
414-
return span;
401+
return provider_->GetTracer(kTritonTracer)->StartSpan(display_name, options);
415402
}
416403

417404
void
418-
TraceManager::Trace::EndSpanNow(
419-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span)
405+
TraceManager::Trace::EndSpan(std::string span_key)
420406
{
421-
if (span != nullptr) {
422-
auto timestamp_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
423-
std::chrono::steady_clock::now().time_since_epoch())
424-
.count();
425-
EndSpan(span, timestamp_ns);
426-
}
407+
auto timestamp_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
408+
std::chrono::steady_clock::now().time_since_epoch())
409+
.count();
410+
EndSpan(span_key, timestamp_ns);
427411
}
428412

429413

430414
void
431415
TraceManager::Trace::EndSpan(
432-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span> span,
433-
const uint64_t& raw_timestamp_ns)
416+
std::string span_key, const uint64_t& raw_timestamp_ns)
434417
{
435-
if (span != nullptr) {
418+
if (otel_context_.HasKey(span_key)) {
419+
auto span = opentelemetry::nostd::get<
420+
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
421+
otel_context_.GetValue(span_key));
422+
423+
if (span == nullptr) {
424+
return;
425+
}
426+
436427
otel_trace_api::EndSpanOptions end_options;
437428
end_options.end_steady_time = otel_common::SteadyTimestamp{
438429
std::chrono::nanoseconds{raw_timestamp_ns}};
@@ -453,106 +444,16 @@ TraceManager::Trace::ReportToOpenTelemetry(
453444
LOG_TRITONSERVER_ERROR(
454445
TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id");
455446
current_span_key = current_span_key + std::to_string(id);
456-
switch (activity) {
457-
case TRITONSERVER_TRACE_REQUEST_START: {
458-
const char* model_name;
459-
int64_t model_version;
460-
uint64_t parent_id;
461-
const char* request_id;
462-
463-
LOG_TRITONSERVER_ERROR(
464-
TRITONSERVER_InferenceTraceModelName(trace, &model_name),
465-
"getting model name");
466-
LOG_TRITONSERVER_ERROR(
467-
TRITONSERVER_InferenceTraceModelVersion(trace, &model_version),
468-
"getting model version");
469-
LOG_TRITONSERVER_ERROR(
470-
TRITONSERVER_InferenceTraceParentId(trace, &parent_id),
471-
"getting trace parent id");
472-
LOG_TRITONSERVER_ERROR(
473-
TRITONSERVER_InferenceTraceRequestId(trace, &request_id),
474-
"getting request id");
475-
476-
// Currently, only 2 types of sub-spans are supported:
477-
// request span and compute span. Compute span is a leaf span
478-
// and can not be a parent of any sub-span. If parent_id=0,
479-
// then current model is either a standalone model, or an ensemble model.
480-
// In both of the above cases, the parent of the new request sub-span
481-
// is the kRootSpan.
482-
std::string parent_span_key = kRootSpan;
483-
if (parent_id != 0) {
484-
// If parent_id > 0, then this is a child trace, spawned from
485-
// the ensamble's main request. For this instance, the parent
486-
// span is the ensembles's request span.
487-
parent_span_key = kRequestSpan + std::to_string(parent_id);
488-
}
489-
auto request_span = this->InitSpan(
490-
model_name,
491-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns},
492-
timestamp_ns, false /*is_root_span*/, parent_span_key);
493-
request_span->SetAttribute("triton.model_name", model_name);
494-
request_span->SetAttribute("triton.model_version", model_version);
495-
request_span->SetAttribute("triton.trace_id", id);
496-
request_span->SetAttribute("triton.trace_parent_id", parent_id);
497-
request_span->AddEvent(
498-
TRITONSERVER_InferenceTraceActivityString(activity),
499-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
500-
// Adding span to the OpenTelemetry context, to keep it alive and active
501-
// until we end it
502-
this->otel_context_ =
503-
this->otel_context_.SetValue(current_span_key, request_span);
504-
break;
505-
}
506-
case TRITONSERVER_TRACE_COMPUTE_START: {
507-
// Any compute span is a child of a request span with the same id.
508-
auto compute_span = this->InitSpan(
509-
"compute",
510-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns},
511-
timestamp_ns, false /*is_root_span*/,
512-
kRequestSpan + std::to_string(id));
513-
compute_span->AddEvent(
514-
TRITONSERVER_InferenceTraceActivityString(activity),
515-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
516-
// Adding span to the OpenTelemetry context, to keep it alive and active
517-
// until we end it
518-
this->otel_context_ =
519-
this->otel_context_.SetValue(current_span_key, compute_span);
520-
break;
521-
}
522-
case TRITONSERVER_TRACE_QUEUE_START:
523-
case TRITONSERVER_TRACE_COMPUTE_INPUT_END:
524-
case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START: {
525-
if (this->otel_context_.HasKey(current_span_key)) {
526-
auto span = opentelemetry::nostd::get<
527-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
528-
this->otel_context_.GetValue(current_span_key));
529-
span->AddEvent(
530-
TRITONSERVER_InferenceTraceActivityString(activity),
531-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
532-
}
533-
break;
534-
}
535-
case TRITONSERVER_TRACE_COMPUTE_END:
536-
case TRITONSERVER_TRACE_REQUEST_END: {
537-
if (this->otel_context_.HasKey(current_span_key)) {
538-
auto span = opentelemetry::nostd::get<
539-
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
540-
this->otel_context_.GetValue(current_span_key));
541-
span->AddEvent(
542-
TRITONSERVER_InferenceTraceActivityString(activity),
543-
this->time_offset_ + std::chrono::nanoseconds{timestamp_ns});
544-
this->EndSpan(span, timestamp_ns);
545-
}
546-
break;
547-
}
548-
case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT:
549-
case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT:
550-
case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT:
551-
default: {
552-
LOG_ERROR << "Unsupported activity: "
553-
<< TRITONSERVER_InferenceTraceActivityString(activity);
554-
break;
555-
}
447+
448+
MaybeStartSpan(current_span_key, trace, activity, timestamp_ns, id);
449+
450+
AddEvent(
451+
current_span_key, TRITONSERVER_InferenceTraceActivityString(activity),
452+
timestamp_ns);
453+
454+
if (activity == TRITONSERVER_TRACE_REQUEST_END ||
455+
activity == TRITONSERVER_TRACE_COMPUTE_END) {
456+
EndSpan(current_span_key, timestamp_ns);
556457
}
557458
}
558459

@@ -590,6 +491,69 @@ TraceManager::Trace::GetSpanNameForActivity(
590491
return span_name;
591492
}
592493

494+
void
495+
TraceManager::Trace::AddEvent(
496+
std::string span_key, std::string event, uint64_t timestamp)
497+
{
498+
if (otel_context_.HasKey(span_key)) {
499+
auto span = opentelemetry::nostd::get<
500+
opentelemetry::nostd::shared_ptr<otel_trace_api::Span>>(
501+
otel_context_.GetValue(span_key));
502+
span->AddEvent(event, time_offset_ + std::chrono::nanoseconds{timestamp});
503+
}
504+
}
505+
506+
void
507+
TraceManager::Trace::MaybeStartSpan(
508+
std::string span_key, TRITONSERVER_InferenceTrace* trace,
509+
TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
510+
uint64_t id)
511+
{
512+
if (activity != TRITONSERVER_TRACE_REQUEST_START &&
513+
activity != TRITONSERVER_TRACE_COMPUTE_START) {
514+
return;
515+
}
516+
517+
// Currently, only 2 types of sub-spans are supported:
518+
// request span and compute span. Compute span is a leaf span
519+
// and can not be a parent of any sub-span. If parent_id==0,
520+
// then current model is either a standalone model, or an ensemble model.
521+
// In both cases, the parent of the new request sub-span is the kRootSpan.
522+
// If parent_id > 0, then this is a child trace, spawned from
523+
// the ensamble's main request. For this instance, the parent
524+
// span is the ensembles's request span.
525+
uint64_t parent_id;
526+
LOG_TRITONSERVER_ERROR(
527+
TRITONSERVER_InferenceTraceParentId(trace, &parent_id),
528+
"getting trace parent id");
529+
std::string parent_span_key =
530+
(parent_id != 0) ? kRequestSpan + std::to_string(parent_id) : kRootSpan;
531+
532+
std::string display_name = "compute";
533+
const char* model_name;
534+
if (activity == TRITONSERVER_TRACE_REQUEST_START) {
535+
LOG_TRITONSERVER_ERROR(
536+
TRITONSERVER_InferenceTraceModelName(trace, &model_name),
537+
"getting model name");
538+
display_name = model_name;
539+
}
540+
541+
auto span = StartSpan(
542+
display_name, timestamp_ns, false /*is_root_span*/, parent_span_key);
543+
544+
if (activity == TRITONSERVER_TRACE_REQUEST_START) {
545+
int64_t model_version;
546+
LOG_TRITONSERVER_ERROR(
547+
TRITONSERVER_InferenceTraceModelVersion(trace, &model_version),
548+
"getting model version");
549+
span->SetAttribute("triton.model_name", model_name);
550+
span->SetAttribute("triton.model_version", model_version);
551+
span->SetAttribute("triton.trace_id", id);
552+
span->SetAttribute("triton.trace_parent_id", parent_id);
553+
}
554+
555+
otel_context_ = otel_context_.SetValue(span_key, span);
556+
}
593557
#endif
594558

595559
void
@@ -650,17 +614,15 @@ TraceManager::TraceActivity(
650614

651615
std::stringstream* ss = nullptr;
652616
{
653-
if (ts->setting_->mode_ == TRACE_MODE_TRITON) {
654-
if (ts->streams_.find(id) == ts->streams_.end()) {
655-
std::unique_ptr<std::stringstream> stream(new std::stringstream());
656-
ss = stream.get();
657-
ts->streams_.emplace(id, std::move(stream));
658-
} else {
659-
ss = ts->streams_[id].get();
660-
// If the string stream is not newly created, add "," as there is
661-
// already content in the string stream
662-
*ss << ",";
663-
}
617+
if (ts->streams_.find(id) == ts->streams_.end()) {
618+
std::unique_ptr<std::stringstream> stream(new std::stringstream());
619+
ss = stream.get();
620+
ts->streams_.emplace(id, std::move(stream));
621+
} else {
622+
ss = ts->streams_[id].get();
623+
// If the string stream is not newly created, add "," as there is
624+
// already content in the string stream
625+
*ss << ",";
664626
}
665627
}
666628
// If 'activity' is TRITONSERVER_TRACE_REQUEST_START then collect
@@ -684,26 +646,22 @@ TraceManager::TraceActivity(
684646
TRITONSERVER_InferenceTraceRequestId(trace, &request_id),
685647
"getting request id");
686648

687-
if (ts->setting_->mode_ == TRACE_MODE_TRITON) {
688-
*ss << "{\"id\":" << id << ",\"model_name\":\"" << model_name
689-
<< "\",\"model_version\":" << model_version;
649+
*ss << "{\"id\":" << id << ",\"model_name\":\"" << model_name
650+
<< "\",\"model_version\":" << model_version;
690651

691-
if (std::string(request_id) != "") {
692-
*ss << ",\"request_id\":\"" << request_id << "\"";
693-
}
652+
if (std::string(request_id) != "") {
653+
*ss << ",\"request_id\":\"" << request_id << "\"";
654+
}
694655

695-
if (parent_id != 0) {
696-
*ss << ",\"parent_id\":" << parent_id;
697-
}
698-
*ss << "},";
656+
if (parent_id != 0) {
657+
*ss << ",\"parent_id\":" << parent_id;
699658
}
659+
*ss << "},";
700660
}
701661

702-
if (ts->setting_->mode_ == TRACE_MODE_TRITON) {
703-
*ss << "{\"id\":" << id << ",\"timestamps\":["
704-
<< "{\"name\":\"" << TRITONSERVER_InferenceTraceActivityString(activity)
705-
<< "\",\"ns\":" << timestamp_ns << "}]}";
706-
}
662+
*ss << "{\"id\":" << id << ",\"timestamps\":["
663+
<< "{\"name\":\"" << TRITONSERVER_InferenceTraceActivityString(activity)
664+
<< "\",\"ns\":" << timestamp_ns << "}]}";
707665
}
708666

709667
void

0 commit comments

Comments
 (0)