Commit 6950941

Adding InferenceTrace object
1 parent fa9be85 · commit 6950941

5 files changed (+29, -13 lines)

src/infer_request.cc (+2, -2)

@@ -44,7 +44,7 @@ InferRequest::InferRequest(
     const std::string& model_name, const int64_t model_version,
     const std::string& parameters, const uint32_t flags, const int32_t timeout,
     const intptr_t response_factory_address, const intptr_t request_address,
-    const PreferredMemory& preferred_memory, TRITONSERVER_InferenceTrace* trace)
+    const PreferredMemory& preferred_memory, const InferenceTrace& trace)
     : request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
       requested_output_names_(requested_output_names), model_name_(model_name),
       model_version_(model_version), parameters_(parameters), flags_(flags),
@@ -167,7 +167,7 @@ InferRequest::GetPreferredMemory()
   return preferred_memory_;
 }
 
-TRITONSERVER_InferenceTrace*
+InferenceTrace&
 InferRequest::Trace()
 {
   return trace_;

src/infer_request.h (+15, -4)

@@ -41,6 +41,17 @@ namespace triton { namespace backend { namespace python {
 
 class Stub;
 
+//
+// Inference Trace
+//
+struct InferenceTrace {
+#ifndef TRITON_PB_STUB
+  TRITONSERVER_InferenceTrace* triton_trace_;
+#else
+  void* triton_trace_;
+#endif
+};
+
 //
 // Inference Request
 //
@@ -55,7 +66,7 @@ struct InferRequestShm {
   bool is_decoupled;
   int32_t timeout;
   PreferredMemory preferred_memory;
-  TRITONSERVER_InferenceTrace* trace;
+  InferenceTrace trace;
 };
 
 class InferRequest {
@@ -70,7 +81,7 @@ class InferRequest {
       const intptr_t request_address = 0,
       const PreferredMemory& preferred_memory =
           PreferredMemory(PreferredMemory::DEFAULT, 0),
-      TRITONSERVER_InferenceTrace* trace = nullptr);
+      const InferenceTrace& trace = {.triton_trace_ = nullptr});
 
   const std::vector<std::shared_ptr<PbTensor>>& Inputs();
   const std::string& RequestId();
@@ -86,7 +97,7 @@ class InferRequest {
   bool IsDecoupled();
   void SetIsDecoupled(const bool is_decoupled);
   PreferredMemory& GetPreferredMemory();
-  TRITONSERVER_InferenceTrace* Trace();
+  InferenceTrace& Trace();
 
 #ifdef TRITON_PB_STUB
   std::shared_ptr<InferResponse> Exec(const bool is_decoupled);
@@ -142,7 +153,7 @@ class InferRequest {
   intptr_t request_address_;
   bool is_decoupled_;
   PreferredMemory preferred_memory_;
-  TRITONSERVER_InferenceTrace* trace_;
+  InferenceTrace trace_;
 
   // Shared Memory Data Structures
   AllocatedSharedMemory<char> infer_request_shm_;
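
The #ifdef split in the new struct is the heart of the change: the Python stub process is built without the Triton server headers, so it cannot name TRITONSERVER_InferenceTrace, while the backend process can. Wrapping the pointer in a plain struct gives both processes one layout they can pass through shared memory. A minimal standalone sketch of the pattern; the forward declaration and main are illustrative additions so the sketch compiles on its own, not part of the commit:

#include <cstdio>

#ifndef TRITON_PB_STUB
// Backend build: the real Triton type is visible (forward-declared
// here only so the sketch is self-contained).
struct TRITONSERVER_InferenceTrace;
#endif

struct InferenceTrace {
#ifndef TRITON_PB_STUB
  TRITONSERVER_InferenceTrace* triton_trace_;
#else
  void* triton_trace_;  // stub build: type-erased, never dereferenced
#endif
};

int main()
{
  // Either build can copy the struct around; only the backend build
  // ever interprets the pointer.
  InferenceTrace trace = {.triton_trace_ = nullptr};
  std::printf("traced: %s\n", trace.triton_trace_ ? "yes" : "no");
}

Note that the defaulted constructor argument {.triton_trace_ = nullptr} relies on designated initializers, which are standard C++ only from C++20 (earlier compilers accept them as an extension).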

src/pb_stub.cc (+6, -3)

@@ -1362,6 +1362,9 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::CPU)
       .export_values();
 
+  py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
+      module, "InferenceTrace");
+
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
       .def(
@@ -1372,12 +1375,11 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
              const int64_t model_version, const uint32_t flags,
              const int32_t timeout,
              const PreferredMemory& preferred_memory,
-             std::shared_ptr<InferRequest>& request) {
+             const InferenceTrace& trace) {
             std::set<std::string> requested_outputs;
             for (auto& requested_output_name : requested_output_names) {
              requested_outputs.emplace(requested_output_name);
             }
-            auto trace = (request != nullptr) ? request->Trace() : nullptr;
             // FIXME: InferenceRequest parameters are not supported in BLS now.
             return std::make_shared<InferRequest>(
                 request_id, correlation_id, inputs, requested_outputs,
@@ -1394,7 +1396,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
           py::arg("flags").none(false) = 0, py::arg("timeout").none(false) = 0,
           py::arg("preferred_memory").none(false) =
               PreferredMemory(PreferredMemory::DEFAULT, 0),
-          py::arg("request").none(false) = nullptr)
+          py::arg("trace").none(false) = nullptr)
       .def(
           "inputs", &InferRequest::Inputs,
           py::return_value_policy::reference_internal)
@@ -1404,6 +1406,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .def("set_flags", &InferRequest::SetFlags)
       .def("timeout", &InferRequest::Timeout)
       .def("parameters", &InferRequest::Parameters)
+      .def("trace", &InferRequest::Trace)
       .def(
           "exec",
           [](std::shared_ptr<InferRequest>& infer_request,
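
On the stub side, binding InferenceTrace with no constructors or members makes it an opaque handle: Python model code can fetch one from a request through the new trace() method and hand it to a BLS InferenceRequest via the new trace argument, but cannot create or inspect one. A rough standalone illustration of that binding pattern, assuming pybind11 is available; the trace_demo module and its make_trace/has_trace helpers are invented for the demo:

#include <memory>

#include <pybind11/embed.h>
namespace py = pybind11;

// Simplified stand-in for the struct bound in the commit.
struct InferenceTrace {
  void* triton_trace_;
};

PYBIND11_EMBEDDED_MODULE(trace_demo, m)
{
  // No .def(py::init<>()) and no members: Python can only hold and
  // pass the handle, exactly like the binding added above.
  py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
      m, "InferenceTrace");

  // Invented helpers, for demonstration only.
  m.def("make_trace", [] { return std::make_shared<InferenceTrace>(); });
  m.def("has_trace", [](const InferenceTrace& t) {
    return t.triton_trace_ != nullptr;
  });
}

int main()
{
  py::scoped_interpreter guard{};
  py::exec(R"(
import trace_demo
t = trace_demo.make_trace()
print(trace_demo.has_trace(t))  # False: the wrapped pointer is null
)");
}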

src/python_be.cc (+4, -2)

@@ -364,8 +364,10 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     uint32_t flags;
     RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));
 
-    TRITONSERVER_InferenceTrace* trace;
-    RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &trace));
+    TRITONSERVER_InferenceTrace* triton_trace;
+    RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &triton_trace));
+
+    InferenceTrace trace = {triton_trace};
 
     std::unique_ptr<InferRequest> infer_request;
     if (model_state->IsDecoupled()) {

src/request_executor.cc (+2, -2)

@@ -360,9 +360,9 @@ RequestExecutor::Infer(
       irequest, InferRequestComplete, nullptr /* request_release_userp */));
 
   TRITONSERVER_InferenceTrace* trace = nullptr;
-  if (infer_request->Trace() != nullptr) {
+  if (infer_request->Trace().triton_trace_ != nullptr) {
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
-        infer_request->Trace(), &trace));
+        infer_request->Trace().triton_trace_, &trace));
   }
 
   for (auto& infer_input : infer_request->Inputs()) {
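
When the stub executes a BLS request, tracing is propagated by spawning a child trace from the parent's trace, and only if the parent request is actually being traced. A standalone sketch of that guard; MockTrace and SpawnChildTrace are hypothetical stand-ins for TRITONSERVER_InferenceTrace and TRITONSERVER_InferenceTraceSpawnChildTrace so the snippet runs without the server library:

#include <cstdio>

struct MockTrace {  // stand-in for TRITONSERVER_InferenceTrace
  int id;
};

struct InferenceTrace {
  MockTrace* triton_trace_;
};

// Stand-in for TRITONSERVER_InferenceTraceSpawnChildTrace: derives a
// child trace from a live parent trace.
void SpawnChildTrace(MockTrace* parent, MockTrace** child)
{
  static MockTrace child_storage{};
  child_storage.id = parent->id + 1;
  *child = &child_storage;
}

int main()
{
  MockTrace parent{41};
  InferenceTrace wrapper{&parent};

  // Same guard as RequestExecutor::Infer: a request whose wrapped
  // pointer is null simply stays untraced.
  MockTrace* trace = nullptr;
  if (wrapper.triton_trace_ != nullptr) {
    SpawnChildTrace(wrapper.triton_trace_, &trace);
  }
  std::printf("child trace id: %d\n", trace ? trace->id : -1);
}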
