
Commit 77ae111

tgerdesnv authored and mc-nv committed
Properly remove first 4 bytes of BYTES data (#550)
* Add plumbing to get output datatype
* Ignore first 4 bytes of BYTES data for gathering output
* Update py code to no longer remove leading bytes
* Remove workaround in tests
* Fix order of args
* More fixes
* Handle case where bytes response is empty

1 parent 055c6b5 · commit 77ae111

19 files changed: +92 −78 lines
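For background: Triton serializes each element of a BYTES tensor as a 4-byte unsigned length prefix followed by the raw payload, so for a single-element response the first 4 bytes of the raw buffer are framing rather than output data. A minimal sketch of that framing, assuming native (typically little-endian) byte order; the helper name is illustrative and not part of this patch:

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Illustrative helper: frame one BYTES element the way it appears on the
// wire -- a 4-byte length prefix followed by the raw payload.
std::vector<uint8_t> FrameBytesElement(const std::string& payload)
{
  std::vector<uint8_t> buf(4 + payload.size());
  const uint32_t len = static_cast<uint32_t>(payload.size());
  std::memcpy(buf.data(), &len, sizeof(len));  // framing, not response data
  std::memcpy(buf.data() + 4, payload.data(), payload.size());
  return buf;
}

This commit threads the output datatype through the client backends so that perf_analyzer can recognize BYTES outputs and strip that prefix in one place, instead of scrubbing "junk" characters downstream in Python.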

src/c++/library/common.cc (+6 −4)

@@ -279,9 +279,9 @@ InferInput::GetNext(
 Error
 InferRequestedOutput::Create(
     InferRequestedOutput** infer_output, const std::string& name,
-    const size_t class_count)
+    const size_t class_count, const std::string& datatype)
 {
-  *infer_output = new InferRequestedOutput(name, class_count);
+  *infer_output = new InferRequestedOutput(name, datatype, class_count);
   return Error::Success;
 }

@@ -309,8 +309,10 @@ InferRequestedOutput::UnsetSharedMemory()
 }

 InferRequestedOutput::InferRequestedOutput(
-    const std::string& name, const size_t class_count)
-    : name_(name), class_count_(class_count), io_type_(NONE)
+    const std::string& name, const std::string& datatype,
+    const size_t class_count)
+    : name_(name), datatype_(datatype), class_count_(class_count),
+      io_type_(NONE)
 {
 }

src/c++/library/common.h (+4 −2)

@@ -400,7 +400,7 @@ class InferRequestedOutput {
   /// \return Error object indicating success or failure.
   static Error Create(
       InferRequestedOutput** infer_output, const std::string& name,
-      const size_t class_count = 0);
+      const size_t class_count = 0, const std::string& datatype = "");

   /// Gets name of the associated output tensor.
   /// \return The name of the tensor.

@@ -455,9 +455,11 @@ class InferRequestedOutput {
 #endif

   explicit InferRequestedOutput(
-      const std::string& name, const size_t class_count = 0);
+      const std::string& name, const std::string& datatype,
+      const size_t class_count = 0);

   std::string name_;
+  std::string datatype_;
   size_t class_count_;

   // Used only if working with Shared Memory
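A caller-side sketch of the extended signature (the tensor name here is illustrative; only the `Create` signature itself comes from this diff, and error handling is elided):

// Requesting a BYTES output with the new optional datatype argument.
// class_count keeps its default of 0; the trailing parameter carries the
// output's datatype so downstream code can detect BYTES outputs.
tc::InferRequestedOutput* output = nullptr;
tc::Error err = tc::InferRequestedOutput::Create(
    &output, "text_output", 0 /* class_count */, "BYTES" /* datatype */);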

src/c++/perf_analyzer/client_backend/client_backend.cc (+9 −7)

@@ -519,16 +519,17 @@ InferInput::InferInput(
 Error
 InferRequestedOutput::Create(
     InferRequestedOutput** infer_output, const BackendKind kind,
-    const std::string& name, const size_t class_count)
+    const std::string& name, const std::string& datatype,
+    const size_t class_count)
 {
   if (kind == TRITON) {
     RETURN_IF_CB_ERROR(tritonremote::TritonInferRequestedOutput::Create(
-        infer_output, name, class_count));
+        infer_output, name, class_count, datatype));
   }
 #ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
   else if (kind == OPENAI) {
-    RETURN_IF_CB_ERROR(
-        openai::OpenAiInferRequestedOutput::Create(infer_output, name));
+    RETURN_IF_CB_ERROR(openai::OpenAiInferRequestedOutput::Create(
+        infer_output, name, datatype));
   }
 #endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI
 #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS

@@ -540,7 +541,7 @@ InferRequestedOutput::Create(
 #ifdef TRITON_ENABLE_PERF_ANALYZER_C_API
   else if (kind == TRITON_C_API) {
     RETURN_IF_CB_ERROR(tritoncapi::TritonCApiInferRequestedOutput::Create(
-        infer_output, name, class_count));
+        infer_output, name, class_count, datatype));
   }
 #endif  // TRITON_ENABLE_PERF_ANALYZER_C_API
   else {

@@ -564,8 +565,9 @@ InferRequestedOutput::SetSharedMemory(
 }

 InferRequestedOutput::InferRequestedOutput(
-    const BackendKind kind, const std::string& name)
-    : kind_(kind), name_(name)
+    const BackendKind kind, const std::string& name,
+    const std::string& datatype)
+    : kind_(kind), name_(name), datatype_(datatype)
 {
 }

src/c++/perf_analyzer/client_backend/client_backend.h (+11 −2)

@@ -581,18 +581,24 @@ class InferRequestedOutput {
   /// \param infer_output Returns a new InferOutputGrpc object.
   /// \param kind The kind of the associated client backend.
   /// \param name The name of output being requested.
+  /// \param datatype The datatype of the output
   /// \param class_count The number of classifications to be requested. The
   /// default value is 0 which means the classification results are not
   /// requested.
   /// \return Error object indicating success or failure.
   static Error Create(
       InferRequestedOutput** infer_output, const BackendKind kind,
-      const std::string& name, const size_t class_count = 0);
+      const std::string& name, const std::string& datatype,
+      const size_t class_count = 0);

   /// Gets name of the associated output tensor.
   /// \return The name of the tensor.
   const std::string& Name() const { return name_; }

+  /// Gets datatype of the associated output tensor.
+  /// \return The datatype of the tensor
+  const std::string& Datatype() const { return datatype_; }
+
   /// Set the output tensor data to be written to specified shared
   /// memory region.
   /// \param region_name The name of the shared memory region.

@@ -605,9 +611,12 @@ class InferRequestedOutput {
       const size_t offset = 0);

  protected:
-  InferRequestedOutput(const BackendKind kind, const std::string& name);
+  InferRequestedOutput(
+      const BackendKind kind, const std::string& name,
+      const std::string& datatype = "");
   const BackendKind kind_;
   const std::string name_;
+  const std::string datatype_;
 };

 //

src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc (+8 −6)

@@ -83,23 +83,25 @@ OpenAiClientBackend::ClientInferStat(InferStat* infer_stat)

 Error
 OpenAiInferRequestedOutput::Create(
-    InferRequestedOutput** infer_output, const std::string& name)
+    InferRequestedOutput** infer_output, const std::string& name,
+    const std::string& datatype)
 {
   OpenAiInferRequestedOutput* local_infer_output =
-      new OpenAiInferRequestedOutput(name);
+      new OpenAiInferRequestedOutput(name, datatype);

   tc::InferRequestedOutput* openai_infer_output;
-  RETURN_IF_TRITON_ERROR(
-      tc::InferRequestedOutput::Create(&openai_infer_output, name));
+  RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create(
+      &openai_infer_output, name, 0, datatype));
   local_infer_output->output_.reset(openai_infer_output);

   *infer_output = local_infer_output;

   return Error::Success;
 }

-OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(const std::string& name)
-    : InferRequestedOutput(BackendKind::OPENAI, name)
+OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(
+    const std::string& name, const std::string& datatype)
+    : InferRequestedOutput(BackendKind::OPENAI, name, datatype)
 {
 }

src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h (+4 −2)

@@ -95,13 +95,15 @@ class OpenAiClientBackend : public ClientBackend {
 class OpenAiInferRequestedOutput : public InferRequestedOutput {
  public:
   static Error Create(
-      InferRequestedOutput** infer_output, const std::string& name);
+      InferRequestedOutput** infer_output, const std::string& name,
+      const std::string& datatype);
   /// Returns the raw InferRequestedOutput object required by OpenAi client
   /// library.
   tc::InferRequestedOutput* Get() const { return output_.get(); }

  private:
-  explicit OpenAiInferRequestedOutput(const std::string& name);
+  explicit OpenAiInferRequestedOutput(
+      const std::string& name, const std::string& datatype);

   std::unique_ptr<tc::InferRequestedOutput> output_;
 };

src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc (+6 −5)

@@ -768,14 +768,14 @@ TritonInferInput::TritonInferInput(
 Error
 TritonInferRequestedOutput::Create(
     InferRequestedOutput** infer_output, const std::string& name,
-    const size_t class_count)
+    const size_t class_count, const std::string& datatype)
 {
   TritonInferRequestedOutput* local_infer_output =
-      new TritonInferRequestedOutput(name);
+      new TritonInferRequestedOutput(name, datatype);

   tc::InferRequestedOutput* triton_infer_output;
   RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create(
-      &triton_infer_output, name, class_count));
+      &triton_infer_output, name, class_count, datatype));
   local_infer_output->output_.reset(triton_infer_output);

   *infer_output = local_infer_output;

@@ -793,8 +793,9 @@ TritonInferRequestedOutput::SetSharedMemory(
 }


-TritonInferRequestedOutput::TritonInferRequestedOutput(const std::string& name)
-    : InferRequestedOutput(BackendKind::TRITON, name)
+TritonInferRequestedOutput::TritonInferRequestedOutput(
+    const std::string& name, const std::string& datatype)
+    : InferRequestedOutput(BackendKind::TRITON, name, datatype)
 {
 }

src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h (+3 −2)

@@ -299,7 +299,7 @@ class TritonInferRequestedOutput : public InferRequestedOutput {
  public:
   static Error Create(
       InferRequestedOutput** infer_output, const std::string& name,
-      const size_t class_count = 0);
+      const size_t class_count = 0, const std::string& datatype = "");
   /// Returns the raw InferRequestedOutput object required by triton client
   /// library.
   tc::InferRequestedOutput* Get() const { return output_.get(); }

@@ -309,7 +309,8 @@ class TritonInferRequestedOutput : public InferRequestedOutput {
       const size_t offset = 0) override;

  private:
-  explicit TritonInferRequestedOutput(const std::string& name);
+  explicit TritonInferRequestedOutput(
+      const std::string& name, const std::string& datatype);

   std::unique_ptr<tc::InferRequestedOutput> output_;
 };

src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.cc (+2 −2)

@@ -335,14 +335,14 @@ TritonCApiInferInput::TritonCApiInferInput(
 Error
 TritonCApiInferRequestedOutput::Create(
     InferRequestedOutput** infer_output, const std::string& name,
-    const size_t class_count)
+    const size_t class_count, const std::string& datatype)
 {
   TritonCApiInferRequestedOutput* local_infer_output =
       new TritonCApiInferRequestedOutput(name);

   tc::InferRequestedOutput* triton_infer_output;
   RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create(
-      &triton_infer_output, name, class_count));
+      &triton_infer_output, name, class_count, datatype));
   local_infer_output->output_.reset(triton_infer_output);

   *infer_output = local_infer_output;

src/c++/perf_analyzer/client_backend/triton_c_api/triton_c_api_backend.h (+1 −1)

@@ -186,7 +186,7 @@ class TritonCApiInferRequestedOutput : public InferRequestedOutput {
  public:
   static Error Create(
       InferRequestedOutput** infer_output, const std::string& name,
-      const size_t class_count = 0);
+      const size_t class_count = 0, const std::string& datatype = "");
   /// Returns the raw InferRequestedOutput object required by triton client
   /// library.
   tc::InferRequestedOutput* Get() const { return output_.get(); }

src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py (+1 −18)

@@ -460,28 +460,11 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
             num_generated_tokens,
         )

-    def _remove_leading_invalid_chars(self, text: str):
-        if len(text) < 4:
-            return text
-
-        for i, char in enumerate(text):
-            # There will be 3 or 4 chars
-            # (but sometimes the first char looks valid, so don't stop until we've seen at least 3)
-            if char.isprintable() and i > 2:
-                break
-
-        return text[i:]
-
     def _preprocess_response(
         self, res_timestamps: list[int], res_outputs: list[dict[str, str]]
     ) -> None:
         """Helper function to preprocess responses of a request."""
-        # FIXME -- remove this triton code once it is properly fixed in PA
-        # (PA/triton will add junk to the start of the BYTES array. Remove it here)
-        if self._service_kind == "triton":
-            for d in res_outputs:
-                d["text_output"] = self._remove_leading_invalid_chars(d["text_output"])
-        elif self._service_kind == "openai":
+        if self._service_kind == "openai":
             # remove the null final response in streaming mode
             last_response = res_outputs[-1]["response"]
             last_response = remove_sse_prefix(last_response)

src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py (+13 −17)

@@ -385,23 +385,19 @@ def test_llm_metrics_get_base_name(self) -> None:
            {
                "timestamp": 1,
                "response_timestamps": [3, 5, 8],
-               # FIXME - remove the whitespace once PA handles it.
-               # LLMProfileDataParser preprocessse the responses
-               # from triton server and removes first few chars.
-               # Add whitespace to avoid valid chars being removed.
                "response_outputs": [
-                   {"text_output": " dogs"},
-                   {"text_output": " are"},
-                   {"text_output": " cool"},
+                   {"text_output": "dogs"},
+                   {"text_output": "are"},
+                   {"text_output": "cool"},
                ],
            },
            {
                "timestamp": 2,
                "response_timestamps": [4, 7, 11],
                "response_outputs": [
-                   {"text_output": " I"},
-                   {"text_output": " don't"},
-                   {"text_output": " cook food"},
+                   {"text_output": "I"},
+                   {"text_output": "don't"},
+                   {"text_output": "cook food"},
                ],
            },
        ],

@@ -416,19 +412,19 @@ def test_llm_metrics_get_base_name(self) -> None:
                "timestamp": 5,
                "response_timestamps": [7, 8, 13, 18],
                "response_outputs": [
-                   {"text_output": " cats"},
-                   {"text_output": " are"},
-                   {"text_output": " cool"},
-                   {"text_output": " too"},
+                   {"text_output": "cats"},
+                   {"text_output": "are"},
+                   {"text_output": "cool"},
+                   {"text_output": "too"},
                ],
            },
            {
                "timestamp": 3,
                "response_timestamps": [6, 8, 11],
                "response_outputs": [
-                   {"text_output": " it's"},
-                   {"text_output": " very"},
-                   {"text_output": " simple work"},
+                   {"text_output": "it's"},
+                   {"text_output": "very"},
+                   {"text_output": "simple work"},
                ],
            },
        ],

src/c++/perf_analyzer/infer_context.cc (+8)

@@ -188,6 +188,14 @@ InferContext::GetOutput(const cb::InferResult& infer_result)
     const uint8_t* buf{nullptr};
     size_t byte_size{0};
     infer_result.RawData(requested_output->Name(), &buf, &byte_size);
+
+    // The first 4 bytes of BYTES data is a 32-bit integer to indicate the size
+    // of the rest of the data (which we already know based on byte_size). It
+    // should be ignored here, as it isn't part of the actual response
+    if (requested_output->Datatype() == "BYTES" && byte_size >= 4) {
+      buf += 4;
+      byte_size -= 4;
+    }
     output.emplace(requested_output->Name(), ResponseData(buf, byte_size));
   }
   return output;
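A worked example of the new stripping logic, assuming a single-element BYTES response with a little-endian length prefix (values illustrative, not code from this commit):

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
  // Wire form of the BYTES element "dogs": 4-byte length prefix, then payload.
  const uint8_t wire[] = {0x04, 0x00, 0x00, 0x00, 'd', 'o', 'g', 's'};
  const uint8_t* buf = wire;
  size_t byte_size = sizeof(wire);  // 8 bytes on the wire

  // Same logic as the hunk above; the byte_size >= 4 guard covers the
  // "bytes response is empty" case called out in the commit message.
  if (byte_size >= 4) {
    buf += 4;
    byte_size -= 4;
  }

  assert(byte_size == 4);                    // only the payload remains
  assert(std::memcmp(buf, "dogs", 4) == 0);  // "dogs", with no junk prefix
  return 0;
}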

src/c++/perf_analyzer/infer_data_manager.cc (+4 −3)

@@ -175,11 +175,12 @@ InferDataManager::InitInferDataInput(

 cb::Error
 InferDataManager::InitInferDataOutput(
-    const std::string& name, InferData& infer_data)
+    const std::string& name, const ModelTensor& model_tensor,
+    InferData& infer_data)
 {
   cb::InferRequestedOutput* requested_output;
-  RETURN_IF_ERROR(
-      cb::InferRequestedOutput::Create(&requested_output, backend_kind_, name));
+  RETURN_IF_ERROR(cb::InferRequestedOutput::Create(
+      &requested_output, backend_kind_, name, model_tensor.datatype_));
   infer_data.outputs_.push_back(requested_output);

   return cb::Error::Success;

src/c++/perf_analyzer/infer_data_manager.h (+2 −1)

@@ -74,7 +74,8 @@ class InferDataManager : public InferDataManagerBase {
       InferData& infer_data) override;

   cb::Error InitInferDataOutput(
-      const std::string& name, InferData& infer_data) override;
+      const std::string& name, const ModelTensor& model_tensor,
+      InferData& infer_data) override;

   /// Helper function to update the inputs
   /// \param thread_id The ID of the calling thread

src/c++/perf_analyzer/infer_data_manager_base.cc (+2 −1)

@@ -115,7 +115,8 @@ InferDataManagerBase::InitInferData(InferData& infer_data)
   }

   for (const auto& output : *(parser_->Outputs())) {
-    RETURN_IF_ERROR(InitInferDataOutput(output.first, infer_data));
+    RETURN_IF_ERROR(
+        InitInferDataOutput(output.first, output.second, infer_data));
   }

   return cb::Error::Success;
