@@ -885,7 +885,11 @@ ModelState::AutoCompleteIO(const char* key, const OnnxTensorInfoMap& io_infos)
      triton::common::TritonJson::Value reshape_dims(
          ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
      RETURN_IF_ERROR(reshape.Add("shape", std::move(reshape_dims)));
-     RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+     // Empty reshape with `max_batch_size` indicates a scalar tensor in the
+     // model configuration which is not a valid model configuration.
+     if (MaxBatchSize() > 0) {
+       RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+     }
    }
    RETURN_IF_ERROR(io.Add("dims", std::move(dims)));
    RETURN_IF_ERROR(ios.Append(std::move(io)));
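Editorial note, not part of the commit: when the reported ONNX dims are empty, auto-complete now emits the empty `reshape` only for batching models, where the placeholder `dims: [ 1 ]` is stripped back off at runtime; with `max_batch_size == 0` the same empty reshape would declare a scalar shape, which is not a valid model configuration. A hedged sketch of the resulting config for a hypothetical tensor "IN":

// Auto-completed configuration for a hypothetical tensor "IN" whose
// reported ONNX dims are empty (illustrative sketch only):
//
//   max_batch_size > 0                  max_batch_size == 0
//   ------------------                  -------------------
//   input [ {                           input [ {
//     name: "IN"                          name: "IN"
//     dims: [ 1 ]                         dims: [ 1 ]
//     reshape: { shape: [ ] }           } ]
//   } ]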
@@ -998,6 +1002,12 @@ class ModelInstanceState : public BackendModelInstance {
  // map of output name -> tensor info
  OnnxTensorInfoMap output_tensor_infos_;

+  // map of input name -> tensor info
+  OnnxTensorInfoMap input_tensor_infos_;
+
+  // A map from scalar output tensors to the dimension specified in model config
+  std::unordered_map<std::string, std::vector<int64_t>> scalar_outputs_;
+
  // Onnx Runtime variables that will be reset and used for every run
  // on this instance.
  std::vector<OrtValue*> input_tensors_;
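For orientation, a hedged sketch of how the two new members are consumed later in this commit (the example tensor names are hypothetical):

// input_tensor_infos_ caches the session's input metadata so that
// SetInputTensors() can test a tensor's rank without re-querying ORT:
//   input_tensor_infos_["INPUT0"].dims_.size() == 0    // rank-0 (scalar)
//
// scalar_outputs_ keeps the config dims of each scalar output so that
// ReadOutputTensors() can substitute them for ORT's empty output shape:
//   scalar_outputs_["OUTPUT0"] == std::vector<int64_t>{1}   // dims: [ 1 ]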
@@ -1313,9 +1323,8 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
{
  std::set<std::string> input_tensor_names;
  RETURN_IF_ERROR(InputNames(session_, input_tensor_names));
-
-  OnnxTensorInfoMap input_tensor_infos;
-  RETURN_IF_ERROR(InputInfos(session_, default_allocator_, input_tensor_infos));
+  RETURN_IF_ERROR(
+      InputInfos(session_, default_allocator_, input_tensor_infos_));

  std::set<std::string> overridable_initializer_tensor_names;
  RETURN_IF_ERROR(OverridableInitializerNames(
@@ -1325,12 +1334,13 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
  RETURN_IF_ERROR(OverridableInitializerInfos(
      session_, default_allocator_, overridable_initializer_tensor_infos));

-  if (input_tensor_infos.size() != expected_input_cnt) {
+  if (input_tensor_infos_.size() != expected_input_cnt) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("unable to load model '") + model_state_->Name() +
         "', configuration expects " + std::to_string(expected_input_cnt) +
-         " inputs, model provides " + std::to_string(input_tensor_infos.size()))
+         " inputs, model provides " +
+         std::to_string(input_tensor_infos_.size()))
            .c_str());
  }

@@ -1357,8 +1367,9 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)

    const auto& tensor_names =
        io_optional ? overridable_initializer_tensor_names : input_tensor_names;
-    const auto& tensor_infos =
-        io_optional ? overridable_initializer_tensor_infos : input_tensor_infos;
+    const auto& tensor_infos = io_optional
+                                   ? overridable_initializer_tensor_infos
+                                   : input_tensor_infos_;
    auto iit = tensor_infos.find(io_name);
    if (iit == tensor_infos.end()) {
      RETURN_IF_ERROR(CheckAllowedModelInput(io, tensor_names));
@@ -1419,9 +1430,28 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
                .c_str());
      }
    } else {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), false /* compare_exact */));
+      // Only compare the dimensions if the tensor is not scalar
+      if (iit->second.dims_.size() != 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), false /* compare_exact */));
+      } else {
+        // if max_batch_size == 0 and is a scalar tensor all the
+        // dimensions specified must be equal to 1
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+      }
    }
  }

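As I read the new else-branch, a rank-0 ONNX input is accepted as long as every dimension listed in the configuration is 1. A sketch of the rule for a hypothetical scalar input with max_batch_size == 0 (not part of the commit):

//   dims: [ 1 ]     -> accepted: the single listed dimension is 1
//   dims: [ 1, 1 ]  -> accepted: every listed dimension is 1
//   dims: [ 2 ]     -> rejected: TRITONSERVER_ERROR_INVALID_ARG with the
//                      "scalar tensor ... should only provide 1" message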
@@ -1482,9 +1512,29 @@ ModelInstanceState::ValidateOutputs()

    // The batch output shape doesn't necessarily match the model
    if (model_state_->FindBatchOutput(io_name) == nullptr) {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), true /* compare_exact */));
+      // Only compare the dimensions if the tensor is not scalar
+      if (iit->second.dims_.size() != 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), true /* compare_exact */));
+      } else {
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+
+        // store the dimension for reference.
+        scalar_outputs_[io_name] = dims;
+      }
    }
  }

@@ -1900,13 +1950,34 @@ ModelInstanceState::SetInputTensors(
          input_name, nullptr, 0, allowed_input_types, &input_buffer,
          &batchn_byte_size, &memory_type, &memory_type_id));

+      auto iti = input_tensor_infos_.find(input_name);
+      if (iti == input_tensor_infos_.end()) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INTERNAL,
+            std::string(
+                std::string(
+                    "Failed to retrieve the ONNX input tensor info from '") +
+                input_name + "'.")
+                .c_str());
+      }
+
      // Create ORT Tensor
-      RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
-          memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
-                                                 : cpu_allocator_info_,
-          (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
-          batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
-          &input_tensors_.back()));
+      if (iti->second.dims_.size() == 0) {
+        // scalar tensor
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, nullptr /* scalar */,
+            0 /* number of dims */, ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      } else {
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
+            batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      }
      RETURN_IF_ORT_ERROR(
          ort_api->BindInput(io_binding_, input_name, input_tensors_.back()));
    } else {
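The scalar branch relies on the ONNX Runtime C API convention that a null shape pointer together with a dimension count of 0 creates a rank-0 tensor. A minimal standalone sketch of that call (hypothetical helper, not part of the commit; `buffer` and `mem_info` are placeholders):

#include <onnxruntime_c_api.h>

// Wraps an existing float in a rank-0 (scalar) OrtValue, mirroring the
// nullptr /* scalar */, 0 /* number of dims */ arguments used above.
static OrtStatus*
CreateScalarFloat(
    const OrtApi* ort_api, const OrtMemoryInfo* mem_info, float* buffer,
    OrtValue** out)
{
  return ort_api->CreateTensorWithDataAsOrtValue(
      mem_info, buffer, sizeof(float), nullptr /* shape */,
      0 /* shape_len */, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, out);
}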
@@ -2283,6 +2354,22 @@ ModelInstanceState::ReadOutputTensors(
            batchn_shape, dtype, output_tensor, &output_buffer, string_buffers,
            offsets));

+      // If the number of dimensions is equal to zero, it means that it is a
+      // scalar and it would use the dimensions specified in the model
+      // configuration.
+      if (batchn_shape.size() == 0) {
+        auto scalar_output_dims_it = scalar_outputs_.find(name);
+        if (scalar_output_dims_it == scalar_outputs_.end()) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INTERNAL,
+              std::string(
+                  "Failed to find the scalar output dimension for " + name +
+                  " in the model configuration.")
+                  .c_str());
+        }
+        batchn_shape = scalar_output_dims_it->second;
+      }
+
      if (output_tensor_pair.first != -1) {
        if (dtype == TRITONSERVER_TYPE_BYTES) {
          auto content = string_buffers.back().data();
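ORT reports a scalar output with an empty shape, so `batchn_shape` comes back empty and the config dims saved in `scalar_outputs_` are substituted before the response is built. A hedged sketch of how the rank-0 case can be detected with the ONNX Runtime C API (error handling elided; `output_tensor` is a placeholder OrtValue*):

OrtTensorTypeAndShapeInfo* shape_info = nullptr;
ort_api->GetTensorTypeAndShape(output_tensor, &shape_info);
size_t num_dims = 0;
ort_api->GetDimensionsCount(shape_info, &num_dims);
if (num_dims == 0) {
  // Rank-0 output: report the dims from the model configuration instead,
  // as ReadOutputTensors() does with scalar_outputs_ above.
}
ort_api->ReleaseTensorTypeAndShapeInfo(shape_info);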