@@ -885,7 +885,9 @@ ModelState::AutoCompleteIO(const char* key, const OnnxTensorInfoMap& io_infos)
       triton::common::TritonJson::Value reshape_dims(
           ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
       RETURN_IF_ERROR(reshape.Add("shape", std::move(reshape_dims)));
-      RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+      if (MaxBatchSize() > 0) {
+        RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+      }
     }
     RETURN_IF_ERROR(io.Add("dims", std::move(dims)));
     RETURN_IF_ERROR(ios.Append(std::move(io)));
@@ -998,6 +1000,14 @@ class ModelInstanceState : public BackendModelInstance {
   // map of output name -> tensor info
   OnnxTensorInfoMap output_tensor_infos_;
 
+  // map of input name -> tensor info
+  OnnxTensorInfoMap input_tensor_infos_;
+
+  // A map from scalar output tensors to the dimension specified in model config
+  std::unordered_map<std::string, std::vector<int64_t>> scalar_outputs_;
+
+  // A map from scalar input tensors to the dimension specified in model config
+  std::unordered_map<std::string, std::vector<int64_t>> scalar_inputs_;
+
   // Onnx Runtime variables that will be reset and used for every run
   // on this instance.
   std::vector<OrtValue*> input_tensors_;
@@ -1313,9 +1323,8 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
 {
   std::set<std::string> input_tensor_names;
   RETURN_IF_ERROR(InputNames(session_, input_tensor_names));
-
-  OnnxTensorInfoMap input_tensor_infos;
-  RETURN_IF_ERROR(InputInfos(session_, default_allocator_, input_tensor_infos));
+  RETURN_IF_ERROR(
+      InputInfos(session_, default_allocator_, input_tensor_infos_));
 
   std::set<std::string> overridable_initializer_tensor_names;
   RETURN_IF_ERROR(OverridableInitializerNames(
@@ -1325,12 +1334,13 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
   RETURN_IF_ERROR(OverridableInitializerInfos(
       session_, default_allocator_, overridable_initializer_tensor_infos));
 
-  if (input_tensor_infos.size() != expected_input_cnt) {
+  if (input_tensor_infos_.size() != expected_input_cnt) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INVALID_ARG,
         (std::string("unable to load model '") + model_state_->Name() +
          "', configuration expects " + std::to_string(expected_input_cnt) +
-         " inputs, model provides " + std::to_string(input_tensor_infos.size()))
+         " inputs, model provides " +
+         std::to_string(input_tensor_infos_.size()))
            .c_str());
   }
 
@@ -1357,8 +1367,9 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
 
     const auto& tensor_names =
         io_optional ? overridable_initializer_tensor_names : input_tensor_names;
-    const auto& tensor_infos =
-        io_optional ? overridable_initializer_tensor_infos : input_tensor_infos;
+    const auto& tensor_infos = io_optional
+                                   ? overridable_initializer_tensor_infos
+                                   : input_tensor_infos_;
     auto iit = tensor_infos.find(io_name);
     if (iit == tensor_infos.end()) {
       RETURN_IF_ERROR(CheckAllowedModelInput(io, tensor_names));
@@ -1419,9 +1430,30 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
                 .c_str());
       }
     } else {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), false /* compare_exact */));
+      if (model_state_->MaxBatchSize() != 0 || iit->second.dims_.size() > 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), false /* compare_exact */));
+      } else {
+        // if max_batch_size == 0 and this is a scalar tensor, all the
+        // dimensions specified must be equal to 1
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+
+        // Store the dimensions for reference.
+        scalar_inputs_[io_name] = dims;
+      }
     }
   }
 
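The input check above reduces to a simple rule: when max_batch_size is 0 and the ONNX model reports a rank-0 (scalar) tensor, every entry in the model-config dims must be 1. A minimal standalone sketch of that rule (illustrative only, not part of this commit; ScalarConfigDimsValid is a hypothetical helper name):

// Illustrative sketch of the validation rule enforced above for
// non-batching models with scalar I/O.
#include <cstdint>
#include <iostream>
#include <vector>

static bool ScalarConfigDimsValid(const std::vector<int64_t>& config_dims)
{
  // For a rank-0 ONNX tensor, the Triton model config may only use 1s.
  for (const auto dim : config_dims) {
    if (dim != 1) {
      return false;
    }
  }
  return true;
}

int main()
{
  std::cout << ScalarConfigDimsValid({1}) << "\n";     // 1 -> accepted
  std::cout << ScalarConfigDimsValid({1, 1}) << "\n";  // 1 -> accepted
  std::cout << ScalarConfigDimsValid({3}) << "\n";     // 0 -> rejected
}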
@@ -1482,9 +1514,30 @@ ModelInstanceState::ValidateOutputs()
 
     // The batch output shape doesn't necessarily match the model
     if (model_state_->FindBatchOutput(io_name) == nullptr) {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), true /* compare_exact */));
+      // if max_batch_size == 0 and this is a scalar tensor, all the
+      // dimensions specified must be equal to 1
+      if (model_state_->MaxBatchSize() > 0 || iit->second.dims_.size() > 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), true /* compare_exact */));
+      } else {
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+
+        // Store the dimensions for reference.
+        scalar_outputs_[io_name] = dims;
+      }
     }
   }
 
@@ -1900,13 +1953,34 @@ ModelInstanceState::SetInputTensors(
           input_name, nullptr, 0, allowed_input_types, &input_buffer,
           &batchn_byte_size, &memory_type, &memory_type_id));
 
+      auto iti = input_tensor_infos_.find(input_name);
+      if (iti == input_tensor_infos_.end()) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INTERNAL,
+            std::string(
+                std::string(
+                    "Failed to retrieve the ONNX input tensor info from '") +
+                input_name + "'.")
+                .c_str());
+      }
+
       // Create ORT Tensor
-      RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
-          memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
-                                                 : cpu_allocator_info_,
-          (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
-          batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
-          &input_tensors_.back()));
+      if (iti->second.dims_.size() == 0) {
+        // scalar tensor
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, nullptr /* scalar */,
+            0 /* number of dims */, ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      } else {
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
+            batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      }
       RETURN_IF_ORT_ERROR(
           ort_api->BindInput(io_binding_, input_name, input_tensors_.back()));
     } else {
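The scalar branch above leans on an ONNX Runtime C API detail: CreateTensorWithDataAsOrtValue accepts a null shape pointer with a shape length of 0 to produce a rank-0 tensor over an existing buffer. A standalone sketch of that call (illustrative, not part of this commit; error handling trimmed):

// Illustrative sketch: creating a rank-0 (scalar) OrtValue over a
// caller-owned float. Assumes the ONNX Runtime C API header and library.
#include <onnxruntime_c_api.h>

OrtStatus* CreateScalarFloat(const OrtApi* ort, float* value, OrtValue** out)
{
  OrtMemoryInfo* mem_info = nullptr;
  OrtStatus* status =
      ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &mem_info);
  if (status != nullptr) {
    return status;
  }
  // A null shape with zero dims yields a scalar tensor viewing 'value'.
  status = ort->CreateTensorWithDataAsOrtValue(
      mem_info, value, sizeof(float), nullptr /* shape */, 0 /* num dims */,
      ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, out);
  ort->ReleaseMemoryInfo(mem_info);
  return status;
}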
@@ -2283,6 +2357,22 @@ ModelInstanceState::ReadOutputTensors(
             batchn_shape, dtype, output_tensor, &output_buffer, string_buffers,
             offsets));
 
+        // If the number of dimensions is equal to zero, it means that it is a
+        // scalar and the dimensions specified in the model configuration
+        // will be used.
+        if (batchn_shape.size() == 0) {
+          auto scalar_output_dims_it = scalar_outputs_.find(name);
+          if (scalar_output_dims_it == scalar_outputs_.end()) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INTERNAL,
+                std::string(
+                    "Failed to find the scalar output dimension for " + name +
+                    " in the model configuration.")
+                    .c_str());
+          }
+          batchn_shape = scalar_output_dims_it->second;
+        }
+
         if (output_tensor_pair.first != -1) {
           if (dtype == TRITONSERVER_TYPE_BYTES) {
             auto content = string_buffers.back().data();
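In the other direction, ReadOutputTensors maps a rank-0 ORT output back to the dims declared in the model configuration before building the response. The substitution restated as a standalone sketch (hypothetical helper name, not part of this commit):

// Illustrative sketch: a rank-0 (scalar) output keeps the shape declared
// in the model config (all 1s, validated at load time) instead of the
// empty shape that ONNX Runtime reports.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

using ShapeMap = std::unordered_map<std::string, std::vector<int64_t>>;

std::vector<int64_t> ResolveOutputShape(
    const std::string& name, std::vector<int64_t> batchn_shape,
    const ShapeMap& scalar_outputs)
{
  if (batchn_shape.empty()) {
    auto it = scalar_outputs.find(name);
    if (it != scalar_outputs.end()) {
      return it->second;  // e.g. {1}, taken from the config dims
    }
  }
  return batchn_shape;
}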