@@ -1015,6 +1015,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
@@ -1024,15 +1035,19 @@ InferenceRequest::Normalize()
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }

       if (input.OriginalShape().size() == 0) {
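
Both branches of Normalize() now look up each input's config entry and tag the request input accordingly, with the shape-tensor check taking precedence over the non-linear-format check. Below is a minimal standalone sketch of that classification rule; the ModelInputConfig struct is a stand-in for the relevant inference::ModelInput fields, and the real code records the tag on the Input object via SetIsShapeTensor() / SetIsNonLinearFormatIo() rather than returning it.

    #include <cassert>

    // Stand-in for the relevant inference::ModelInput config fields.
    struct ModelInputConfig {
      bool is_shape_tensor = false;
      bool is_non_linear_format_io = false;
    };

    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

    // Mirrors the if / else-if chain in the hunks above: the shape-tensor
    // tag wins even if both flags were set in the config.
    TensorType
    Classify(const ModelInputConfig& cfg)
    {
      if (cfg.is_shape_tensor) {
        return TensorType::SHAPE_TENSOR;
      } else if (cfg.is_non_linear_format_io) {
        return TensorType::NON_LINEAR;
      }
      return TensorType::TENSOR;
    }

    int
    main()
    {
      assert(Classify({true, true}) == TensorType::SHAPE_TENSOR);
      assert(Classify({false, true}) == TensorType::NON_LINEAR);
      assert(Classify({false, false}) == TensorType::TENSOR);
      return 0;
    }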
@@ -1182,28 +1197,26 @@ InferenceRequest::Normalize()
 {
   const auto& data_type = input.DType();

-  // FIXME: Skip byte size validation for TensorRT backend because it breaks
-  // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-  // reformat_free tensors.
-  bool skip_byte_size_check = false;
-  constexpr char trt_prefix[] = "tensorrt_";
-  const std::string& platform = model_raw_->Config().platform();
-  skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-  if (!skip_byte_size_check) {
+  // Non-linear IO format input byte size validation will be handled in the
+  // TensorRT backend.
+  if (!input.IsNonLinearFormatIo()) {
     TRITONSERVER_MemoryType input_memory_type;
     // Because Triton expects STRING type to be in special format
     // (prepend 4 bytes to specify string length), so need to add all the
     // first 4 bytes for each element to find expected byte size
     if (data_type == inference::DataType::TYPE_STRING) {
       RETURN_IF_ERROR(
           ValidateBytesInputs(input_id, input, &input_memory_type));
+
       // FIXME: Temporarily skips byte size checks for GPU tensors. See
       // DLIS-6820.
-      skip_byte_size_check |=
-          (input_memory_type == TRITONSERVER_MEMORY_GPU);
     } else {
-      const auto& input_dims = input.ShapeWithBatchDim();
+      // Shape tensor with dynamic batching does not introduce a new
+      // dimension to the tensor but adds an additional value to the 1-D
+      // array.
+      const std::vector<int64_t>& input_dims =
+          input.IsShapeTensor() ? input.OriginalShape()
+                                : input.ShapeWithBatchDim();
       int64_t expected_byte_size = INT_MAX;
       expected_byte_size =
           triton::common::GetByteSize(data_type, input_dims);
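
The input_dims selection above is the crux of this hunk: a regular input is validated against its shape with the batch dimension prepended, while a shape tensor keeps its original 1-D shape, since batching only appends a value to that array instead of adding a dimension. A self-contained sketch of the arithmetic, where ByteSize is a simplified stand-in for triton::common::GetByteSize (fixed 4-byte elements, no string handling):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Element count times a fixed 4-byte element size (e.g. TYPE_FP32 or
    // TYPE_INT32); a simplified stand-in for triton::common::GetByteSize.
    int64_t
    ByteSize(const std::vector<int64_t>& dims)
    {
      return std::accumulate(
          dims.begin(), dims.end(), int64_t{4}, std::multiplies<int64_t>());
    }

    int
    main()
    {
      // Regular FP32 input with original shape [3, 4] and batch size 8:
      // validated against the batched shape [8, 3, 4].
      std::cout << ByteSize({8, 3, 4}) << " bytes\n";  // 384 bytes

      // INT32 shape tensor with original shape [3]: batching does not add
      // a dimension, so the expected size stays 3 * 4 = 12 bytes, not the
      // 8 * 3 * 4 = 96 that naively prepending the batch would suggest.
      std::cout << ByteSize({3}) << " bytes\n";  // 12 bytes
      return 0;
    }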
@@ -1506,7 +1519,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1515,16 +1528,17 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1540,9 +1554,16 @@ InferenceRequest::Input::SetMetadata(
 }

 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
+{
+  tensor_type_ = TensorType::SHAPE_TENSOR;
+  return Status::Success;
+}
+
+Status
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }

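
Taken together, the constructor and setter hunks replace the old is_shape_tensor_ boolean with a three-state tensor_type_ member, which keeps the mutually exclusive SHAPE_TENSOR and NON_LINEAR states from drifting apart the way two independent flags could. The header side of the refactor is not part of this diff; the sketch below is a plausible reconstruction of the declarations it implies, including the IsShapeTensor() / IsNonLinearFormatIo() accessors used in the validation hunk above. Only the names taken from the diff are certain; the rest, including the Status stub, is an assumption for self-containment.

    #include <cassert>

    // Stub for triton::core::Status, just enough to compile this sketch.
    struct Status {
      static Status Success;
    };
    Status Status::Success;

    class Input {
     public:
      enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

      Status SetIsShapeTensor()
      {
        tensor_type_ = TensorType::SHAPE_TENSOR;
        return Status::Success;
      }
      Status SetIsNonLinearFormatIo()
      {
        tensor_type_ = TensorType::NON_LINEAR;
        return Status::Success;
      }

      bool IsShapeTensor() const
      {
        return tensor_type_ == TensorType::SHAPE_TENSOR;
      }
      bool IsNonLinearFormatIo() const
      {
        return tensor_type_ == TensorType::NON_LINEAR;
      }

     private:
      // Defaults to TENSOR, matching the constructor initializers above.
      TensorType tensor_type_{TensorType::TENSOR};
    };

    int
    main()
    {
      Input input;
      assert(!input.IsShapeTensor() && !input.IsNonLinearFormatIo());
      input.SetIsNonLinearFormatIo();
      assert(input.IsNonLinearFormatIo());
      return 0;
    }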