Commit 9ed1544

Fix shape and reformat free tensor handling in the input byte size check (#380)
* Update
* Enhancements
* Update ValidateNonLinearFormatIO()
* Undo ShapeWithBatchDim() change
* Fix pre-commit error
1 parent a9048db commit 9ed1544
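In short: the old code skipped the input byte size check for any model whose
platform starts with "tensorrt_". The fix narrows that skip to inputs that
actually use a non-linear (reformat-free) IO format, whose byte size only the
TensorRT backend can validate, and sizes shape tensors from their original
1-D shape, since dynamic batching appends a value to that array rather than
prepending a batch dimension. A condensed view of the resulting check
(excerpted and simplified from the src/infer_request.cc diff below; not
standalone code):

    // Non-linear IO format inputs are validated in the TensorRT backend.
    if (!input.IsNonLinearFormatIo()) {
      // A shape tensor stays 1-D under dynamic batching, so its expected
      // byte size comes from the original shape, not ShapeWithBatchDim().
      const std::vector<int64_t>& input_dims =
          input.IsShapeTensor() ? input.OriginalShape()
                                : input.ShapeWithBatchDim();
      expected_byte_size = triton::common::GetByteSize(data_type, input_dims);
    }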

File tree: 4 files changed, +94 -26 lines

src/infer_request.cc

Lines changed: 42 additions & 21 deletions
@@ -1015,6 +1015,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
@@ -1024,15 +1035,19 @@ InferenceRequest::Normalize()
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
 
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
 
       if (input.OriginalShape().size() == 0) {
@@ -1182,28 +1197,26 @@ InferenceRequest::Normalize()
   {
     const auto& data_type = input.DType();
 
-    // FIXME: Skip byte size validation for TensorRT backend because it breaks
-    // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-    // reformat_free tensors.
-    bool skip_byte_size_check = false;
-    constexpr char trt_prefix[] = "tensorrt_";
-    const std::string& platform = model_raw_->Config().platform();
-    skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-    if (!skip_byte_size_check) {
+    // Non-linear IO format input byte size validation will be handled in the
+    // TensorRT backend.
+    if (!input.IsNonLinearFormatIo()) {
       TRITONSERVER_MemoryType input_memory_type;
       // Because Triton expects STRING type to be in special format
       // (prepend 4 bytes to specify string length), so need to add all the
       // first 4 bytes for each element to find expected byte size
       if (data_type == inference::DataType::TYPE_STRING) {
         RETURN_IF_ERROR(
             ValidateBytesInputs(input_id, input, &input_memory_type));
+
         // FIXME: Temporarily skips byte size checks for GPU tensors. See
         // DLIS-6820.
-        skip_byte_size_check |=
-            (input_memory_type == TRITONSERVER_MEMORY_GPU);
       } else {
-        const auto& input_dims = input.ShapeWithBatchDim();
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        const std::vector<int64_t>& input_dims =
+            input.IsShapeTensor() ? input.OriginalShape()
+                                  : input.ShapeWithBatchDim();
         int64_t expected_byte_size = INT_MAX;
         expected_byte_size =
             triton::common::GetByteSize(data_type, input_dims);
@@ -1506,7 +1519,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1515,16 +1528,17 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1540,9 +1554,16 @@ InferenceRequest::Input::SetMetadata(
 }
 
 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
+{
+  tensor_type_ = TensorType::SHAPE_TENSOR;
+  return Status::Success;
+}
+
+Status
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }

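A standalone numeric illustration of why the shape-tensor branch in the byte
size check above matters (plain C++, not Triton code; the 4-byte element size
assumes TYPE_INT32, and GetByteSize here is a simplified stand-in for
triton::common::GetByteSize):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Product of dims times the element size, for fixed-size datatypes.
    int64_t GetByteSize(int64_t elem_bytes, const std::vector<int64_t>& dims) {
      return std::accumulate(
          dims.begin(), dims.end(), elem_bytes, std::multiplies<int64_t>());
    }

    int main() {
      // A shape tensor is 1-D: the client sends e.g. 3 INT32 values no matter
      // what the request batch size is.
      const std::vector<int64_t> original_shape{3};
      const std::vector<int64_t> shape_with_batch_dim{8, 3};  // batch dim wrongly prepended

      std::cout << GetByteSize(4, original_shape) << "\n";        // 12 -- matches the payload
      std::cout << GetByteSize(4, shape_with_batch_dim) << "\n";  // 96 -- spurious mismatch
    }
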
src/infer_request.h

Lines changed: 17 additions & 3 deletions
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -134,10 +136,22 @@ class InferenceRequest {
     }
 
     // Whether or not the input is a tensorrt shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }
+
+    // Specifies whether the input uses a non-linear IO format
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }
 
     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();
+
+    // Set the input to use a non-linear IO format.
+    Status SetIsNonLinearFormatIo();
 
     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
@@ -240,7 +254,7 @@ class InferenceRequest {
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;
 
     bool has_host_policy_specific_data_;
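
A design note on the header change: replacing the is_shape_tensor_ bool with a
three-state TensorType makes "shape tensor" and "non-linear format" mutually
exclusive by construction, so no caller can set both at once. A self-contained
model of the pattern (mirrors the accessors above, but is not the actual
Triton class):

    #include <cassert>

    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

    class Input {
     public:
      void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
      void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }
      bool IsShapeTensor() const { return tensor_type_ == TensorType::SHAPE_TENSOR; }
      bool IsNonLinearFormatIo() const { return tensor_type_ == TensorType::NON_LINEAR; }

     private:
      TensorType tensor_type_ = TensorType::TENSOR;  // plain tensor by default
    };

    int main() {
      Input input;
      input.SetIsShapeTensor();
      input.SetIsNonLinearFormatIo();  // overwrites the state; the two cannot coexist
      assert(!input.IsShapeTensor() && input.IsNonLinearFormatIo());
      return 0;
    }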

src/model_config_utils.cc

Lines changed: 32 additions & 0 deletions
@@ -418,6 +418,34 @@ ValidateIOShape(
   return Status::Success;
 }
 
+/// Validate that Non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+template <class ModelIO>
+Status
+ValidateNonLinearFormatIO(
+    const ModelIO& io, const std::string& platform, bool is_input)
+{
+  if (!io.is_non_linear_format_io()) {
+    // Nothing to validate as the tensor is not non-linear format.
+    return Status::Success;
+  }
+
+  if (platform != kTensorRTPlanPlatform) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.dims_size() != 3) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 }  // namespace
 
 Status
@@ -1732,6 +1760,8 @@ ValidateModelInput(
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input */));
+
   return Status::Success;
 }
 
@@ -1768,6 +1798,8 @@ ValidateModelOutput(
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input */));
+
   return Status::Success;
 }

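The new validator enforces two rules: non-linear IO format is accepted only on
the TensorRT plan platform, and such tensors must declare exactly 3 dims. A
standalone approximation for illustration (Status is reduced to an error
string, ModelIO to the two fields the function reads, and kTensorRTPlanPlatform
is assumed to be "tensorrt_plan" as defined elsewhere in Triton):

    #include <iostream>
    #include <string>

    constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan";

    struct ModelIO {
      bool is_non_linear_format_io;
      int dims_size;
    };

    // Mirrors ValidateNonLinearFormatIO above; an empty string means success.
    std::string ValidateNonLinearFormatIO(
        const ModelIO& io, const std::string& platform, bool is_input) {
      if (!io.is_non_linear_format_io) {
        return "";  // nothing to validate
      }
      if (platform != kTensorRTPlanPlatform) {
        return "Non-linear IO format is only supported for the TensorRT platform";
      }
      if (io.dims_size != 3) {
        return std::string("Non-linear IO format ") +
               (is_input ? "input" : "output") + " requires 3 dims";
      }
      return "";
    }

    int main() {
      // Rejected: 4 dims on the TensorRT platform.
      std::cout << ValidateNonLinearFormatIO({true, 4}, "tensorrt_plan", true) << "\n";
      // Rejected: non-linear IO format on a non-TensorRT platform.
      std::cout << ValidateNonLinearFormatIO({true, 3}, "onnxruntime_onnx", true) << "\n";
    }
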
src/test/response_cache_test.cc

Lines changed: 3 additions & 2 deletions
@@ -70,8 +70,9 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }