 import test_util as tu
 import tritonclient.http as tritonhttpclient
 import tritonclient.utils.shared_memory as shm
+from tritonclient.utils import InferenceServerException


 def div_up(a, b):
@@ -141,6 +142,41 @@ def test_nobatch_chw2_input(self):
             "OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
         )

+    def test_wrong_nobatch_chw2_input(self):
+        model_name = "plan_nobatch_CHW2_LINEAR_float16_float16_float16"
+        input_np = np.arange(26, dtype=np.float16).reshape((13, 2, 1))
+
+        # Use shared memory to bypass the shape check in the client library:
+        # for a non-linear format tensor the data buffer is padded, so the
+        # data byte size may not match what is calculated from the tensor shape.
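+        # (Assumption: add_reformat_free_data_as_shared_memory is a helper
+        # defined elsewhere in this test class; it presumably creates a system
+        # shared-memory region of exactly input_np.nbytes, registers it via
+        # shm.create_shared_memory_region / shm.set_shared_memory_region and
+        # triton_client.register_system_shared_memory, and attaches it with
+        # InferInput.set_shared_memory.)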
+        inputs = []
+        inputs.append(tritonhttpclient.InferInput("INPUT0", [13, 2, 1], "FP16"))
+        # Send the original size input instead of the reformatted size input.
+        self.add_reformat_free_data_as_shared_memory("input0", inputs[-1], input_np)
+
+        inputs.append(tritonhttpclient.InferInput("INPUT1", [13, 2, 1], "FP16"))
+        # Send the original size input instead of the reformatted size input.
+        self.add_reformat_free_data_as_shared_memory("input1", inputs[-1], input_np)
+
+        outputs = []
+        outputs.append(
+            tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
+        )
+        outputs.append(
+            tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
+        )
+
+        with self.assertRaises(InferenceServerException) as e:
+            self.triton_client.infer(
+                model_name=model_name, inputs=inputs, outputs=outputs
+            )
+
+        err_str = str(e.exception)
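+        # CHW2 pads the channel dimension to a multiple of 2, so 13 channels
+        # become 14: the server expects 14 * 2 * 1 * 2 bytes = 56, while the
+        # unpadded input supplies 13 * 2 * 1 * 2 bytes = 52.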
+        self.assertIn(
+            "input byte size mismatch for input 'INPUT0' for model 'plan_nobatch_CHW2_LINEAR_float16_float16_float16'. Expected 56, got 52",
+            err_str,
+        )
+
     def test_chw2_input(self):
         model_name = "plan_CHW2_LINEAR_float16_float16_float16"
         for bs in [1, 8]:
@@ -186,6 +222,50 @@ def test_chw2_input(self):
                 "OUTPUT0 expected: {}, got {}".format(expected_output1_np, output1_np),
             )

+    def test_wrong_chw2_input(self):
+        model_name = "plan_CHW2_LINEAR_float16_float16_float16"
+        for bs in [1, 8]:
+            input_np = np.arange(26 * bs, dtype=np.float16).reshape((bs, 13, 2, 1))
+
+            # Use shared memory to bypass the shape check in the client
+            # library: for a non-linear format tensor the data buffer is
+            # padded, so the data byte size may not match what is calculated
+            # from the tensor shape.
+            inputs = []
+            inputs.append(tritonhttpclient.InferInput("INPUT0", [bs, 13, 2, 1], "FP16"))
+            # Send the original size input instead of the reformatted size input.
+            self.add_reformat_free_data_as_shared_memory(
+                "input0" + str(bs), inputs[-1], input_np
+            )
+
+            inputs.append(tritonhttpclient.InferInput("INPUT1", [bs, 13, 2, 1], "FP16"))
+            # Send the original size input instead of the reformatted size input.
+            self.add_reformat_free_data_as_shared_memory(
+                "input1" + str(bs), inputs[-1], input_np
+            )
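+            # The batch size is baked into the region names ("input0" + str(bs)),
+            # presumably so each loop iteration registers distinct shared-memory
+            # regions rather than colliding with ones from a prior iteration.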
+
+            outputs = []
+            outputs.append(
+                tritonhttpclient.InferRequestedOutput("OUTPUT0", binary_data=True)
+            )
+            outputs.append(
+                tritonhttpclient.InferRequestedOutput("OUTPUT1", binary_data=True)
+            )
+
+            with self.assertRaises(InferenceServerException) as e:
+                self.triton_client.infer(
+                    model_name=model_name, inputs=inputs, outputs=outputs
+                )
+            err_str = str(e.exception)
+            # Reformatted (CHW2-padded) input size: (bs, 14, 2, 1) * sizeof(float16)
+            expected_size = bs * 28 * 2
+            # Original input size: (bs, 13, 2, 1) * sizeof(float16)
+            received_size = bs * 26 * 2
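+            # For bs=1 this is 56 vs. 52 bytes; for bs=8, 448 vs. 416.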
+            self.assertIn(
+                f"input byte size mismatch for input 'INPUT0' for model 'plan_CHW2_LINEAR_float16_float16_float16'. Expected {expected_size}, got {received_size}",
+                err_str,
+            )
+
     def test_nobatch_chw32_input(self):
         model_name = "plan_nobatch_CHW32_LINEAR_float32_float32_float32"
         input_np = np.arange(26, dtype=np.float32).reshape((13, 2, 1))