test(op): add a UB memory-overflow check to the zeros and zeros_like operator tests, and fix the matmul test to use floor division

wangzhanpeng5 · it-is-a-robot · commit a43e5931d865 · 2025-07-07T03:05:50.000Z
diff --git a/ascend/examples/generalization_cases/test_matmul.py b/ascend/examples/generalization_cases/test_matmul.py
@@ -61,7 +61,7 @@ def test_matmul(shape, dtype):
     # bisheng not support yet
     if M % 16 != 0 or N % 16 != 0 or get_dtype_size(dtype) * K % 32 != 0:
         return
-    kalign = 32 / get_dtype_size(dtype)  # 32byte/Dtype_bytes
+    kalign = 32 // get_dtype_size(dtype)  # 32byte/Dtype_bytes
     BLOCK_M, BLOCK_N, BLOCK_K = min(max(M, 16), 32), min(max(N, 16), 32), min(max(K, kalign), 32)
     a = test_common.generate_tensor((M, K), dtype)
     b = test_common.generate_tensor((K, N), dtype)
diff --git a/ascend/examples/generalization_cases/test_zeros_op.py b/ascend/examples/generalization_cases/test_zeros_op.py
@@ -519,7 +519,8 @@ def fn_npu_multi_d(output_ptr, XB: tl.constexpr, YB: tl.constexpr, ZB: tl.conste
                          )
 def test_case_4d_5d(param_list):
     dtype, shape = param_list
-
+    if check_ub_mem_overflow(sigtype, shape):
+        pytest.skip(f"dtype:{sigtype} shape:{shape} mem overflow")
     y_ref = torch.full(shape, 0, dtype=eval('torch.' + dtype)).npu()
     print(f"y_ref = {torch.flatten(y_ref)[0:4]}")
 
diff --git a/ascend/examples/generalization_cases/test_zeroslike.py b/ascend/examples/generalization_cases/test_zeroslike.py
@@ -136,6 +136,8 @@ def fn_npu_multi_d(output_ptr, x_ptr, XB: tl.constexpr, YB: tl.constexpr, ZB: tl
                          )
 def test_case_4d_5d(param_list):
     dtype, shape = param_list
+    if check_ub_mem_overflow(dtype, shape):
+        return
     x0 = test_common.generate_tensor(shape, dtype)
     y_ref = torch.zeros_like(x0, dtype=eval('torch.' + dtype)).npu()
     print(f"y_ref = {torch.flatten(y_ref)[0:4]}")

Original file line number	Diff line number	Diff line change
`@@ -519,7 +519,8 @@ def fn_npu_multi_d(output_ptr, XB: tl.constexpr, YB: tl.constexpr, ZB: tl.conste`
`519`	`519`	`)`
`520`	`520`	`def test_case_4d_5d(param_list):`
`521`	`521`	`dtype, shape = param_list`
`522`		`-`
	`522`	`+ if check_ub_mem_overflow(sigtype, shape):`
	`523`	`+ pytest.skip(f"dtype:{sigtype} shape:{shape} mem overflow")`
`523`	`524`	`y_ref = torch.full(shape, 0, dtype=eval('torch.' + dtype)).npu()`
`524`	`525`	`print(f"y_ref = {torch.flatten(y_ref)[0:4]}")`
`525`	`526`
Original file line number	Diff line number	Diff line change
`@@ -136,6 +136,8 @@ def fn_npu_multi_d(output_ptr, x_ptr, XB: tl.constexpr, YB: tl.constexpr, ZB: tl`
`136`	`136`	`)`
`137`	`137`	`def test_case_4d_5d(param_list):`
`138`	`138`	`dtype, shape = param_list`
	`139`	`+ if check_ub_mem_overflow(dtype, shape):`
	`140`	`+ return`
`139`	`141`	`x0 = test_common.generate_tensor(shape, dtype)`
`140`	`142`	`y_ref = torch.zeros_like(x0, dtype=eval('torch.' + dtype)).npu()`
`141`	`143`	`print(f"y_ref = {torch.flatten(y_ref)[0:4]}")`