
Commit 9f5ad4f

melo882 authored and it-is-a-robot committed

test(op): modify bool support list for some ops

1 parent ba1e078

File tree: 10 files changed (+49, -83 lines)

ascend/examples/generalization_cases/test_common.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -145,6 +145,8 @@ def generate_tensor_int_withSigns(shape, dtype):
         return torch.randint(low=-32768, high=32767, size=shape, dtype=eval('torch.' + dtype))
     elif dtype == 'int8':
         return torch.randint(low=-128, high=127, size=shape, dtype=eval('torch.' + dtype))
+    elif dtype == 'bool':
+        return torch.randint(low=0, high=2, size=shape).bool()
     else:
         raise ValueError('Invalid parameter \"dtype\" is found : {}'.format(dtype))

```
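For reference, the new branch builds a boolean tensor by sampling 0/1 integers and casting; a minimal standalone sketch of the same pattern (the shape is chosen arbitrarily):

```python
import torch

# Sample 0/1 uniformly, then cast to torch.bool -- same pattern as the new branch.
t = torch.randint(low=0, high=2, size=(4, 8)).bool()
assert t.dtype == torch.bool  # values are drawn from {False, True}
```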

ascend/examples/generalization_cases/test_eq.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -63,7 +63,7 @@ def triton_eq_4d_5d(


 @pytest.mark.parametrize('shape', TestUtils.test_shape1_2_3d)
-@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32'])
+@pytest.mark.parametrize('dtype', ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32'])
 def test_eq(shape, dtype):
     logging.debug(f'dtype:{dtype} shape:{shape}')
     # generate data
```
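With 'bool' in the parametrization, the reference side of the test only relies on torch.eq, which is defined for torch.bool; a short illustration (shapes arbitrary):

```python
import torch

x = torch.randint(0, 2, (8,)).bool()
y = torch.randint(0, 2, (8,)).bool()
ans = torch.eq(x, y)  # elementwise equality is well defined on bool tensors
assert ans.dtype == torch.bool
```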

ascend/examples/generalization_cases/test_ge_op.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -81,7 +81,7 @@ def triton_ge_4d_5d(
     tl.store(output_ptr + offsets, ret, mask=masks)


-typelist = ['int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']
+typelist = ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']

 dtype_mapping = {
     'int8': (torch.int8),
```

ascend/examples/generalization_cases/test_general_floordiv.py

Lines changed: 14 additions & 31 deletions

```diff
@@ -66,7 +66,7 @@ def triton_floordiv_4d_5d(


 @pytest.mark.parametrize('shape', TestUtils.full_shape)  # some shape with int8 over ub
-@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
+@pytest.mark.parametrize('dtype', ['bool', 'int8', 'int16', 'int32', 'int64'])
 def test_floordiv(shape, dtype):
     logging.log(logging.DEBUG, f"shape = {shape}")
     x = test_common.generate_tensor_int_withSigns(shape, dtype).npu()
@@ -83,38 +83,21 @@ def test_floordiv(shape, dtype):
     ans = ans + ans_mask

     if len(shape) == 1:
-        XB = 1
-        xnumel = 1
-        YB = 1
-        ynumel = 1
-        ZB = shape[0]
-        znumel = shape[0]
+        triton_floordiv[1, 1, shape[0]](output, x, y, z, 1, 1, 1, 1, 1, shape[0])
     elif len(shape) == 2:
-        XB = 1
-        xnumel = 1
-        YB = shape[0]
-        ynumel = shape[0]
-        ZB = shape[1]
-        znumel = shape[1]
-    else:
-        XB = shape[0]
-        xnumel = shape[0]
-        YB = shape[1]
-        ynumel = shape[1]
-        ZB = shape[2]
-        znumel = shape[2]
-
-    grid = (1, 1, 1)
-    if dtype == 'int8':
-        if x.numel() * x.element_size() >= 512:
-            grid = (1, 1, ZB)
-            ZB = 1
+        if shape[0] > shape[1]:
+            triton_floordiv[1, shape[0], 1](output, x, y, z, 1, 1, shape[1], 1, shape[0], shape[1])
+        else:
+            triton_floordiv[1, 1, shape[1]](output, x, y, z, 1, shape[0], 1, 1, shape[0], shape[1])
+    elif len(shape) == 3:
+        if max(shape[0], shape[1], shape[2]) == shape[0]:
+            triton_floordiv[shape[0], 1, 1](output, x, y, z, 1, shape[1], shape[2], shape[0], shape[1], shape[2])
+        elif max(shape[0], shape[1], shape[2]) == shape[1]:
+            triton_floordiv[1, shape[1], 1](output, x, y, z, shape[0], 1, shape[2], shape[0], shape[1], shape[2])
+        else:
+            triton_floordiv[1, 1, shape[2]](output, x, y, z, shape[0], shape[1], 1, shape[0], shape[1], shape[2])
     else:
-        if x.numel() * x.element_size() >= 8192:
-            grid = (1, 1, ZB)
-            ZB = 1
-
-    triton_floordiv[grid](output, x, y, z, XB, YB, ZB, xnumel, ynumel, znumel)
+        triton_floordiv[1, 1, 1](output, x, y, z, 1, 1, 1, 1, 1, 1)

     test_common.validate_cmp(dtype, ans, output)
```
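The rewrite replaces the XB/ZB bookkeeping with direct launches: the largest dimension of the shape goes onto the 3D launch grid with a block size of 1 along that axis, while the remaining axes keep full-size blocks. A hedged sketch of that dispatch rule as a standalone helper (the helper name is hypothetical, not part of the patch):

```python
def launch_grid_and_blocks(shape):
    """Return (grid, (XB, YB, ZB)) following the dispatch above: the largest
    dimension is mapped onto the 3D launch grid, with a block size of 1 along
    the gridded axis and full coverage along the others."""
    if len(shape) == 1:
        return (1, 1, shape[0]), (1, 1, 1)
    if len(shape) == 2:
        if shape[0] > shape[1]:
            return (1, shape[0], 1), (1, 1, shape[1])
        return (1, 1, shape[1]), (1, shape[0], 1)
    if len(shape) == 3:
        if max(shape) == shape[0]:
            return (shape[0], 1, 1), (1, shape[1], shape[2])
        if max(shape) == shape[1]:
            return (1, shape[1], 1), (shape[0], 1, shape[2])
        return (1, 1, shape[2]), (shape[0], shape[1], 1)
    return (1, 1, 1), (1, 1, 1)  # fallback mirrors the final else branch
```

Spreading the largest axis across the grid keeps each program's tile small, which plausibly replaces the deleted `x.numel() * x.element_size()` thresholds that guarded against ub overflow.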

ascend/examples/generalization_cases/test_gt_op.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -81,7 +81,7 @@ def triton_gt_4d_5d(
     tl.store(output_ptr + offsets, ret, mask=masks)


-typelist = ['int8','int16','int32','int64','float16','bfloat16','float32']
+typelist = ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']

 dtype_mapping = {
     'int8': (torch.int8),
```

ascend/examples/generalization_cases/test_le_op.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -81,7 +81,7 @@ def triton_le_4d_5d(
     tl.store(output_ptr + offsets, ret, mask=masks)


-typelist = ['int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']
+typelist = ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']

 dtype_mapping = {
     'int8': (torch.int8),
```

ascend/examples/generalization_cases/test_lt_op.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -81,7 +81,7 @@ def triton_lt_4d_5d(
     tl.store(output_ptr + offsets, ret, mask=masks)


-typelist = ['int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']
+typelist = ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32']

 dtype_mapping = {
     'int8': (torch.int8),
```

ascend/examples/generalization_cases/test_mod.py

Lines changed: 14 additions & 34 deletions

```diff
@@ -79,46 +79,26 @@ def test_case2(dtype, shape):
     new_shape = shape
     z[z <= 0] = 1

-    output = torch.randint(1, new_shape, dtype=eval('torch.' + dtype)).npu()
-    output1 = output
-    logging.debug(f"output.dtype={output.dtype}")
-
     ans = torch_pointwise(x.cpu(), y.cpu())
     ans = ans.npu()
+    output = torch.zeros_like(ans)

     if len(shape) == 1:
-        XB = 1
-        xnumel = 1
-        YB = 1
-        ynumel = 1
-        ZB = shape[0]
-        znumel = shape[0]
+        fn_npu_[1, 1, shape[0]](output, x, y, z, 1, 1, 1, 1, 1, shape[0])
     elif len(shape) == 2:
-        XB = 1
-        xnumel = 1
-        YB = shape[0]
-        ynumel = shape[0]
-        ZB = shape[1]
-        znumel = shape[1]
-    else:
-        XB = shape[0]
-        xnumel = shape[0]
-        YB = shape[1]
-        ynumel = shape[1]
-        ZB = shape[2]
-        znumel = shape[2]
-
-    grid = (1, 1, 1)
-    if dtype == 'int8':
-        if x.numel() * x.element_size() >= 512:
-            grid = (1, 1, ZB)
-            ZB = 1
+        if shape[0] > shape[1]:
+            fn_npu_[1, shape[0], 1](output, x, y, z, 1, 1, shape[1], 1, shape[0], shape[1])
+        else:
+            fn_npu_[1, 1, shape[1]](output, x, y, z, 1, shape[0], 1, 1, shape[0], shape[1])
+    elif len(shape) == 3:
+        if max(shape[0], shape[1], shape[2]) == shape[0]:
+            fn_npu_[shape[0], 1, 1](output, x, y, z, 1, shape[1], shape[2], shape[0], shape[1], shape[2])
+        elif max(shape[0], shape[1], shape[2]) == shape[1]:
+            fn_npu_[1, shape[1], 1](output, x, y, z, shape[0], 1, shape[2], shape[0], shape[1], shape[2])
+        else:
+            fn_npu_[1, 1, shape[2]](output, x, y, z, shape[0], shape[1], 1, shape[0], shape[1], shape[2])
     else:
-        if x.numel() * x.element_size() >= 8192:
-            grid = (1, 1, ZB)
-            ZB = 1
-
-    fn_npu_[grid](output, x, y, z, XB, YB, ZB, xnumel, ynumel, znumel)
+        fn_npu_[1, 1, 1](output, x, y, z, 1, 1, 1, 1, 1, 1)

     test_common.validate_cmp(dtype, ans, output)
```
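test_mod.py gets the same launch-grid rewrite as test_general_floordiv.py; the additional change is that the output buffer is now derived from the reference result, so its dtype and shape track whatever torch_pointwise returns, bool included. A minimal illustration:

```python
import torch

# zeros_like keeps the output buffer's dtype and shape in lockstep with the
# reference answer -- no dtype-specific allocation path is needed for bool.
ans = torch.tensor([True, False, True])
output = torch.zeros_like(ans)
assert output.dtype == torch.bool and output.shape == ans.shape
```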

ascend/examples/generalization_cases/test_ne.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -63,7 +63,7 @@ def triton_ne_4d_5d(


 @pytest.mark.parametrize('shape', TestUtils.test_shape1_2_3d)
-@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32'])
+@pytest.mark.parametrize('dtype', ['bool', 'int8', 'int16', 'int32', 'int64', 'float16', 'bfloat16', 'float32'])
 def test_ne(shape, dtype):
     logging.debug(f'dtype:{dtype} shape:{shape}')
     # generate data
```

docs/sources/python-api/outline.md

Lines changed: 13 additions & 12 deletions

```diff
@@ -28,12 +28,12 @@
 | | make_block_ptr |||| × ||||| × |
 | | advance |||| × ||||| × |
 | Indexing Ops | flip |||| × ||||||
-| | where |||| × | × |||| |
+| | where |||| × | × ||||* |
 | | swizzle2d |||| × || × | × | × | × |
-| Math Ops | add |||| × ||||| |
-| | sub |||| × ||||| |
-| | mul |||| × ||||| |
-| | div |||| × ||||| |
+| Math Ops | add |||| × |||||* |
+| | sub |||| × |||||* |
+| | mul |||| × |||||* |
+| | div |||| × |||||* |
 | | floordiv(//) |||| × || × | × | × | × |
 | | mod |||| × | × | × | × | × | × |
 | | neg |||| × ||||| × |
@@ -44,12 +44,12 @@
 | | not(~) |||| × || × | × | × ||
 | | lshift(<<) |||| × || × | × | × | × |
 | | rshift(>>) |||| × || × | × | × | × |
-| | gt |||| × ||||| × |
-| | ge |||| × ||||| × |
-| | lt |||| × ||||| × |
-| | le |||| × ||||| × |
-| | eq |||| × ||||| × |
-| | ne |||| × ||||| × |
+| | gt |||| × ||||| * |
+| | ge |||| × ||||| * |
+| | lt |||| × ||||| * |
+| | le |||| × ||||| * |
+| | eq |||| × ||||| * |
+| | ne |||| × ||||| * |
 | | logical and | × | × | × | × | × | × | × | × ||
 | | logical or | × | × | × | × | × | × | × | × ||
 | | abs |||| × |||||* |
@@ -138,4 +138,5 @@

 - ALL: because int8 receives special handling, it occupies more on-chip space and can easily trigger a ub overflow error at compile time; adjusting the tiling usually resolves this;
   all tensors alive at the same time in a triton kernel must not total more than 96KB, or 192KB if double buffering is disabled;
-  no tensor may have a shape dimension whose size is less than 1.
+  no tensor may have a shape dimension whose size is less than 1;
+  * marks OPs for which triton internally converts the bool type to int8 for computation and can still produce the result.
```
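To make the `*` footnote concrete: a comparison on bool inputs can be evaluated by treating the 1-byte bool operands as int8 and comparing those. A hedged Triton sketch of the pattern (the kernel name and layout are illustrative only, not the repo's kernels):

```python
import triton
import triton.language as tl

@triton.jit
def eq_bool_kernel(out_ptr, x_ptr, y_ptr, N: tl.constexpr):
    # bool tensors occupy one byte per element; casting the loaded values to
    # int8 mirrors the internal bool -> int8 conversion the footnote describes.
    offs = tl.arange(0, N)
    x = tl.load(x_ptr + offs).to(tl.int8)
    y = tl.load(y_ptr + offs).to(tl.int8)
    tl.store(out_ptr + offs, x == y)

# Hypothetical usage: eq_bool_kernel[(1,)](out, x, y, N=16) with bool tensors
# (or their .view(torch.int8) reinterpretations) of 16 elements each.
```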
