Skip to content

Commit e453e36

Browse files
张春立 and it-is-a-robot
authored and committed
fix(accuracy): cast input of exp(x) to fp32 in order to guarantee accuracy of sigmoid and softmax
1 parent 74728ae commit e453e36

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

triton_patch/python/triton_patch/language/standard.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,17 @@ def flip(x, dim=None):
2020
@jit
@math._add_math_1arg_docstr("sigmoid")
def sigmoid(x):
    # Compile-time guard: sigmoid is only defined for floating-point tensors.
    # NOTE: bound to a constexpr local so static_assert can evaluate it at trace time.
    _input_is_float: core.constexpr = x.dtype.is_floating()
    core.static_assert(_input_is_float == True, f"Expected dtype fp16/fp32/bf16, but got {core.constexpr(x.dtype)}")
    # Evaluate exp in fp32 — exp(-x) in fp16/bf16 loses accuracy — then
    # cast the result back to the caller's dtype so the interface is unchanged.
    x_f32 = x.to(core.float32)
    result_f32 = 1 / (1 + math.exp(-x_f32))
    return result_f32.to(x.dtype)
2526

2627
@core._tensor_member_fn
@jit
@math._add_math_1arg_docstr("softmax")
def softmax(x, ieee_rounding=False):
    # Compile-time guard: softmax is only defined for floating-point tensors.
    _input_is_float: core.constexpr = x.dtype.is_floating()
    core.static_assert(_input_is_float == True, f"Expected dtype fp16/fp32/bf16, but got {core.constexpr(x.dtype)}")
    # Numerically stable softmax: subtract the row max before exponentiating.
    # The subtraction and exp run in fp32 for accuracy; the final quotient is
    # cast back to the caller's dtype so the interface is unchanged.
    x_f32 = x.to(core.float32)
    shifted = x_f32 - max(x, 0)
    numerator = math.exp(shifted)
    denominator = sum(numerator, 0)
    return math.fdiv(numerator, denominator, ieee_rounding).to(x.dtype)

0 commit comments

Comments
 (0)