We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 045fdbc, commit 457cac9. Copy full SHA for 457cac9
python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -285,6 +285,7 @@ def apply(
285
activation: str = "silu",
286
inplace: bool = True,
287
no_combine: bool = False,
288
+ apply_router_weight_on_input: bool = False,
289
) -> torch.Tensor:
290
from sglang.srt.layers.moe.fused_moe_triton import fused_experts
291
from sglang.srt.layers.moe.topk import select_experts
@@ -314,6 +315,7 @@ def apply(
314
315
w2_scale=layer.w2_weight_scale,
316
a1_scale=layer.w13_input_scale,
317
a2_scale=layer.w2_input_scale,
318
+ apply_router_weight_on_input=apply_router_weight_on_input,
319
)
320
321
0 commit comments