Skip to content

Commit d82312d

Browse files
authored
Refactor rope (#199)
1 parent d8019aa commit d82312d

File tree

5 files changed: +8 additions, −37 deletions

dlinfer/graph/dicp/vendor/AtbGraph/conversion.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ def npu_rms_norm_w8a8(self, x, w, eps=1e-6, quant_dtype=torch.int8):
133133
)
134134
return rms_norm_w8a8
135135

136-
@register_conversion("torch.ops.lmdeploy.apply_rotary_pos_emb.default")
137-
def apply_rotary_pos_emb(self, q, k, cos, sin, q_out, k_out):
136+
@register_conversion("torch.ops.dlinfer.apply_rotary_pos_emb.default")
137+
def apply_rotary_pos_emb(self, q, k, cos, sin):
138138
q_shape = list(q.node.meta["val"].shape)
139139
k_shape = list(k.node.meta["val"].shape)
140140
is_qk_require_reshape = len(q_shape) == 3
@@ -151,22 +151,6 @@ def apply_rotary_pos_emb(self, q, k, cos, sin, q_out, k_out):
151151
else self.get_proxy(atb_op.View, (k, (-1, k_shape[1] * k_shape[2])))
152152
)
153153
out = self.get_proxy(atb_op.Rope, (new_q, new_k, cos, sin, None))
154-
if is_qk_require_reshape:
155-
out_q = self.get_proxy(atb_op.GetItem, (out, 0))
156-
out_q = self.get_proxy(atb_op.View, (out_q, (-1, q_shape[1], q_shape[2])))
157-
out_k = self.get_proxy(atb_op.GetItem, (out, 1))
158-
out_k = self.get_proxy(atb_op.View, (out_k, (-1, k_shape[1], k_shape[2])))
159-
out = self.get_proxy(atb_op.Tuple, (out_q, out_k))
160-
if (q_out is not None) and (k_out is not None):
161-
self.get_proxy(
162-
atb_op.AclNnInplaceCopy,
163-
(q_out, self.get_proxy(atb_op.GetItem, (out, 0))),
164-
)
165-
self.get_proxy(
166-
atb_op.AclNnInplaceCopy,
167-
(k_out, self.get_proxy(atb_op.GetItem, (out, 1))),
168-
)
169-
out = self.get_proxy(atb_op.Tuple, (q_out, k_out))
170154
return out
171155

172156
@register_conversion("torch.ops.atb.inplace_div.default")

dlinfer/ops/llm.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ def apply_rotary_pos_emb(
5959
key: Tensor,
6060
cos: Optional[Tensor],
6161
sin: Optional[Tensor],
62-
position_ids: Optional[Tensor],
63-
cos_sin_cache: Optional[Tensor],
6462
) -> Tuple[Tensor, Tensor]:
6563
"""
6664
Apply rotary position embeddings to the query and key tensors.
@@ -73,13 +71,6 @@ def apply_rotary_pos_emb(
7371
key (Tensor): The key tensor to apply the rotary position embeddings to.
7472
cos (Optional[Tensor]): The cosine component of the rotary position embeddings.
7573
sin (Optional[Tensor]): The sine component of the rotary position embeddings.
76-
position_ids (Optional[Tensor]): The position ids used to look up the rotary position embeddings.
77-
cos_sin_cache (Optional[Tensor]): A cache of pre-computed cosine and sine values.
78-
79-
Note:
80-
The parameter groups are mutually exclusive:
81-
- If `cos` and `sin` are both `None`, then `position_ids` and `cos_sin_cache` must both be Tensor.
82-
- If `position_ids` and `cos_sin_cache` are both `None`, then `cos` and `sin` must both be Tensor.
8374
8475
Returns:
8576
Tuple[Tensor, Tensor]:
@@ -91,8 +82,6 @@ def apply_rotary_pos_emb(
9182
key,
9283
cos,
9384
sin,
94-
position_ids,
95-
cos_sin_cache,
9685
)
9786

9887

dlinfer/vendor/ascend/torch_npu_ops.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,14 @@ def apply_rotary_pos_emb(
4141
key: Tensor,
4242
cos: Optional[Tensor],
4343
sin: Optional[Tensor],
44-
position_ids: Optional[Tensor],
45-
cos_sin_cache: Optional[Tensor],
4644
) -> Tuple[Tensor, Tensor]:
4745
# rotary pos emb helpers:
46+
query = query.contiguous().unsqueeze(0)
47+
key = key.contiguous().unsqueeze(0)
4848
assert len(query.shape) == 4
4949
batch, seq_len, _, _ = query.shape
5050
cos = cos.reshape(batch, seq_len, 1, -1)
5151
sin = sin.reshape(batch, seq_len, 1, -1)
52-
query = query.contiguous()
53-
key = key.contiguous()
5452

5553
def rotate_half_(x):
5654
x1, x2 = x[..., : x.shape[-1] // 2], x[..., x.shape[-1] // 2 :]

dlinfer/vendor/camb/camb_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ def apply_rotary_pos_emb(
8686
key: Tensor,
8787
cos: Optional[Tensor], # (total_seq_len, head_dim)
8888
sin: Optional[Tensor],
89-
position_ids: Optional[Tensor],
90-
cos_sin_cache: Optional[Tensor],
9189
) -> Tuple[Tensor, Tensor]:
90+
query = query.contiguous().unsqueeze(0)
91+
key = key.contiguous().unsqueeze(0)
9292
interleaved = False # False for fold rope, True for cross rope
9393
# [1, total_seq_len, q_head_num, head_dim]
9494
_, total_seq_len, _, head_dim = query.shape

dlinfer/vendor/maca/maca_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def apply_rotary_pos_emb(
7979
key: Tensor,
8080
cos: Optional[Tensor],
8181
sin: Optional[Tensor],
82-
position_ids: Optional[Tensor],
83-
cos_sin_cache: Optional[Tensor],
8482
) -> Tuple[Tensor, Tensor]:
83+
query = query.contiguous().unsqueeze(0)
84+
key = key.contiguous().unsqueeze(0)
8585
position_ids_1d = torch.arange(0, query.size(1), device=query.device)
8686
head_size = query.size(-1)
8787
query = query.flatten(-2, -1)

Comments (0)