From 3af6e100b1b3ad01f61790cef09fbd7a9a46c5c5 Mon Sep 17 00:00:00 2001
From: Lequn Chen <lequn@perplexity.ai>
Date: Mon, 17 Mar 2025 20:52:40 +0000
Subject: [PATCH] misc: Temporarily disable POD from AOT wheels

---
 csrc/flashinfer_ops.cu | 3 ++-
 setup.py               | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/csrc/flashinfer_ops.cu b/csrc/flashinfer_ops.cu
index 3869621bf..4509dd281 100644
--- a/csrc/flashinfer_ops.cu
+++ b/csrc/flashinfer_ops.cu
@@ -284,7 +284,8 @@ TORCH_LIBRARY_FRAGMENT(TORCH_EXTENSION_NAME, m) {
   m.def("batch_prefill_with_paged_kv_cache_run", BatchPrefillWithPagedKVCacheRun);
 
   // pod-attention
-  m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
+  // Temporarily disabled: csrc/pod.cu is not built into AOT wheels yet (see setup.py).
+  // m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor);
 
   // quantization
   // GPU packbits operator
diff --git a/setup.py b/setup.py
index ac093848b..7e72d0f94 100644
--- a/setup.py
+++ b/setup.py
@@ -243,7 +243,7 @@ def __init__(self, *args, **kwargs) -> None:
         "csrc/batch_prefill.cu",
         "csrc/single_decode.cu",
         "csrc/single_prefill.cu",
-        "csrc/pod.cu",
+        # "csrc/pod.cu",  # Temporarily disabled: POD is excluded from AOT wheels
         "csrc/flashinfer_ops.cu",
     ]
     kernel_sm90_sources = [