diff --git a/csrc/flashinfer_ops.cu b/csrc/flashinfer_ops.cu index 3869621bf..4509dd281 100644 --- a/csrc/flashinfer_ops.cu +++ b/csrc/flashinfer_ops.cu @@ -284,7 +284,8 @@ TORCH_LIBRARY_FRAGMENT(TORCH_EXTENSION_NAME, m) { m.def("batch_prefill_with_paged_kv_cache_run", BatchPrefillWithPagedKVCacheRun); // pod-attention - m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor); + // Temporarily disabled because we don't generate the implementation yet. + // m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor); // quantization // GPU packbits operator diff --git a/setup.py b/setup.py index ac093848b..7e72d0f94 100644 --- a/setup.py +++ b/setup.py @@ -243,7 +243,7 @@ def __init__(self, *args, **kwargs) -> None: "csrc/batch_prefill.cu", "csrc/single_decode.cu", "csrc/single_prefill.cu", - "csrc/pod.cu", + # "csrc/pod.cu", # Temporarily disabled "csrc/flashinfer_ops.cu", ] kernel_sm90_sources = [