From 3af6e100b1b3ad01f61790cef09fbd7a9a46c5c5 Mon Sep 17 00:00:00 2001 From: Lequn Chen Date: Mon, 17 Mar 2025 20:52:40 +0000 Subject: [PATCH] misc: Temporarily disable POD from AOT wheels --- csrc/flashinfer_ops.cu | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/csrc/flashinfer_ops.cu b/csrc/flashinfer_ops.cu index 3869621bf..4509dd281 100644 --- a/csrc/flashinfer_ops.cu +++ b/csrc/flashinfer_ops.cu @@ -284,7 +284,8 @@ TORCH_LIBRARY_FRAGMENT(TORCH_EXTENSION_NAME, m) { m.def("batch_prefill_with_paged_kv_cache_run", BatchPrefillWithPagedKVCacheRun); // pod-attention - m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor); + // Temporarily disabled because we don't generate the implementation yet. + // m.def("pod_with_kv_cache_tensor", pod_with_kv_cache_tensor); // quantization // GPU packbits operator diff --git a/setup.py b/setup.py index ac093848b..7e72d0f94 100644 --- a/setup.py +++ b/setup.py @@ -243,7 +243,7 @@ def __init__(self, *args, **kwargs) -> None: "csrc/batch_prefill.cu", "csrc/single_decode.cu", "csrc/single_prefill.cu", - "csrc/pod.cu", + # "csrc/pod.cu", # Temporarily disabled "csrc/flashinfer_ops.cu", ] kernel_sm90_sources = [