Skip to content

Commit 25d67b5

Browse files
authored
3rdparty: upgrade cutlass to 3.9 (#997)
Update CUTLASS to the latest version (v3.9) in preparation for upcoming pull requests that add Blackwell support. Hopper workloads show a slight performance degradation after this upgrade; this will be investigated later.
1 parent 5751fc6 commit 25d67b5

File tree

3 files changed

+3
-3
lines changed

3 files changed

+3
-3
lines changed

3rdparty/cutlass

Submodule cutlass updated 2434 files

include/flashinfer/attention/hopper/block_sparse_gather.cuh

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ CUTE_HOST_DEVICE constexpr auto upcast(Shape const& shape, Stride const& stride)
158158
[](auto const& s, auto const& d) { return upcast<N, I>(s, d); });
159159
} else if constexpr (is_scaled_basis<Stride>::value) {
160160
if constexpr (Stride::mode() == I) {
161-
return make_layout(shape_div(shape, Int<N>{}), shape_div(stride, Int<N>{}));
161+
return make_layout(ceil_div(shape, Int<N>{}), ceil_div(stride, Int<N>{}));
162162
} else {
163163
return make_layout(shape, stride);
164164
}

include/flashinfer/attention/mla_hopper.cuh

+1-1
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ __global__ __launch_bounds__(KTraits::NUM_THREADS) void BatchMLAPageAttentionHop
656656
pipeline_params.producer_arv_count = 128;
657657
pipeline_params.consumer_arv_count = 128;
658658
MainloopPipeline pipeline_q(smem_storage.pipeline_q, pipeline_params);
659-
pipeline_params.role = warp_group_idx == 0 ? MainloopPipeline::ThreadCategory::Producer
659+
pipeline_params.role = warp_group_idx == 0 ? MainloopPipeline::ThreadCategory::ProducerConsumer
660660
: MainloopPipeline::ThreadCategory::Consumer;
661661
pipeline_params.producer_arv_count = 128;
662662
pipeline_params.consumer_arv_count = 256;

0 commit comments

Comments
 (0)