Skip to content

Commit 532c332

Browse files
committed
correct pixel shuffle
1 parent: 9d1a4d6 · commit: 532c332

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

tools/llava/clip.cpp

+10 -3
Original file line number | Diff line number | Diff line change
@@ -965,23 +965,30 @@ static ggml_cgraph * clip_image_build_graph_llama4(clip_ctx * ctx, const clip_im
965965
ggml_row_size(cur->type, hidden_size),
966966
ggml_row_size(cur->type, hidden_size * num_patches), 0);
967967

968-
cur = ggml_reshape_3d(ctx0, cur,
968+
cur = ggml_reshape_4d(ctx0, cur,
969969
hidden_size * scale_factor,
970-
num_patches / scale_factor,
970+
px / scale_factor,
971+
py,
971972
batch_size);
972973
cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
973974

974975
cur = ggml_reshape_4d(ctx0, ggml_cont(ctx0, cur),
975976
hidden_size * scale_factor * scale_factor,
976-
py / scale_factor,
977977
px / scale_factor,
978+
py / scale_factor,
978979
batch_size);
979980
cur = ggml_permute(ctx0, cur, 0, 2, 1, 3);
980981

982+
cur = ggml_reshape_3d(ctx0, ggml_cont(ctx0, cur),
983+
hidden_size * scale_factor * scale_factor,
984+
num_patches / scale_factor / scale_factor,
985+
batch_size);
986+
981987
// based on Llama4VisionMLP2 (always uses GELU activation, no bias)
982988
cur = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, cur);
983989
cur = ggml_gelu(ctx0, cur);
984990
cur = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, cur);
991+
cur = ggml_gelu(ctx0, cur);
985992
embeddings = cur;
986993
}
987994

0 commit comments

Comments
 (0)