Commit 5215b91

clip : fix confused naming ffn_up and ffn_down (ggml-org#13290)
* clip : fix confused naming ffn_up and ffn_down
* rm ffn_i/o/g naming
* rename n_embd, n_ff
* small fix
* no check n_ff
1 parent: ae803bf

4 files changed, +86 -82 lines


convert_hf_to_gguf.py (+6)
@@ -1778,6 +1778,12 @@ class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # fix for SmolVLM2, missing `num_attention_heads` in config.json
+        if self.hf_arch == "VLlama3ForCausalLM":
+            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
+
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
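A side note on the `__init__` fallback above: `dict.get` with a default injects `num_attention_heads` only when `config.json` omits it, so an explicitly declared value is never overwritten. A minimal sketch of the pattern (hypothetical hparams values, not from the commit):

# hypothetical config.json contents, missing num_attention_heads (as in SmolVLM2)
hparams = {"hidden_size": 2048}
hparams["num_attention_heads"] = hparams.get("num_attention_heads", 32)
assert hparams["num_attention_heads"] == 32  # default injected

# a config that does declare the key keeps its explicit value
hparams = {"num_attention_heads": 24}
hparams["num_attention_heads"] = hparams.get("num_attention_heads", 32)
assert hparams["num_attention_heads"] == 24  # untouched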

gguf-py/gguf/tensor_mapping.py (+4 -7)
@@ -977,15 +977,12 @@ class TensorNameMap:
             "visual.blocks.{bid}.norm2", # qwen2vl
         ),
 
-        # some namings are messed up because the original llava code swapped fc1 and fc2
-        # we have no better way to fix it, just be careful
-        # new models like pixtral use the correct naming
         MODEL_TENSOR.V_ENC_FFN_UP: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
             "vpm.encoder.layers.{bid}.mlp.fc1",
-            "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3 (note: name is swapped)
+            "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral
-            "visual.blocks.{bid}.mlp.fc2", # qwen2vl
+            "visual.blocks.{bid}.mlp.fc1", # qwen2vl
             "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl
         ),
 
@@ -997,9 +994,9 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_FFN_DOWN: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
             "vpm.encoder.layers.{bid}.mlp.fc2",
-            "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3 (note: name is swapped)
+            "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral
-            "visual.blocks.{bid}.mlp.fc1", # qwen2vl
+            "visual.blocks.{bid}.mlp.fc2", # qwen2vl
             "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl
         ),
 
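For illustration only, here is how a {bid}-templated entry like the ones above can be matched against a concrete Hugging Face tensor name. This is a simplified sketch, not the actual TensorNameMap implementation (which substitutes concrete block indices rather than using regexes); the tuple subset and the matches_ffn_up helper are made up for this example.

import re

# hypothetical subset of the V_ENC_FFN_UP templates after this commit
V_ENC_FFN_UP = (
    "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
    "model.vision_model.encoder.layers.{bid}.mlp.fc1",  # SmolVLM, gemma3
    "visual.blocks.{bid}.mlp.fc1",                      # qwen2vl
)

def matches_ffn_up(name: str) -> bool:
    # escape literal dots, then turn the {bid} placeholder into a digit pattern
    return any(
        re.fullmatch(tmpl.replace(".", r"\.").replace("{bid}", r"\d+"), name) is not None
        for tmpl in V_ENC_FFN_UP
    )

assert matches_ffn_up("visual.blocks.3.mlp.fc1")      # fc1 now resolves to ffn_up
assert not matches_ffn_up("visual.blocks.3.mlp.fc2")  # fc2 belongs to ffn_down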
