Skip to content

Commit 31bc6f5

Browse files
committed
refactor: enhance diffusion component detection
Signed-off-by: thxCode <[email protected]>
1 parent 640f79c commit 31bc6f5

File tree

4 files changed

+54
-15
lines changed

4 files changed

+54
-15
lines changed

cmd/gguf-parser/main.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1844,7 +1844,7 @@ func mainAction(c *cli.Context) error {
18441844
}
18451845
bds = [][]any{
18461846
{
1847-
a.DiffusionArchitecture,
1847+
sprintf(tenary(a.DiffusionArchitecture != "", a.DiffusionArchitecture, "N/A")),
18481848
sprintf(tenary(a.DiffusionHasConditioners(), a.DiffusionConditioners, "N/A")),
18491849
sprintf(tenary(a.DiffusionHasAutoencoder(), a.DiffusionAutoencoder, "N/A")),
18501850
},
@@ -2000,14 +2000,14 @@ func mainAction(c *cli.Context) error {
20002000
for i := range lmes.Items {
20012001
if !inShort {
20022002
bds[i] = []any{
2003-
sprintf(lmes.Architecture),
2003+
sprintf(tenary(lmes.Architecture != "", lmes.Architecture, "N/A")),
20042004
sprintf(lmes.ContextSize),
20052005
sprintf("%d / %d", lmes.LogicalBatchSize, lmes.PhysicalBatchSize),
20062006
sprintf(tenary(flashAttention, tenary(lmes.FlashAttention, "Enabled", "Unsupported"), "Disabled")),
20072007
sprintf(tenary(mmap, tenary(!lmes.NoMMap, "Enabled", "Unsupported"), "Disabled")),
20082008
sprintf(tenary(lmes.EmbeddingOnly, "Yes", "No")),
20092009
sprintf(tenary(lmes.Reranking, "Supported", "Unsupported")),
2010-
sprintf(tenary(lmes.Distributable, "Supported", "Unsupported")),
2010+
sprintf(tenary(lmes.Architecture != "" && lmes.Distributable, "Supported", "Unsupported")),
20112011
sprintf(tenary(lmes.Items[i].FullOffloaded, sprintf("%d (%d + 1)",
20122012
lmes.Items[i].OffloadLayers, lmes.Items[i].OffloadLayers-1), lmes.Items[i].OffloadLayers)),
20132013
sprintf(tenary(lmes.Items[i].FullOffloaded, "Yes", "No")),
@@ -2071,10 +2071,10 @@ func mainAction(c *cli.Context) error {
20712071
for i := range sdes.Items {
20722072
if !inShort {
20732073
bds[i] = []any{
2074-
sprintf(sdes.Architecture),
2074+
sprintf(tenary(sdes.Architecture != "", sdes.Architecture, "N/A")),
20752075
sprintf(tenary(flashAttention, tenary(sdes.FlashAttention, "Enabled", "Unsupported"), "Disabled")),
20762076
sprintf(tenary(mmap, tenary(!sdes.NoMMap, "Enabled", "Unsupported"), "Disabled")),
2077-
sprintf(tenary(sdes.Distributable, "Supported", "Unsupported")),
2077+
sprintf(tenary(sdes.Architecture != "" && sdes.Distributable, "Supported", "Unsupported")),
20782078
sprintf(tenary(sdes.Items[i].FullOffloaded, "Yes", "No")),
20792079
}
20802080
}

file_architecture.go

Lines changed: 45 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -414,11 +414,15 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
414414

415415
// Conditioner
416416

417-
openAiClipVitL14Key = "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight" // OpenAI CLIP ViT-L/14
418-
openClipVitH14Key = "cond_stage_model.transformer.text_model.encoder.layers.22.self_attn.k_proj.weight" // OpenCLIP ViT-H/14
419-
openClipVitG14Key = "cond_stage_model.1.transformer.text_model.encoder.layers.31.self_attn.k_proj.weight" // OpenCLIP ViT-G/14
420-
t5xxlKey = "cond_stage_model.1.transformer.encoder.block.23.layer.0.SelfAttention.k.weight" // Google T5-xxl
421-
t5xxlKey2 = "cond_stage_model.2.transformer.encoder.block.23.layer.0.SelfAttention.k.weight"
417+
openAiClipVitL14Key = "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight" // OpenAI CLIP ViT-L/14
418+
openAiClipVitL14Key2 = "text_model.encoder.layers.11.self_attn.k_proj.weight"
419+
openClipVitH14Key = "cond_stage_model.transformer.text_model.encoder.layers.22.self_attn.k_proj.weight" // OpenCLIP ViT-H/14
420+
openClipVitH14Key2 = "text_model.encoder.layers.22.self_attn.k_proj.weight"
421+
openClipVitG14Key = "cond_stage_model.1.transformer.text_model.encoder.layers.31.self_attn.k_proj.weight" // OpenCLIP ViT-G/14
422+
openClipVitG14Key2 = "text_model.encoder.layers.31.self_attn.k_proj.weight"
423+
t5xxlKey = "cond_stage_model.1.transformer.encoder.block.23.layer.0.SelfAttention.k.weight" // Google T5-xxl
424+
t5xxlKey2 = "cond_stage_model.2.transformer.encoder.block.23.layer.0.SelfAttention.k.weight"
425+
t5xxlKey3 = "encoder.block.23.layer.0.SelfAttention.k.weight"
422426
)
423427

424428
tis, _ := gf.TensorInfos.Index([]string{
@@ -439,10 +443,14 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
439443
fluxFillFeatureKey2,
440444

441445
openAiClipVitL14Key,
446+
openAiClipVitL14Key2,
442447
openClipVitH14Key,
448+
openClipVitH14Key2,
443449
openClipVitG14Key,
450+
openClipVitG14Key2,
444451
t5xxlKey,
445452
t5xxlKey2,
453+
t5xxlKey3,
446454
})
447455

448456
ga.Type = "model"
@@ -513,12 +521,29 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
513521
}
514522
}
515523
ga.DiffusionConditioners = append(ga.DiffusionConditioners, cond)
524+
} else if ti, ok := tis[openAiClipVitL14Key2]; ok {
525+
cond := GGUFArchitectureDiffusionConditioner{
526+
Architecture: "OpenAI CLIP ViT-L/14",
527+
FileType: ti.GetFileType(),
528+
}
529+
if ti, ok = tis[openClipVitH14Key2]; ok {
530+
cond = GGUFArchitectureDiffusionConditioner{
531+
Architecture: "OpenCLIP ViT-H/14",
532+
FileType: ti.GetFileType(),
533+
}
534+
}
535+
ga.DiffusionConditioners = append(ga.DiffusionConditioners, cond)
516536
}
517537
if ti, ok := tis[openClipVitG14Key]; ok {
518538
ga.DiffusionConditioners = append(ga.DiffusionConditioners, GGUFArchitectureDiffusionConditioner{
519539
Architecture: "OpenCLIP ViT-G/14",
520540
FileType: ti.GetFileType(),
521541
})
542+
} else if ti, ok = tis[openClipVitG14Key2]; ok {
543+
ga.DiffusionConditioners = append(ga.DiffusionConditioners, GGUFArchitectureDiffusionConditioner{
544+
Architecture: "OpenCLIP ViT-G/14",
545+
FileType: ti.GetFileType(),
546+
})
522547
}
523548
if ti, ok := tis[t5xxlKey]; ok {
524549
ga.DiffusionConditioners = append(ga.DiffusionConditioners, GGUFArchitectureDiffusionConditioner{
@@ -530,12 +555,23 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
530555
Architecture: "Google T5-xxl",
531556
FileType: ti.GetFileType(),
532557
})
558+
} else if ti, ok = tis[t5xxlKey3]; ok {
559+
ga.DiffusionConditioners = append(ga.DiffusionConditioners, GGUFArchitectureDiffusionConditioner{
560+
Architecture: "Google T5-xxl",
561+
FileType: ti.GetFileType(),
562+
})
533563
}
534564

535-
if tis := gf.TensorInfos.Search(regexp.MustCompile(`^first_stage_model\..*`)); len(tis) != 0 {
536-
ga.DiffusionAutoencoder = &GGUFArchitectureDiffusionAutoencoder{
537-
Architecture: ga.DiffusionArchitecture + " VAE",
538-
FileType: GGUFTensorInfos(tis).GetFileType(),
565+
for _, re := range []*regexp.Regexp{
566+
regexp.MustCompile(`^first_stage_model\..*`),
567+
regexp.MustCompile(`^decoder\.conv_in\..*`),
568+
} {
569+
if tis := gf.TensorInfos.Search(re); len(tis) != 0 {
570+
ga.DiffusionAutoencoder = &GGUFArchitectureDiffusionAutoencoder{
571+
Architecture: ga.DiffusionArchitecture + " VAE",
572+
FileType: GGUFTensorInfos(tis).GetFileType(),
573+
}
574+
break
539575
}
540576
}
541577

file_estimate__llamacpp.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -641,7 +641,7 @@ func (gf *GGUFFile) estimateLLaMACppRunInModel(o *_GGUFRunEstimateOptions, a *GG
641641
// Computation.
642642
{
643643
// See https://github.com/ggml-org/llama.cpp/blob/ec9e0301fef6476df83e94842c3b625501c95566/src/llama-context.cpp#L1241-L1243.
644-
maxNodes := max(65536, uint64(5*len(gf.TensorInfos)))
644+
maxNodes := max(1024, uint64(8*len(gf.TensorInfos)))
645645

646646
// Bootstrap, compute metadata.
647647
cm := GGMLTensorOverhead()*maxNodes + GGMLComputationGraphOverhead(maxNodes, false)

file_metadata.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,9 @@ var _GGUFPotentialDiffusionArchitectureTensorsRegexes = []*regexp.Regexp{
146146
regexp.MustCompile(`^model\.diffusion_model\..*`),
147147
regexp.MustCompile(`^double_blocks\..*`),
148148
regexp.MustCompile(`^joint_blocks\..*`),
149+
regexp.MustCompile(`^decoder\..*`),
150+
regexp.MustCompile(`^encoder\..*`),
151+
regexp.MustCompile(`^text_model\..*`),
149152
}
150153

151154
// Metadata returns the metadata of the GGUF file.

0 commit comments

Comments
 (0)