@@ -414,11 +414,15 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
414
414
415
415
// Conditioner
416
416
417
- openAiClipVitL14Key = "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight" // OpenAI CLIP ViT-L/14
418
- openClipVitH14Key = "cond_stage_model.transformer.text_model.encoder.layers.22.self_attn.k_proj.weight" // OpenCLIP ViT-H/14
419
- openClipVitG14Key = "cond_stage_model.1.transformer.text_model.encoder.layers.31.self_attn.k_proj.weight" // OpenCLIP ViT-G/14
420
- t5xxlKey = "cond_stage_model.1.transformer.encoder.block.23.layer.0.SelfAttention.k.weight" // Google T5-xxl
421
- t5xxlKey2 = "cond_stage_model.2.transformer.encoder.block.23.layer.0.SelfAttention.k.weight"
417
+ openAiClipVitL14Key = "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight" // OpenAI CLIP ViT-L/14
418
+ openAiClipVitL14Key2 = "text_model.encoder.layers.11.self_attn.k_proj.weight"
419
+ openClipVitH14Key = "cond_stage_model.transformer.text_model.encoder.layers.22.self_attn.k_proj.weight" // OpenCLIP ViT-H/14
420
+ openClipVitH14Key2 = "text_model.encoder.layers.22.self_attn.k_proj.weight"
421
+ openClipVitG14Key = "cond_stage_model.1.transformer.text_model.encoder.layers.31.self_attn.k_proj.weight" // OpenCLIP ViT-G/14
422
+ openClipVitG14Key2 = "text_model.encoder.layers.31.self_attn.k_proj.weight"
423
+ t5xxlKey = "cond_stage_model.1.transformer.encoder.block.23.layer.0.SelfAttention.k.weight" // Google T5-xxl
424
+ t5xxlKey2 = "cond_stage_model.2.transformer.encoder.block.23.layer.0.SelfAttention.k.weight"
425
+ t5xxlKey3 = "encoder.block.23.layer.0.SelfAttention.k.weight"
422
426
)
423
427
424
428
tis , _ := gf .TensorInfos .Index ([]string {
@@ -439,10 +443,14 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
439
443
fluxFillFeatureKey2 ,
440
444
441
445
openAiClipVitL14Key ,
446
+ openAiClipVitL14Key2 ,
442
447
openClipVitH14Key ,
448
+ openClipVitH14Key2 ,
443
449
openClipVitG14Key ,
450
+ openClipVitG14Key2 ,
444
451
t5xxlKey ,
445
452
t5xxlKey2 ,
453
+ t5xxlKey3 ,
446
454
})
447
455
448
456
ga .Type = "model"
@@ -513,12 +521,29 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
513
521
}
514
522
}
515
523
ga .DiffusionConditioners = append (ga .DiffusionConditioners , cond )
524
+ } else if ti , ok := tis [openAiClipVitL14Key2 ]; ok {
525
+ cond := GGUFArchitectureDiffusionConditioner {
526
+ Architecture : "OpenAI CLIP ViT-L/14" ,
527
+ FileType : ti .GetFileType (),
528
+ }
529
+ if ti , ok = tis [openClipVitH14Key2 ]; ok {
530
+ cond = GGUFArchitectureDiffusionConditioner {
531
+ Architecture : "OpenCLIP ViT-H/14" ,
532
+ FileType : ti .GetFileType (),
533
+ }
534
+ }
535
+ ga .DiffusionConditioners = append (ga .DiffusionConditioners , cond )
516
536
}
517
537
if ti , ok := tis [openClipVitG14Key ]; ok {
518
538
ga .DiffusionConditioners = append (ga .DiffusionConditioners , GGUFArchitectureDiffusionConditioner {
519
539
Architecture : "OpenCLIP ViT-G/14" ,
520
540
FileType : ti .GetFileType (),
521
541
})
542
+ } else if ti , ok = tis [openClipVitG14Key2 ]; ok {
543
+ ga .DiffusionConditioners = append (ga .DiffusionConditioners , GGUFArchitectureDiffusionConditioner {
544
+ Architecture : "OpenCLIP ViT-G/14" ,
545
+ FileType : ti .GetFileType (),
546
+ })
522
547
}
523
548
if ti , ok := tis [t5xxlKey ]; ok {
524
549
ga .DiffusionConditioners = append (ga .DiffusionConditioners , GGUFArchitectureDiffusionConditioner {
@@ -530,12 +555,23 @@ func (gf *GGUFFile) diffuserArchitecture() (ga GGUFArchitecture) {
530
555
Architecture : "Google T5-xxl" ,
531
556
FileType : ti .GetFileType (),
532
557
})
558
+ } else if ti , ok = tis [t5xxlKey3 ]; ok {
559
+ ga .DiffusionConditioners = append (ga .DiffusionConditioners , GGUFArchitectureDiffusionConditioner {
560
+ Architecture : "Google T5-xxl" ,
561
+ FileType : ti .GetFileType (),
562
+ })
533
563
}
534
564
535
- if tis := gf .TensorInfos .Search (regexp .MustCompile (`^first_stage_model\..*` )); len (tis ) != 0 {
536
- ga .DiffusionAutoencoder = & GGUFArchitectureDiffusionAutoencoder {
537
- Architecture : ga .DiffusionArchitecture + " VAE" ,
538
- FileType : GGUFTensorInfos (tis ).GetFileType (),
565
+ for _ , re := range []* regexp.Regexp {
566
+ regexp .MustCompile (`^first_stage_model\..*` ),
567
+ regexp .MustCompile (`^decoder\.conv_in\..*` ),
568
+ } {
569
+ if tis := gf .TensorInfos .Search (re ); len (tis ) != 0 {
570
+ ga .DiffusionAutoencoder = & GGUFArchitectureDiffusionAutoencoder {
571
+ Architecture : ga .DiffusionArchitecture + " VAE" ,
572
+ FileType : GGUFTensorInfos (tis ).GetFileType (),
573
+ }
574
+ break
539
575
}
540
576
}
541
577
0 commit comments