@@ -54,15 +54,13 @@ const char* modes_str[] = {
54
54
" txt2img" ,
55
55
" img2img" ,
56
56
" img2vid" ,
57
- " edit" ,
58
57
" convert" ,
59
58
};
60
59
61
60
enum SDMode {
62
61
TXT2IMG,
63
62
IMG2IMG,
64
63
IMG2VID,
65
- EDIT,
66
64
CONVERT,
67
65
MODE_COUNT
68
66
};
@@ -88,7 +86,8 @@ struct SDParams {
88
86
std::string input_path;
89
87
std::string mask_path;
90
88
std::string control_image_path;
91
- std::vector<std::string> ref_image_paths;
89
+
90
+ std::vector<std::string> kontext_image_paths;
92
91
93
92
std::string prompt;
94
93
std::string negative_prompt;
@@ -154,10 +153,6 @@ void print_params(SDParams params) {
154
153
printf (" init_img: %s\n " , params.input_path .c_str ());
155
154
printf (" mask_img: %s\n " , params.mask_path .c_str ());
156
155
printf (" control_image: %s\n " , params.control_image_path .c_str ());
157
- printf (" ref_images_paths:\n " );
158
- for (auto & path : params.ref_image_paths ) {
159
- printf (" %s\n " , path.c_str ());
160
- };
161
156
printf (" clip on cpu: %s\n " , params.clip_on_cpu ? " true" : " false" );
162
157
printf (" controlnet cpu: %s\n " , params.control_net_cpu ? " true" : " false" );
163
158
printf (" vae decoder on cpu:%s\n " , params.vae_on_cpu ? " true" : " false" );
@@ -212,7 +207,6 @@ void print_usage(int argc, const char* argv[]) {
212
207
printf (" -i, --init-img [IMAGE] path to the input image, required by img2img\n " );
213
208
printf (" --mask [MASK] path to the mask image, required by img2img with mask\n " );
214
209
printf (" --control-image [IMAGE] path to image condition, control net\n " );
215
- printf (" -r, --ref_image [PATH] reference image for Flux Kontext models (can be used multiple times) \n " );
216
210
printf (" -o, --output OUTPUT path to write result image to (default: ./output.png)\n " );
217
211
printf (" -p, --prompt [PROMPT] the prompt to render\n " );
218
212
printf (" -n, --negative-prompt PROMPT the negative prompt (default: \"\" )\n " );
@@ -248,8 +242,9 @@ void print_usage(int argc, const char* argv[]) {
248
242
printf (" This might crash if it is not supported by the backend.\n " );
249
243
printf (" --control-net-cpu keep controlnet in cpu (for low vram)\n " );
250
244
printf (" --canny apply canny preprocessor (edge detection)\n " );
251
- printf (" --color colors the logging tags according to level\n " );
245
+ printf (" --color Colors the logging tags according to level\n " );
252
246
printf (" -v, --verbose print extra info\n " );
247
+ printf (" -ki, --kontext_img [PATH] Reference image for Flux Kontext models (can be used multiple times) \n " );
253
248
}
254
249
255
250
void parse_args (int argc, const char ** argv, SDParams& params) {
@@ -634,12 +629,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
634
629
break ;
635
630
}
636
631
params.skip_layer_end = std::stof (argv[i]);
637
- } else if (arg == " -r " || arg == " --ref-image " ) {
632
+ } else if (arg == " -ki " || arg == " --kontext-img " ) {
638
633
if (++i >= argc) {
639
634
invalid_arg = true ;
640
635
break ;
641
636
}
642
- params.ref_image_paths .push_back (argv[i]);
637
+ params.kontext_image_paths .push_back (argv[i]);
643
638
} else {
644
639
fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
645
640
print_usage (argc, argv);
@@ -668,13 +663,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
668
663
}
669
664
670
665
if ((params.mode == IMG2IMG || params.mode == IMG2VID) && params.input_path .length () == 0 ) {
671
- fprintf (stderr, " error: when using the img2img/img2vid mode, the following arguments are required: init-img\n " );
672
- print_usage (argc, argv);
673
- exit (1 );
674
- }
675
-
676
- if (params.mode == EDIT && params.ref_image_paths .size () == 0 ) {
677
- fprintf (stderr, " error: when using the edit mode, the following arguments are required: ref-image\n " );
666
+ fprintf (stderr, " error: when using the img2img mode, the following arguments are required: init-img\n " );
678
667
print_usage (argc, argv);
679
668
exit (1 );
680
669
}
@@ -838,12 +827,43 @@ int main(int argc, const char* argv[]) {
838
827
fprintf (stderr, " SVD support is broken, do not use it!!!\n " );
839
828
return 1 ;
840
829
}
830
+ bool vae_decode_only = true ;
831
+
832
+ std::vector<sd_image_t > kontext_imgs;
833
+ for (auto & path : params.kontext_image_paths ) {
834
+ vae_decode_only = false ;
835
+ int c = 0 ;
836
+ int width = 0 ;
837
+ int height = 0 ;
838
+ uint8_t * image_buffer = stbi_load (path.c_str (), &width, &height, &c, 3 );
839
+ if (image_buffer == NULL ) {
840
+ fprintf (stderr, " load image from '%s' failed\n " , path.c_str ());
841
+ return 1 ;
842
+ }
843
+ if (c < 3 ) {
844
+ fprintf (stderr, " the number of channels for the input image must be >= 3, but got %d channels\n " , c);
845
+ free (image_buffer);
846
+ return 1 ;
847
+ }
848
+ if (width <= 0 ) {
849
+ fprintf (stderr, " error: the width of image must be greater than 0\n " );
850
+ free (image_buffer);
851
+ return 1 ;
852
+ }
853
+ if (height <= 0 ) {
854
+ fprintf (stderr, " error: the height of image must be greater than 0\n " );
855
+ free (image_buffer);
856
+ return 1 ;
857
+ }
858
+ kontext_imgs.push_back ({(uint32_t )width,
859
+ (uint32_t )height,
860
+ 3 ,
861
+ image_buffer});
862
+ }
841
863
842
- bool vae_decode_only = true ;
843
864
uint8_t * input_image_buffer = NULL ;
844
865
uint8_t * control_image_buffer = NULL ;
845
866
uint8_t * mask_image_buffer = NULL ;
846
- std::vector<sd_image_t > ref_images;
847
867
848
868
if (params.mode == IMG2IMG || params.mode == IMG2VID) {
849
869
vae_decode_only = false ;
@@ -895,37 +915,6 @@ int main(int argc, const char* argv[]) {
895
915
free (input_image_buffer);
896
916
input_image_buffer = resized_image_buffer;
897
917
}
898
- } else if (params.mode == EDIT) {
899
- vae_decode_only = false ;
900
- for (auto & path : params.ref_image_paths ) {
901
- int c = 0 ;
902
- int width = 0 ;
903
- int height = 0 ;
904
- uint8_t * image_buffer = stbi_load (path.c_str (), &width, &height, &c, 3 );
905
- if (image_buffer == NULL ) {
906
- fprintf (stderr, " load image from '%s' failed\n " , path.c_str ());
907
- return 1 ;
908
- }
909
- if (c < 3 ) {
910
- fprintf (stderr, " the number of channels for the input image must be >= 3, but got %d channels\n " , c);
911
- free (image_buffer);
912
- return 1 ;
913
- }
914
- if (width <= 0 ) {
915
- fprintf (stderr, " error: the width of image must be greater than 0\n " );
916
- free (image_buffer);
917
- return 1 ;
918
- }
919
- if (height <= 0 ) {
920
- fprintf (stderr, " error: the height of image must be greater than 0\n " );
921
- free (image_buffer);
922
- return 1 ;
923
- }
924
- ref_images.push_back ({(uint32_t )width,
925
- (uint32_t )height,
926
- 3 ,
927
- image_buffer});
928
- }
929
918
}
930
919
931
920
sd_ctx_t * sd_ctx = new_sd_ctx (params.model_path .c_str (),
@@ -1012,12 +1001,14 @@ int main(int argc, const char* argv[]) {
1012
1001
params.style_ratio ,
1013
1002
params.normalize_input ,
1014
1003
params.input_id_images_path .c_str (),
1004
+ kontext_imgs.data (), kontext_imgs.size (),
1015
1005
params.skip_layers .data (),
1016
1006
params.skip_layers .size (),
1017
1007
params.slg_scale ,
1018
1008
params.skip_layer_start ,
1019
- params.skip_layer_end );
1020
- } else if (params.mode == IMG2IMG || params.mode == IMG2VID) {
1009
+ params.skip_layer_end ,
1010
+ std::vector<sd_image_t *>());
1011
+ } else {
1021
1012
sd_image_t input_image = {(uint32_t )params.width ,
1022
1013
(uint32_t )params.height ,
1023
1014
3 ,
@@ -1081,38 +1072,14 @@ int main(int argc, const char* argv[]) {
1081
1072
params.style_ratio ,
1082
1073
params.normalize_input ,
1083
1074
params.input_id_images_path .c_str (),
1075
+ kontext_imgs.data (), kontext_imgs.size (),
1084
1076
params.skip_layers .data (),
1085
1077
params.skip_layers .size (),
1086
1078
params.slg_scale ,
1087
1079
params.skip_layer_start ,
1088
- params.skip_layer_end );
1080
+ params.skip_layer_end ,
1081
+ std::vector<sd_image_t *>());
1089
1082
}
1090
- } else { // EDIT
1091
- results = edit (sd_ctx,
1092
- ref_images.data (),
1093
- ref_images.size (),
1094
- params.prompt .c_str (),
1095
- params.negative_prompt .c_str (),
1096
- params.clip_skip ,
1097
- params.cfg_scale ,
1098
- params.guidance ,
1099
- params.eta ,
1100
- params.width ,
1101
- params.height ,
1102
- params.sample_method ,
1103
- params.sample_steps ,
1104
- params.strength ,
1105
- params.seed ,
1106
- params.batch_count ,
1107
- control_image,
1108
- params.control_strength ,
1109
- params.style_ratio ,
1110
- params.normalize_input ,
1111
- params.skip_layers .data (),
1112
- params.skip_layers .size (),
1113
- params.slg_scale ,
1114
- params.skip_layer_start ,
1115
- params.skip_layer_end );
1116
1083
}
1117
1084
1118
1085
if (results == NULL ) {
@@ -1150,19 +1117,19 @@ int main(int argc, const char* argv[]) {
1150
1117
1151
1118
std::string dummy_name, ext, lc_ext;
1152
1119
bool is_jpg;
1153
- size_t last = params.output_path .find_last_of (" ." );
1120
+ size_t last = params.output_path .find_last_of (" ." );
1154
1121
size_t last_path = std::min (params.output_path .find_last_of (" /" ),
1155
1122
params.output_path .find_last_of (" \\ " ));
1156
- if (last != std::string::npos // filename has extension
1157
- && (last_path == std::string::npos || last > last_path)) {
1123
+ if (last != std::string::npos // filename has extension
1124
+ && (last_path == std::string::npos || last > last_path)) {
1158
1125
dummy_name = params.output_path .substr (0 , last);
1159
1126
ext = lc_ext = params.output_path .substr (last);
1160
1127
std::transform (ext.begin (), ext.end (), lc_ext.begin (), ::tolower);
1161
1128
is_jpg = lc_ext == " .jpg" || lc_ext == " .jpeg" || lc_ext == " .jpe" ;
1162
1129
} else {
1163
1130
dummy_name = params.output_path ;
1164
1131
ext = lc_ext = " " ;
1165
- is_jpg = false ;
1132
+ is_jpg = false ;
1166
1133
}
1167
1134
// appending ".png" to absent or unknown extension
1168
1135
if (!is_jpg && lc_ext != " .png" ) {
@@ -1174,7 +1141,7 @@ int main(int argc, const char* argv[]) {
1174
1141
continue ;
1175
1142
}
1176
1143
std::string final_image_path = i > 0 ? dummy_name + " _" + std::to_string (i + 1 ) + ext : dummy_name + ext;
1177
- if (is_jpg) {
1144
+ if (is_jpg) {
1178
1145
stbi_write_jpg (final_image_path.c_str (), results[i].width , results[i].height , results[i].channel ,
1179
1146
results[i].data , 90 );
1180
1147
printf (" save result JPEG image to '%s'\n " , final_image_path.c_str ());
0 commit comments