9
9
#include < cstdio>
10
10
#include < string>
11
11
#include < thread>
12
+ #include < utility>
12
13
#include < vector>
13
14
#include < cstring>
14
15
@@ -379,15 +380,7 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
379
380
}
380
381
}
381
382
382
- static bool output_txt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
383
- std::ofstream fout (fname);
384
- if (!fout.is_open ()) {
385
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
386
- return false ;
387
- }
388
-
389
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
390
-
383
+ static void output_txt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
391
384
const int n_segments = whisper_full_n_segments (ctx);
392
385
for (int i = 0 ; i < n_segments; ++i) {
393
386
const char * text = whisper_full_get_segment_text (ctx, i);
@@ -402,19 +395,9 @@ static bool output_txt(struct whisper_context * ctx, const char * fname, const w
402
395
403
396
fout << speaker << text << " \n " ;
404
397
}
405
-
406
- return true ;
407
398
}
408
399
409
- static bool output_vtt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
410
- std::ofstream fout (fname);
411
- if (!fout.is_open ()) {
412
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
413
- return false ;
414
- }
415
-
416
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
417
-
400
+ static void output_vtt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
418
401
fout << " WEBVTT\n\n " ;
419
402
420
403
const int n_segments = whisper_full_n_segments (ctx);
@@ -434,19 +417,9 @@ static bool output_vtt(struct whisper_context * ctx, const char * fname, const w
434
417
fout << to_timestamp (t0) << " --> " << to_timestamp (t1) << " \n " ;
435
418
fout << speaker << text << " \n\n " ;
436
419
}
437
-
438
- return true ;
439
420
}
440
421
441
- static bool output_srt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
442
- std::ofstream fout (fname);
443
- if (!fout.is_open ()) {
444
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
445
- return false ;
446
- }
447
-
448
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
449
-
422
+ static void output_srt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
450
423
const int n_segments = whisper_full_n_segments (ctx);
451
424
for (int i = 0 ; i < n_segments; ++i) {
452
425
const char * text = whisper_full_get_segment_text (ctx, i);
@@ -463,8 +436,6 @@ static bool output_srt(struct whisper_context * ctx, const char * fname, const w
463
436
fout << to_timestamp (t0, true ) << " --> " << to_timestamp (t1, true ) << " \n " ;
464
437
fout << speaker << text << " \n\n " ;
465
438
}
466
-
467
- return true ;
468
439
}
469
440
470
441
static char * escape_double_quotes_and_backslashes (const char * str) {
@@ -530,15 +501,7 @@ static char * escape_double_quotes_in_csv(const char * str) {
530
501
return escaped;
531
502
}
532
503
533
- static bool output_csv (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
534
- std::ofstream fout (fname);
535
- if (!fout.is_open ()) {
536
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
537
- return false ;
538
- }
539
-
540
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
541
-
504
+ static void output_csv (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
542
505
const int n_segments = whisper_full_n_segments (ctx);
543
506
fout << " start,end," ;
544
507
if (params.diarize && pcmf32s.size () == 2 )
@@ -561,14 +524,9 @@ static bool output_csv(struct whisper_context * ctx, const char * fname, const w
561
524
}
562
525
fout << " \" " << text_escaped << " \"\n " ;
563
526
}
564
-
565
- return true ;
566
527
}
567
528
568
- static bool output_score (struct whisper_context * ctx, const char * fname, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
569
- std::ofstream fout (fname);
570
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
571
-
529
+ static void output_score (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
572
530
const int n_segments = whisper_full_n_segments (ctx);
573
531
// fprintf(stderr,"segments: %d\n",n_segments);
574
532
for (int i = 0 ; i < n_segments; ++i) {
@@ -581,16 +539,14 @@ static bool output_score(struct whisper_context * ctx, const char * fname, const
581
539
// fprintf(stderr,"token: %s %f\n",token,probability);
582
540
}
583
541
}
584
- return true ;
585
542
}
586
543
587
- static bool output_json (
544
+ static void output_json (
588
545
struct whisper_context * ctx,
589
- const char * fname ,
546
+ std::ofstream & fout ,
590
547
const whisper_params & params,
591
- std::vector<std::vector<float >> pcmf32s,
592
- bool full) {
593
- std::ofstream fout (fname);
548
+ std::vector<std::vector<float >> pcmf32s) {
549
+ const bool full = params.output_jsn_full ;
594
550
int indent = 0 ;
595
551
596
552
auto doindent = [&]() {
@@ -670,12 +626,6 @@ static bool output_json(
670
626
end_obj (end);
671
627
};
672
628
673
- if (!fout.is_open ()) {
674
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
675
- return false ;
676
- }
677
-
678
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
679
629
start_obj (nullptr );
680
630
value_s (" systeminfo" , whisper_print_system_info (), false );
681
631
start_obj (" model" );
@@ -749,17 +699,12 @@ static bool output_json(
749
699
750
700
end_arr (true );
751
701
end_obj (true );
752
- return true ;
753
702
}
754
703
755
704
// karaoke video generation
756
705
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
757
706
// TODO: font parameter adjustments
758
- static bool output_wts (struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float >> pcmf32s) {
759
- std::ofstream fout (fname);
760
-
761
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
762
-
707
+ static bool output_wts (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s, const char * fname_inp, float t_sec, const char * fname_out) {
763
708
static const char * font = params.font_path .c_str ();
764
709
765
710
std::ifstream fin (font);
@@ -875,20 +820,12 @@ static bool output_wts(struct whisper_context * ctx, const char * fname, const c
875
820
876
821
fout.close ();
877
822
878
- fprintf (stderr, " %s: run 'source %s' to generate karaoke video\n " , __func__, fname );
823
+ fprintf (stderr, " # %s: run 'source %s' to generate karaoke video\n " , __func__, fname_out );
879
824
880
825
return true ;
881
826
}
882
827
883
- static bool output_lrc (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
884
- std::ofstream fout (fname);
885
- if (!fout.is_open ()) {
886
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
887
- return false ;
888
- }
889
-
890
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
891
-
828
+ static void output_lrc (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
892
829
fout << " [by:whisper.cpp]\n " ;
893
830
894
831
const int n_segments = whisper_full_n_segments (ctx);
@@ -916,8 +853,6 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
916
853
917
854
fout << ' [' << timestamp_lrc << ' ]' << speaker << text << " \n " ;
918
855
}
919
-
920
- return true ;
921
856
}
922
857
923
858
@@ -1066,8 +1001,53 @@ int main(int argc, char ** argv) {
1066
1001
}
1067
1002
1068
1003
for (int f = 0 ; f < (int ) params.fname_inp .size (); ++f) {
1069
- const auto fname_inp = params.fname_inp [f];
1070
- const auto fname_out = f < (int ) params.fname_out .size () && !params.fname_out [f].empty () ? params.fname_out [f] : params.fname_inp [f];
1004
+ const auto & fname_inp = params.fname_inp [f];
1005
+ struct fout_factory {
1006
+ std::string fname_out;
1007
+ const size_t basename_length;
1008
+ const bool is_stdout;
1009
+ bool used_stdout;
1010
+ decltype (whisper_print_segment_callback) * const print_segment_callback;
1011
+ std::ofstream fout;
1012
+
1013
+ fout_factory (const std::string & fname_out_, const std::string & fname_inp, whisper_params & params) :
1014
+ fname_out{!fname_out_.empty () ? fname_out_ : fname_inp},
1015
+ basename_length{fname_out.size ()},
1016
+ is_stdout{fname_out == " -" },
1017
+ used_stdout{},
1018
+ print_segment_callback{is_stdout ? nullptr : whisper_print_segment_callback} {
1019
+ if (!print_segment_callback) {
1020
+ params.no_timestamps = true ;
1021
+ params.print_progress = false ;
1022
+ }
1023
+ }
1024
+
1025
+ bool open (const char * ext, const char * function) {
1026
+ if (is_stdout) {
1027
+ if (std::exchange (used_stdout, true )) {
1028
+ fprintf (stderr, " warning: Not appending multiple file formats to stdout\n " );
1029
+ return false ;
1030
+ }
1031
+ #ifdef _WIN32
1032
+ fout = std::ofstream{" CON" };
1033
+ #else
1034
+ fout = std::ofstream{" /dev/stdout" };
1035
+ #endif
1036
+ // Not using fprintf stderr here because it might equal stdout
1037
+ // Also assuming /dev is mounted
1038
+ return true ;
1039
+ }
1040
+ fname_out.resize (basename_length);
1041
+ fname_out += ext;
1042
+ fout = std::ofstream{fname_out};
1043
+ if (!fout.is_open ()) {
1044
+ fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname_out.c_str ());
1045
+ return false ;
1046
+ }
1047
+ fprintf (stderr, " %s: saving output to '%s'\n " , function, fname_out.c_str ());
1048
+ return true ;
1049
+ }
1050
+ } fout_factory{f < (int ) params.fname_out .size () ? params.fname_out [f] : " " , fname_inp, params};
1071
1051
1072
1052
std::vector<float > pcmf32; // mono-channel F32 PCM
1073
1053
std::vector<std::vector<float >> pcmf32s; // stereo-channel F32 PCM
@@ -1172,7 +1152,7 @@ int main(int argc, char ** argv) {
1172
1152
1173
1153
// this callback is called on each new segment
1174
1154
if (!wparams.print_realtime ) {
1175
- wparams.new_segment_callback = whisper_print_segment_callback ;
1155
+ wparams.new_segment_callback = fout_factory. print_segment_callback ;
1176
1156
wparams.new_segment_callback_user_data = &user_data;
1177
1157
}
1178
1158
@@ -1214,54 +1194,26 @@ int main(int argc, char ** argv) {
1214
1194
1215
1195
// output stuff
1216
1196
{
1217
- printf (" \n " );
1218
-
1219
- // output to text file
1220
- if (params.output_txt ) {
1221
- const auto fname_txt = fname_out + " .txt" ;
1222
- output_txt (ctx, fname_txt.c_str (), params, pcmf32s);
1223
- }
1224
-
1225
- // output to VTT file
1226
- if (params.output_vtt ) {
1227
- const auto fname_vtt = fname_out + " .vtt" ;
1228
- output_vtt (ctx, fname_vtt.c_str (), params, pcmf32s);
1229
- }
1230
-
1231
- // output to SRT file
1232
- if (params.output_srt ) {
1233
- const auto fname_srt = fname_out + " .srt" ;
1234
- output_srt (ctx, fname_srt.c_str (), params, pcmf32s);
1235
- }
1236
-
1237
- // output to WTS file
1238
- if (params.output_wts ) {
1239
- const auto fname_wts = fname_out + " .wts" ;
1240
- output_wts (ctx, fname_wts.c_str (), fname_inp.c_str (), params, float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, pcmf32s);
1241
- }
1242
-
1243
- // output to CSV file
1244
- if (params.output_csv ) {
1245
- const auto fname_csv = fname_out + " .csv" ;
1246
- output_csv (ctx, fname_csv.c_str (), params, pcmf32s);
1247
- }
1248
-
1249
- // output to JSON file
1250
- if (params.output_jsn ) {
1251
- const auto fname_jsn = fname_out + " .json" ;
1252
- output_json (ctx, fname_jsn.c_str (), params, pcmf32s, params.output_jsn_full );
1253
- }
1254
-
1255
- // output to LRC file
1256
- if (params.output_lrc ) {
1257
- const auto fname_lrc = fname_out + " .lrc" ;
1258
- output_lrc (ctx, fname_lrc.c_str (), params, pcmf32s);
1259
- }
1260
-
1261
- // output to score file
1262
- if (params.log_score ) {
1263
- const auto fname_score = fname_out + " .score.txt" ;
1264
- output_score (ctx, fname_score.c_str (), params, pcmf32s);
1197
+ // macros to stringify function name
1198
+ #define output_func (func, ext, param, ...) if (param && fout_factory.open(ext, #func)) {\
1199
+ func (ctx, fout_factory.fout , params, __VA_ARGS__); \
1200
+ }
1201
+ #define output_ext (ext, ...) output_func(output_##ext, " ." #ext, params.output_##ext, __VA_ARGS__)
1202
+
1203
+ output_ext (txt, pcmf32s);
1204
+ output_ext (vtt, pcmf32s);
1205
+ output_ext (srt, pcmf32s);
1206
+ output_ext (wts, pcmf32s, fname_inp.c_str (), float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, fout_factory.fname_out .c_str ());
1207
+ output_ext (csv, pcmf32s);
1208
+ output_func (output_json, " .json" , params.output_jsn , pcmf32s);
1209
+ output_ext (lrc, pcmf32s);
1210
+ output_func (output_score, " .score.txt" , params.log_score , pcmf32s);
1211
+
1212
+ #undef output_ext
1213
+ #undef output_func
1214
+
1215
+ if (fout_factory.is_stdout && !fout_factory.used_stdout ) {
1216
+ fprintf (stderr, " warning: '--output-file -' used without any other '--output-*'" );
1265
1217
}
1266
1218
}
1267
1219
}
0 commit comments