@@ -773,13 +773,15 @@ struct whisper_state {
773
773
int64_t t_sample_us = 0 ;
774
774
int64_t t_encode_us = 0 ;
775
775
int64_t t_decode_us = 0 ;
776
+ int64_t t_batchd_us = 0 ;
776
777
int64_t t_prompt_us = 0 ;
777
778
int64_t t_mel_us = 0 ;
778
779
779
780
int32_t n_sample = 0 ; // number of tokens sampled
780
781
int32_t n_encode = 0 ; // number of encoder calls
781
- int32_t n_decode = 0 ; // number of decoder calls with n_tokens == 1 (text-generation)
782
- int32_t n_prompt = 0 ; // number of decoder calls with n_tokens > 1 (prompt encoding)
782
+ int32_t n_decode = 0 ; // number of decoder calls with n_tokens == 1 (text-generation)
783
+ int32_t n_batchd = 0 ; // sum of n_tokens over decoder calls with 1 < n_tokens < 16 (batch decoding)
784
+ int32_t n_prompt = 0 ; // sum of n_tokens over decoder calls with n_tokens >= 16 (prompt encoding)
783
785
int32_t n_fail_p = 0 ; // number of logprob threshold failures
784
786
int32_t n_fail_h = 0 ; // number of entropy threshold failures
785
787
@@ -2616,9 +2618,12 @@ static bool whisper_decode_internal(
2616
2618
if (batch.n_tokens == 1 ) {
2617
2619
wstate.t_decode_us += ggml_time_us () - t_start_us;
2618
2620
wstate.n_decode ++;
2621
+ } else if (batch.n_tokens < 16 ) {
2622
+ wstate.t_batchd_us += ggml_time_us () - t_start_us;
2623
+ wstate.n_batchd += n_tokens;
2619
2624
} else {
2620
2625
wstate.t_prompt_us += ggml_time_us () - t_start_us;
2621
- wstate.n_prompt ++ ;
2626
+ wstate.n_prompt += n_tokens ;
2622
2627
}
2623
2628
2624
2629
return !(abort_callback && abort_callback (abort_callback_data));
@@ -3827,13 +3832,15 @@ void whisper_print_timings(struct whisper_context * ctx) {
3827
3832
const int32_t n_sample = std::max (1 , ctx->state ->n_sample );
3828
3833
const int32_t n_encode = std::max (1 , ctx->state ->n_encode );
3829
3834
const int32_t n_decode = std::max (1 , ctx->state ->n_decode );
3835
+ const int32_t n_batchd = std::max (1 , ctx->state ->n_batchd );
3830
3836
const int32_t n_prompt = std::max (1 , ctx->state ->n_prompt );
3831
3837
3832
3838
WHISPER_LOG_INFO (" %s: fallbacks = %3d p / %3d h\n " , __func__, ctx->state ->n_fail_p , ctx->state ->n_fail_h );
3833
3839
WHISPER_LOG_INFO (" %s: mel time = %8.2f ms\n " , __func__, ctx->state ->t_mel_us / 1000 .0f );
3834
3840
WHISPER_LOG_INFO (" %s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n " , __func__, 1e-3f * ctx->state ->t_sample_us , n_sample, 1e-3f * ctx->state ->t_sample_us / n_sample);
3835
3841
WHISPER_LOG_INFO (" %s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n " , __func__, 1e-3f * ctx->state ->t_encode_us , n_encode, 1e-3f * ctx->state ->t_encode_us / n_encode);
3836
3842
WHISPER_LOG_INFO (" %s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n " , __func__, 1e-3f * ctx->state ->t_decode_us , n_decode, 1e-3f * ctx->state ->t_decode_us / n_decode);
3843
+ WHISPER_LOG_INFO (" %s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n " , __func__, 1e-3f * ctx->state ->t_batchd_us , n_batchd, 1e-3f * ctx->state ->t_batchd_us / n_batchd);
3837
3844
WHISPER_LOG_INFO (" %s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n " , __func__, 1e-3f * ctx->state ->t_prompt_us , n_prompt, 1e-3f * ctx->state ->t_prompt_us / n_prompt);
3838
3845
}
3839
3846
WHISPER_LOG_INFO (" %s: total time = %8.2f ms\n " , __func__, (t_end_us - ctx->t_start_us )/1000 .0f );
@@ -3850,6 +3857,7 @@ void whisper_reset_timings(struct whisper_context * ctx) {
3850
3857
ctx->state ->n_sample = 0 ;
3851
3858
ctx->state ->n_encode = 0 ;
3852
3859
ctx->state ->n_decode = 0 ;
3860
+ ctx->state ->n_batchd = 0 ;
3853
3861
ctx->state ->n_prompt = 0 ;
3854
3862
}
3855
3863
}
@@ -5896,11 +5904,13 @@ int whisper_full_parallel(
5896
5904
ctx->state ->t_sample_us += states[i]->t_sample_us ;
5897
5905
ctx->state ->t_encode_us += states[i]->t_encode_us ;
5898
5906
ctx->state ->t_decode_us += states[i]->t_decode_us ;
5907
+ ctx->state ->t_batchd_us += states[i]->t_batchd_us ;
5899
5908
ctx->state ->t_prompt_us += states[i]->t_prompt_us ;
5900
5909
5901
5910
ctx->state ->n_sample += states[i]->n_sample ;
5902
5911
ctx->state ->n_encode += states[i]->n_encode ;
5903
5912
ctx->state ->n_decode += states[i]->n_decode ;
5913
+ ctx->state ->n_batchd += states[i]->n_batchd ;
5904
5914
ctx->state ->n_prompt += states[i]->n_prompt ;
5905
5915
5906
5916
whisper_free_state (states[i]);
0 commit comments