@@ -4,6 +4,7 @@ import { createWorker } from 'tesseract.js';
4
4
import { getDocument } from ' pdfjs-dist' ;
5
5
import * as pdfJS from ' pdfjs-dist' ;
6
6
import pdfJSWorkerURL from ' pdfjs-dist/build/pdf.worker?url' ;
7
+ import { textStatistics } from ' ../text-statistics/text-statistics.service' ;
7
8
import TextareaCopyable from ' @/components/TextareaCopyable.vue' ;
8
9
import { useQueryParamOrStorage } from ' @/composable/queryParams' ;
9
10
@@ -115,6 +116,7 @@ const languagesOptions = Array.from(languages.map(l => ({
115
116
116
117
const language = useQueryParamOrStorage ({ name: ' lang' , storageName: ' ocr-image:lang' , defaultValue: ' eng' });
117
118
119
+ const pageSeparator = ' \n =============\n ' ; // Delimiter that ocr() joins the entries of allTexts with; the stats and pageCount computeds match on it to strip it from the text and to count pages.
118
120
const ocrInProgress = ref (false );
119
121
const fileInput = ref () as Ref <File >;
120
122
const ocrText = computedAsync (async () => {
@@ -125,6 +127,8 @@ const ocrText = computedAsync(async () => {
125
127
return e .toString ();
126
128
}
127
129
});
130
+ // Text statistics for the OCR result with every page separator removed, so the
+ // separator's own characters and line breaks do not inflate the counts.
+ // The separator is matched as a literal string (split/join) instead of through a
+ // RegExp built from it: no escaping is needed if the separator ever contains
+ // regex metacharacters, and no pattern is recompiled on each recomputation.
+ // Text that is not available yet is treated as an empty string.
+ const stats = computed(() => textStatistics((ocrText.value ?? '').split(pageSeparator).join('')));
131
+ // Number of chunks in the OCR result, i.e. one more than the number of page
+ // separators it contains.
+ // Empty or not-yet-available text yields 0: splitting '' would otherwise return
+ // a single empty chunk and report one page although nothing was recognised.
+ // The separator is matched as a literal string, so it needs no regex escaping.
+ const pageCount = computed(() => (ocrText.value ? ocrText.value.split(pageSeparator).length : 0));
128
132
129
133
async function onUpload(file : File ) {
130
134
if (file ) {
@@ -180,7 +184,7 @@ async function ocr(file: File, language: string) {
180
184
}
181
185
await worker .terminate ();
182
186
ocrInProgress .value = false ;
183
- return allTexts .join (' \n ============= \n ' );
187
+ return allTexts .join (pageSeparator );
184
188
};
185
189
</script >
186
190
@@ -215,6 +219,28 @@ async function ocr(file: File, language: string) {
215
219
size =" small"
216
220
/>
217
221
</div >
222
+
223
+ <c-card v-if =" !ocrInProgress && stats" title =" Statistics" >
224
+ <n-space mt-3 >
225
+ <n-statistic label =" Character count" :value =" stats.chars" />
226
+ <n-statistic label =" Word count" :value =" stats.words" />
227
+ <n-statistic label =" Line count" :value =" stats.lines" />
228
+ <n-statistic label =" Pages count" :value =" pageCount" />
229
+ <n-statistic label =" Sentences count" :value =" stats.sentences" />
230
+ </n-space >
231
+
232
+ <n-divider />
233
+
234
+ <n-space >
235
+ <n-statistic label =" Chars (no spaces)" :value =" stats.chars_no_spaces" />
236
+ <n-statistic label =" Uppercase chars" :value =" stats.chars_upper" />
237
+ <n-statistic label =" Lowercase chars" :value =" stats.chars_lower" />
238
+ <n-statistic label =" Digit chars" :value =" stats.chars_digits" />
239
+ <n-statistic label =" Punctuations" :value =" stats.chars_puncts" />
240
+ <n-statistic label =" Spaces chars" :value =" stats.chars_spaces" />
241
+ <n-statistic label =" Word count (no punct)" :value =" stats.words_no_puncs" />
242
+ </n-space >
243
+ </c-card >
218
244
</div >
219
245
</template >
220
246
0 commit comments