Skip to content

Commit 7a1beb5

Browse files
committed
feat: add OCRized text statistics
1 parent 7f2bf11 commit 7a1beb5

File tree

2 files changed

+28
-11
lines changed

2 files changed

+28
-11
lines changed

components.d.ts

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -132,25 +132,16 @@ declare module '@vue/runtime-core' {
132132
NConfigProvider: typeof import('naive-ui')['NConfigProvider']
133133
NDivider: typeof import('naive-ui')['NDivider']
134134
NEllipsis: typeof import('naive-ui')['NEllipsis']
135-
NFormItem: typeof import('naive-ui')['NFormItem']
136-
NGi: typeof import('naive-ui')['NGi']
137-
NGrid: typeof import('naive-ui')['NGrid']
138135
NH1: typeof import('naive-ui')['NH1']
139136
NH3: typeof import('naive-ui')['NH3']
140137
NIcon: typeof import('naive-ui')['NIcon']
141-
NInputNumber: typeof import('naive-ui')['NInputNumber']
142-
NLabel: typeof import('naive-ui')['NLabel']
143138
NLayout: typeof import('naive-ui')['NLayout']
144139
NLayoutSider: typeof import('naive-ui')['NLayoutSider']
145140
NMenu: typeof import('naive-ui')['NMenu']
146141
NScrollbar: typeof import('naive-ui')['NScrollbar']
147-
NSlider: typeof import('naive-ui')['NSlider']
142+
NSpace: typeof import('naive-ui')['NSpace']
148143
NSpin: typeof import('naive-ui')['NSpin']
149144
NStatistic: typeof import('naive-ui')['NStatistic']
150-
NSwitch: typeof import('naive-ui')['NSwitch']
151-
NTable: typeof import('naive-ui')['NTable']
152-
NTag: typeof import('naive-ui')['NTag']
153-
NSpin: typeof import('naive-ui')['NSpin']
154145
NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default']
155146
OcrImage: typeof import('./src/tools/ocr-image/ocr-image.vue')['default']
156147
OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default']

src/tools/ocr-image/ocr-image.vue

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { createWorker } from 'tesseract.js';
44
import { getDocument } from 'pdfjs-dist';
55
import * as pdfJS from 'pdfjs-dist';
66
import pdfJSWorkerURL from 'pdfjs-dist/build/pdf.worker?url';
7+
import { textStatistics } from '../text-statistics/text-statistics.service';
78
import TextareaCopyable from '@/components/TextareaCopyable.vue';
89
import { useQueryParamOrStorage } from '@/composable/queryParams';
910
@@ -115,6 +116,7 @@ const languagesOptions = Array.from(languages.map(l => ({
115116
116117
const language = useQueryParamOrStorage({ name: 'lang', storageName: 'ocr-image:lang', defaultValue: 'eng' });
117118
119+
/** Delimiter that `ocr()` puts between the entries of `allTexts` when joining them into one string. */
const pageSeparator = `\n=============\n`;
118120
const ocrInProgress = ref(false);
119121
const fileInput = ref() as Ref<File>;
120122
const ocrText = computedAsync(async () => {
@@ -125,6 +127,8 @@ const ocrText = computedAsync(async () => {
125127
return e.toString();
126128
}
127129
});
130+
/**
 * Text statistics for the current OCR result.
 *
 * Every occurrence of `pageSeparator` is replaced by a single space before the text is
 * handed to `textStatistics` (presumably so the separator itself is not counted as content).
 * An undefined OCR result is treated as the empty string.
 *
 * The separator is matched as a literal string via split/join rather than through a RegExp
 * built from it, so it keeps working even if the separator ever contains regex metacharacters.
 */
const stats = computed(() => textStatistics(ocrText.value?.split(pageSeparator).join(' ') ?? ''));
131+
/**
 * Number of segments obtained by splitting the OCR result on `pageSeparator`.
 *
 * Yields 0 while the OCR result is undefined; any defined string (including an empty one)
 * yields at least 1, because splitting always returns at least one segment.
 *
 * The separator is matched as a literal string, which avoids compiling a RegExp from an
 * unescaped value on every recomputation.
 */
const pageCount = computed(() => ocrText.value?.split(pageSeparator).length ?? 0);
128132
129133
async function onUpload(file: File) {
130134
if (file) {
@@ -180,7 +184,7 @@ async function ocr(file: File, language: string) {
180184
}
181185
await worker.terminate();
182186
ocrInProgress.value = false;
183-
return allTexts.join('\n=============\n');
187+
return allTexts.join(pageSeparator);
184188
};
185189
</script>
186190

@@ -215,6 +219,28 @@ async function ocr(file: File, language: string) {
215219
size="small"
216220
/>
217221
</div>
222+
223+
<c-card v-if="!ocrInProgress && stats" title="Statistics">
224+
<n-space mt-3>
225+
<n-statistic label="Character count" :value="stats.chars" />
226+
<n-statistic label="Word count" :value="stats.words" />
227+
<n-statistic label="Line count" :value="stats.lines" />
228+
<n-statistic label="Pages count" :value="pageCount" />
229+
<n-statistic label="Sentences count" :value="stats.sentences" />
230+
</n-space>
231+
232+
<n-divider />
233+
234+
<n-space>
235+
<n-statistic label="Chars (no spaces)" :value="stats.chars_no_spaces" />
236+
<n-statistic label="Uppercase chars" :value="stats.chars_upper" />
237+
<n-statistic label="Lowercase chars" :value="stats.chars_lower" />
238+
<n-statistic label="Digit chars" :value="stats.chars_digits" />
239+
<n-statistic label="Punctuations" :value="stats.chars_puncts" />
240+
<n-statistic label="Spaces chars" :value="stats.chars_spaces" />
241+
<n-statistic label="Word count (no punct)" :value="stats.words_no_puncs" />
242+
</n-space>
243+
</c-card>
218244
</div>
219245
</template>
220246

0 commit comments

Comments
 (0)