Skip to content

Commit 089853e

Browse files
committed
feat(new tool): Text extractor from HTML
Fix CorentinTh#1035
1 parent b59942a commit 089853e

File tree

7 files changed

+128
-4
lines changed

7 files changed

+128
-4
lines changed

components.d.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ declare module '@vue/runtime-core' {
7777
EmojiPicker: typeof import('./src/tools/emoji-picker/emoji-picker.vue')['default']
7878
Encryption: typeof import('./src/tools/encryption/encryption.vue')['default']
7979
EtaCalculator: typeof import('./src/tools/eta-calculator/eta-calculator.vue')['default']
80+
ExtractTextFromHtml: typeof import('./src/tools/extract-text-from-html/extract-text-from-html.vue')['default']
8081
FavoriteButton: typeof import('./src/components/FavoriteButton.vue')['default']
8182
FormatTransformer: typeof import('./src/components/FormatTransformer.vue')['default']
8283
GitMemo: typeof import('./src/tools/git-memo/git-memo.vue')['default']
@@ -126,25 +127,26 @@ declare module '@vue/runtime-core' {
126127
MenuLayout: typeof import('./src/components/MenuLayout.vue')['default']
127128
MetaTagGenerator: typeof import('./src/tools/meta-tag-generator/meta-tag-generator.vue')['default']
128129
MimeTypes: typeof import('./src/tools/mime-types/mime-types.vue')['default']
130+
NAlert: typeof import('naive-ui')['NAlert']
129131
NavbarButtons: typeof import('./src/components/NavbarButtons.vue')['default']
130132
NCode: typeof import('naive-ui')['NCode']
131133
NCollapseTransition: typeof import('naive-ui')['NCollapseTransition']
134+
NColorPicker: typeof import('naive-ui')['NColorPicker']
132135
NConfigProvider: typeof import('naive-ui')['NConfigProvider']
133136
NDivider: typeof import('naive-ui')['NDivider']
134137
NEllipsis: typeof import('naive-ui')['NEllipsis']
135138
NFormItem: typeof import('naive-ui')['NFormItem']
136-
NGi: typeof import('naive-ui')['NGi']
137-
NGrid: typeof import('naive-ui')['NGrid']
138139
NH1: typeof import('naive-ui')['NH1']
139140
NH3: typeof import('naive-ui')['NH3']
140141
NIcon: typeof import('naive-ui')['NIcon']
142+
NInputGroup: typeof import('naive-ui')['NInputGroup']
143+
NInputGroupLabel: typeof import('naive-ui')['NInputGroupLabel']
141144
NInputNumber: typeof import('naive-ui')['NInputNumber']
142-
NLabel: typeof import('naive-ui')['NLabel']
143145
NLayout: typeof import('naive-ui')['NLayout']
144146
NLayoutSider: typeof import('naive-ui')['NLayoutSider']
145147
NMenu: typeof import('naive-ui')['NMenu']
146148
NScrollbar: typeof import('naive-ui')['NScrollbar']
147-
NSpin: typeof import('naive-ui')['NSpin']
149+
NSwitch: typeof import('naive-ui')['NSwitch']
148150
NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default']
149151
OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default']
150152
PasswordStrengthAnalyser: typeof import('./src/tools/password-strength-analyser/password-strength-analyser.vue')['default']
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { test, expect } from '@playwright/test';
2+
3+
// End-to-end checks for the "Extract text from HTML" tool page.
test.describe('Tool - Extract text from html', () => {
  // Every test starts from a freshly loaded tool page.
  test.beforeEach(async ({ page }) => {
    await page.goto('/extract-text-from-html');
  });

  test('Has correct title', async ({ page }) => {
    // NOTE(review): a string argument must match the whole document title —
    // confirm the app does not append a site-wide suffix to tool titles.
    await expect(page).toHaveTitle('Extract text from HTML');
  });

  test('Extract text from HTML', async ({ page }) => {
    await page.getByTestId('input').fill('<p>Paste your HTML in the input form on the left</p>');

    // Web-first assertion: it retries until the output area shows the expected
    // text, instead of reading innerText once and racing the UI update.
    await expect(page.getByTestId('area-content')).toHaveText('Paste your HTML in the input form on the left');
  });
});
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import { expect, describe, it } from 'vitest';
2+
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
3+
4+
// Unit tests for the HTML validation and text-extraction helpers.
describe('extract-text-from-html service', () => {
  describe('validateHtml', () => {
    it('check if the value is valid html', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
      expect(validateHtml('<div>Paste your HTML in the input form on the left</div>')).toBeTruthy();
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p></div>')).toBeTruthy();
      expect(validateHtml('<body><div><p>Paste your HTML in the input form on the left</p></div></body>')).toBeTruthy();
    });

    it('check if the value is invalid html', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left</a>')).toBeFalsy();
    });

    it('accepts input that contains at least one complete element', () => {
      // The outer <div> is never closed, but the inner <p>…</p> pair is complete.
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
    });
  });

  describe('getTextFromHtml', () => {
    it('must be return a string', () => {
      // Assert on the runtime type; calling `.toString()` on the expectation checked nothing.
      expect(typeof getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toBe('string');
    });

    it('must be return text from html', () => {
      expect(getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toStrictEqual(
        'Paste your HTML in the input form on the left',
      );
    });
  });
});
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
 * Heuristically checks whether `value` looks like HTML markup.
 *
 * The input passes when it contains at least one complete element: either an
 * opening tag followed by a closing tag of the same name, or a self-closing
 * tag. It does not verify that the whole input is well-formed.
 *
 * @param value - Raw text to inspect.
 * @returns `true` when at least one complete element is found, `false` otherwise.
 */
function validateHtml(value: string): boolean {
  // Probe the DOM parser only where one exists, so the check below still runs
  // in runtimes without a DOM instead of failing on the missing global.
  if (typeof DOMParser !== 'undefined') {
    try {
      new DOMParser().parseFromString(value, 'text/html');
    } catch {
      return false;
    }
  }

  // First alternative: <tag ...> content </tag>, where `[\s\S]` lets the
  // content span several lines (`.` would stop at the first line break).
  // Second alternative: <tag ... />, where `[^>]*` lets attribute values
  // contain "/" (e.g. a path in `src`).
  // No `g` flag: only the existence of a match matters.
  const completeElement = /<([a-z][a-z0-9]*)\b[^>]*>[\s\S]*?<\/\1>|<[a-z][a-z0-9]*\b[^>]*\/>/i;

  return completeElement.test(value);
}
13+
14+
/**
 * Extracts the text content of an HTML fragment and collapses whitespace.
 *
 * @param value - HTML markup to read the text from.
 * @returns The fragment's text with every run of whitespace replaced by a
 * single space (leading/trailing whitespace is collapsed, not removed).
 */
function getTextFromHtml(value: string): string {
  // Parse inside a separate document created via createHTMLDocument rather
  // than an element owned by the live page, so that pasted markup such as
  // `<img onerror=...>` is only parsed, not loaded or executed.
  // NOTE(review): relies on such documents being inert — see the
  // DOMImplementation.createHTMLDocument documentation.
  const inertDocument = document.implementation.createHTMLDocument('');
  const container = inertDocument.createElement('div');
  container.innerHTML = value;

  // The container is never rendered, so textContent is the text to read;
  // it is typed as nullable, hence the fallback.
  const text = container.textContent ?? '';

  return text.replace(/\s+/g, ' ');
}
20+
21+
export { validateHtml, getTextFromHtml };
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<script setup lang="ts">
2+
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
3+
import { withDefaultOnError } from '@/utils/defaults';
4+
import type { UseValidationRule } from '@/composable/validation';
5+
6+
/**
 * Turns the raw HTML entered by the user into the text shown in the output.
 * An empty input yields an empty string without calling the extractor.
 * The work is wrapped in `withDefaultOnError` with '' as the default —
 * presumably the value returned when extraction throws; confirm in '@/utils/defaults'.
 */
function transformer(value: string) {
  const extractText = () => (value === '' ? '' : getTextFromHtml(value));

  return withDefaultOnError(extractText, '');
}
14+
15+
// Validation rules passed to the input field: an empty input is accepted,
// any other value must pass validateHtml.
const rules: UseValidationRule<string>[] = [
  {
    message: 'Provided HTML is not valid.',
    validator: (value: string) => (value === '' ? true : validateHtml(value)),
  },
];
21+
</script>
22+
23+
<template>
24+
<format-transformer
25+
input-label="Your raw HTML"
26+
input-placeholder="Paste your raw HTML here..."
27+
output-label="Text from your HTML"
28+
:input-validation-rules="rules"
29+
:transformer="transformer"
30+
/>
31+
</template>
32+
33+
<style lang="less" scoped></style>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { CursorText } from '@vicons/tabler';
2+
import { defineTool } from '../tool';
3+
4+
// Description text passed to defineTool for this tool.
const description
  = 'Paste your HTML in the input form on the left and you will get text instantly. Occasionally, you may need to extract plain text from an HTML page where CSS properties (like user-select: none;) prevent text selection. The typical workaround involves using the DevTools (F12) to select "Copy → outer HTML". The proposed tool would simplify this process by extracting the "inner Text" directly from the copied HTML.';

// Tool definition for "Extract text from HTML"; the component is loaded
// through a dynamic import.
export const tool = defineTool({
  name: 'Extract text from HTML',
  path: '/extract-text-from-html',
  description,
  keywords: ['extract', 'text', 'from', 'html'],
  icon: CursorText,
  createdAt: new Date('2024-05-10'),
  component: () => import('./extract-text-from-html.vue'),
});

src/tools/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { tool as base64FileConverter } from './base64-file-converter';
22
import { tool as base64StringConverter } from './base64-string-converter';
33
import { tool as basicAuthGenerator } from './basic-auth-generator';
4+
import { tool as extractTextFromHtml } from './extract-text-from-html';
45

56
import { tool as asciiTextDrawer } from './ascii-text-drawer';
67

@@ -148,6 +149,7 @@ export const toolsByCategory: ToolCategory[] = [
148149
dockerRunToDockerComposeConverter,
149150
xmlFormatter,
150151
yamlViewer,
152+
extractTextFromHtml,
151153
],
152154
},
153155
{

0 commit comments

Comments
 (0)