Skip to content

Commit db4e5fc

Browse files
committed
Merge branch 'up/fix/html-to-text' into chore/all-my-stuffs
2 parents 54e6d8d + b2a4b96 commit db4e5fc

File tree

7 files changed

+122
-0
lines changed

7 files changed

+122
-0
lines changed

components.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ declare module '@vue/runtime-core' {
115115
EnergyComputer: typeof import('./src/tools/energy-computer/energy-computer.vue')['default']
116116
EnergyConverter: typeof import('./src/tools/energy-converter/energy-converter.vue')['default']
117117
EtaCalculator: typeof import('./src/tools/eta-calculator/eta-calculator.vue')['default']
118+
ExtractTextFromHtml: typeof import('./src/tools/extract-text-from-html/extract-text-from-html.vue')['default']
118119
FavoriteButton: typeof import('./src/components/FavoriteButton.vue')['default']
119120
FileHasher: typeof import('./src/tools/file-hasher/file-hasher.vue')['default']
120121
FileType: typeof import('./src/tools/file-type/file-type.vue')['default']
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { expect, test } from '@playwright/test';
2+
3+
// End-to-end checks for the "Extract text from HTML" tool page.
test.describe('Tool - Extract text from html', () => {
  // Every test starts from a freshly loaded tool page.
  test.beforeEach(async ({ page }) => {
    await page.goto('/extract-text-from-html');
  });

  test('Has correct title', async ({ page }) => {
    await expect(page).toHaveTitle('Extract text from HTML - IT Tools');
  });

  test('Extract text from HTML', async ({ page }) => {
    await page.getByTestId('input').fill('<p>Paste your HTML in the input form on the left</p>');

    // Web-first assertion: it retries until the output area shows the expected
    // text, so the test does not race with the re-render that follows `fill`.
    // `toHaveText` normalizes surrounding whitespace, which replaces the manual
    // `trim()`; `useInnerText` keeps the comparison on the rendered text, as before.
    await expect(page.getByTestId('area-content')).toHaveText(
      'Paste your HTML in the input form on the left',
      { useInnerText: true },
    );
  });
});
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import { describe, expect, it } from 'vitest';
2+
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
3+
4+
// Unit tests for the HTML validation and text-extraction helpers.
describe('extract-text-from-html service', () => {
  describe('validateHtml', () => {
    it('check if the value is valid html', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
      expect(validateHtml('<div>Paste your HTML in the input form on the left</div>')).toBeTruthy();
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p></div>')).toBeTruthy();
      expect(validateHtml('<body><div><p>Paste your HTML in the input form on the left</p></div></body>')).toBeTruthy();
      // The outer <div> is never closed, but the input is still accepted because
      // it contains one complete element (<p>…</p>).
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
    });

    it('check if the value is an html invlid', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left</a>')).toBeFalsy();
    });
  });

  describe('getTextFromHtml', () => {
    it('must be return a string', () => {
      // Assert on the runtime type; calling `.toString()` on the expectation
      // object would not check anything.
      expect(typeof getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toBe('string');
    });

    it('must be return text from html', () => {
      expect(getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toStrictEqual(
        'Paste your HTML in the input form on the left',
      );
    });
  });
});
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/**
 * Heuristically checks whether `value` looks like HTML.
 *
 * The input is accepted when it contains at least one complete element: either
 * an opening tag with a matching closing tag of the same name, or a
 * self-closing tag. This is a pattern check, not a conformance check, so a
 * fragment with one complete element and other unbalanced tags still passes.
 *
 * @param value - Raw markup typed or pasted by the user.
 * @returns `true` when at least one complete element is found, `false` otherwise.
 */
function validateHtml(value: string): boolean {
  // Only attempt a parse where a DOM implementation exists. Without this guard
  // the ReferenceError raised by a missing `DOMParser` would be swallowed by
  // the catch below and every input would be reported as invalid.
  if (typeof DOMParser !== 'undefined') {
    try {
      new DOMParser().parseFromString(value, 'text/html');
    }
    catch {
      return false;
    }
  }

  // `[\s\S]*?` (rather than `.*?`) lets element content span several lines, so
  // markup such as "<div>\n  text\n</div>" is recognised. No `g` flag: a single
  // match is enough and `test` on a non-global regex carries no `lastIndex` state.
  const elementPattern = /<([a-z][a-z0-9]*)\b[^>]*>([\s\S]*?)<\/\1>|<([a-z][a-z0-9]*)\b[^\/]*\/>/i;

  return elementPattern.test(value);
}
14+
15+
/**
 * Extracts the text of an HTML snippet, collapsing each run of whitespace into
 * a single space.
 *
 * SECURITY: `value` is arbitrary markup pasted by the user. It is parsed inside
 * a separate document created with `createHTMLDocument`, which has no browsing
 * context, so inline event handlers (e.g. `<img src=x onerror=...>`) are not
 * executed and sub-resources are not fetched. Assigning the same markup to an
 * element owned by the live page document does not give that guarantee.
 *
 * @param value - Raw HTML markup.
 * @returns The text of the markup with whitespace runs collapsed; the result is
 *          not trimmed, and is an empty string when there is no text.
 */
function getTextFromHtml(value: string) {
  const inertDocument = document.implementation.createHTMLDocument('');
  const container = inertDocument.createElement('div');
  container.innerHTML = value;

  // `innerText` is preferred when the DOM implementation provides it;
  // `textContent` is the fallback for implementations that do not.
  const text = container.innerText || container.textContent || '';
  return text.replace(/\s+/g, ' ');
}
21+
22+
export { validateHtml, getTextFromHtml };
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<script setup lang="ts">
2+
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
3+
import { withDefaultOnError } from '@/utils/defaults';
4+
import type { UseValidationRule } from '@/composable/validation';
5+
6+
/**
 * Turns the raw HTML typed by the user into its text.
 * An empty input yields an empty output without calling the extractor; if the
 * extraction throws, `withDefaultOnError` is given '' as the fallback value.
 */
function transformer(value: string) {
  const extractText = () => (value === '' ? '' : getTextFromHtml(value));

  return withDefaultOnError(extractText, '');
}
14+
15+
// Validation applied to the input field: an empty input is always accepted,
// anything else has to pass `validateHtml`.
const rules: UseValidationRule<string>[] = [
  {
    message: 'Provided HTML is not valid.',
    validator: (value: string) => {
      if (value === '') {
        return true;
      }
      return validateHtml(value);
    },
  },
];
21+
</script>
22+
23+
<template>
24+
<format-transformer
25+
input-label="Your raw HTML"
26+
input-placeholder="Paste your raw HTML here..."
27+
output-label="Text from your HTML"
28+
:input-validation-rules="rules"
29+
:transformer="transformer"
30+
/>
31+
</template>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { CursorText } from '@vicons/tabler';
2+
import { defineTool } from '../tool';
3+
4+
// Text passed as the tool's `description`.
const description
  = 'Paste your HTML in the input form on the left and you will get text instantly. Occasionally, you may need to extract plain text from an HTML page where CSS properties (like user-select: none;) prevent text selection. The typical workaround involves using the DevTools (F12) to select "Copy → outer HTML". The proposed tool would simplify this process by extracting the "inner Text" directly from the copied HTML.';

// Registration entry for the "Extract text from HTML" tool; the component is
// imported lazily through the `component` callback.
export const tool = defineTool({
  name: 'Extract text from HTML',
  path: '/extract-text-from-html',
  description,
  keywords: ['extract', 'text', 'from', 'html'],
  component: () => import('./extract-text-from-html.vue'),
  icon: CursorText,
  createdAt: new Date('2024-05-10'),
});

src/tools/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { tool as base64FileConverter } from './base64-file-converter';
22
import { tool as base64StringConverter } from './base64-string-converter';
33
import { tool as basicAuthGenerator } from './basic-auth-generator';
4+
import { tool as extractTextFromHtml } from './extract-text-from-html';
45
import { tool as propertiesToYaml } from './properties-to-yaml';
56
import { tool as jsonToJava } from './json-to-java';
67
import { tool as jsonSortMaster } from './json-sort-master';
@@ -412,6 +413,7 @@ export const toolsByCategory: ToolCategory[] = [
412413
xmlFormatter,
413414
xsltTester,
414415
yamlViewer,
416+
extractTextFromHtml,
415417
jsonEditor,
416418
emailNormalizer,
417419
codeHighlighter,

0 commit comments

Comments
 (0)