Skip to content

Commit f8bb810

Browse files
committed
Merge branch 'up/fix/text-to-uni-enh' into chore/all-my-stuffs
2 parents 9854f85 + a1252eb commit f8bb810

File tree

4 files changed

+283
-32
lines changed

4 files changed

+283
-32
lines changed

src/tools/text-to-unicode/text-to-unicode.e2e.spec.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => {
1010
});
1111

1212
test('Text to unicode conversion', async ({ page }) => {
13-
await page.getByTestId('text-to-unicode-input').fill('it-tools');
13+
await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字');
1414
const unicode = await page.getByTestId('text-to-unicode-output').inputValue();
1515

16-
expect(unicode).toEqual('it-tools');
16+
// eslint-disable-next-line unicorn/escape-case
17+
expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
1718
});
1819

1920
test('Unicode to text conversion', async ({ page }) => {
20-
await page.getByTestId('unicode-to-text-input').fill('it-tools');
21+
// eslint-disable-next-line unicorn/escape-case
22+
await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
2123
const text = await page.getByTestId('unicode-to-text-output').inputValue();
2224

23-
expect(text).toEqual('it-tools');
25+
expect(text).toEqual('"it-tools" 文字');
2426
});
2527
});

src/tools/text-to-unicode/text-to-unicode.service.test.ts

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import { describe, expect, it } from 'vitest';
2-
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
2+
import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service';
3+
4+
describe('text-to-unicode (legacy tests)', () => {
5+
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
6+
const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
37

4-
describe('text-to-unicode', () => {
58
describe('convertTextToUnicode', () => {
69
it('a text string is converted to unicode representation', () => {
710
expect(convertTextToUnicode('A')).toBe('A');
@@ -18,3 +21,100 @@ describe('text-to-unicode', () => {
1821
});
1922
});
2023
});
24+
25+
// Every printable ASCII character, U+0020 (space) through U+007E (`~`), in code-point order.
const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
26+
27+
// Checks that the hand-written character-class regexes exported by the service select the same
// runs of printable ASCII as the `v`-flag (set subtraction) regexes built in this block.
describe('text-to-unicode regexes', () => {
28+
// Printable ASCII (space to `~`) minus `'`, `"` and `\`.
// eslint-disable-next-line prefer-regex-literals
29+
const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv');
30+
// Printable ASCII (space to `~`) minus `<`, `>`, `&`, `'` and `"`.
// eslint-disable-next-line prefer-regex-literals
31+
const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv');
32+
33+
it('regexes are equivalent to `v`-flag versions', () => {
34+
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
35+
// regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
36+
// sufficient in-browser support
37+
expect(ALL_PRINTABLE_ASCII.match(skipAsciiJs)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_JS));
38+
expect(ALL_PRINTABLE_ASCII.match(skipAsciiHtml)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_HTML));
39+
});
40+
});
41+
42+
/**
 * Table-driven tests for every converter.
 *
 * Each case lists the expected escaped output per converter id. Every entry is checked in both
 * directions: escaping `text` must produce the listed result, and unescaping that result must
 * give back `text`.
 */
describe('text-to-unicode', () => {
  interface TestConfig {
    /** Plain input text. */
    text: string
    /** Expected escaped output, keyed by converter id. */
    results: Record<ConverterId, string>
    /** Passed to `escape`; treated as `false` when omitted. */
    skipAscii?: boolean
  }
  const tests: TestConfig[] = [
    {
      text: 'ABC',
      results: {
        fullUnicode: String.raw`\u0041\u0042\u0043`,
        utf16: String.raw`\u0041\u0042\u0043`,
        hexEntities: String.raw`&#x41;&#x42;&#x43;`,
        decimalEntities: String.raw`&#65;&#66;&#67;`,
      },
    },
    {
      text: 'ABC',
      skipAscii: true,
      results: {
        fullUnicode: 'ABC',
        utf16: 'ABC',
        hexEntities: 'ABC',
        decimalEntities: 'ABC',
      },
    },
    {
      text: ALL_PRINTABLE_ASCII,
      skipAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        hexEntities: String.raw` !&#x22;#$%&#x26;&#x27;()*+,-./0123456789:;&#x3c;=&#x3e;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        decimalEntities: String.raw` !&#34;#$%&#38;&#39;()*+,-./0123456789:;&#60;=&#62;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
      },
    },
    {
      text: '文字',
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`\u6587\u5b57`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`\u6587\u5b57`,
        hexEntities: String.raw`&#x6587;&#x5b57;`,
        decimalEntities: String.raw`&#25991;&#23383;`,
      },
    },
    {
      text: 'a 💩 b',
      skipAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`a \u{1f4a9} b`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`a \ud83d\udca9 b`,
        hexEntities: String.raw`a &#x1f4a9; b`,
        decimalEntities: String.raw`a &#128169; b`,
      },
    },
  ];

  // Default `skipAscii` once here so the generated suite titles read `skipAscii=false`
  // instead of `skipAscii=undefined`, and the call below needs no fallback.
  for (const { text, skipAscii = false, results } of tests) {
    describe(`${text} (skipAscii=${skipAscii})`, () => {
      for (const [key, result] of Object.entries(results)) {
        describe(key, () => {
          // `Object.entries` widens keys to `string`; they come from a `Record<ConverterId, string>`.
          const converter = converters[key as ConverterId];
          it('Escaping', () => {
            expect(converter.escape(text, skipAscii)).toBe(result);
          });
          it('Unescaping', () => {
            expect(converter.unescape(result)).toBe(text);
          });
        });
      }
    });
  }
});
Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,86 @@
1-
function convertTextToUnicode(text: string): string {
2-
return text.split('').map(value => `&#${value.charCodeAt(0)};`).join('');
1+
// Regex that never matches: no position is both a word boundary (`\b`) and a non-boundary (`\B`).
// It has one capture group, like the two skipper regexes declared after it.
2+
const SKIP_NOTHING_RE = /(\b\B)/;
// Runs of printable ASCII (space to `~`) other than `"`, `'` and `\`.
3+
export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g;
// Runs of printable ASCII (space to `~`) other than `"`, `&`, `'`, `<` and `>`.
4+
export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g;
5+
6+
/**
 * Returns the UTF-16 code units of `text`, one number per string index.
 *
 * Characters outside the Basic Multilingual Plane therefore yield two values
 * (their high and low surrogates).
 *
 * @param text - The string to split.
 * @returns The code unit at each index, in order; empty for an empty string.
 */
function codeUnits(text: string): number[] {
  // `charCodeAt` is the code-unit accessor and always returns a number for an
  // in-range index, so no non-null assertion is needed.
  return Array.from({ length: text.length }, (_, index) => text.charCodeAt(index));
}
9+
10+
/**
 * Returns the Unicode code points of `text`.
 *
 * String iteration walks by code point, so a surrogate pair yields a single value;
 * an unpaired surrogate yields its own code unit value.
 *
 * @param text - The string to split.
 * @returns One number per code point, in order; empty for an empty string.
 */
function codePoints(text: string): number[] {
  const values: number[] = [];
  for (const char of text) {
    const value = char.codePointAt(0);
    // Each iterated chunk is non-empty, so `value` is always defined; the guard
    // narrows the type without resorting to a non-null assertion.
    if (value !== undefined) {
      values.push(value);
    }
  }
  return values;
}
13+
14+
/** Settings describing one escape format; passed to the `Converter` constructor. */
interface ConverterConfig {
/** Name of the format; the tool's select box uses it as the option label. */
15+
name: string
/** Settings used when escaping plain text. */
16+
escape: {
/** Splits text into numeric values. `Converter.escape` falls back to `codePoints` when omitted. */
17+
charValues?(text: string): number[]
/** Renders one numeric value as its escaped string. */
18+
mapper(charValue: number): string
19+
/** Regular expression for content to leave unescaped when ASCII skipping is on; used as a `split` separator. Must have exactly 1 capture group. */
20+
asciiSkipper: RegExp
21+
}
/** Settings used when converting escaped text back. */
22+
unescape: {
/** Pattern matching one escape sequence; every match is replaced by its character. */
23+
regex: RegExp
/** Base passed to `Number.parseInt` for the digits of a match. */
24+
radix: number
25+
}
26+
}
27+
/**
 * Escapes text into, and unescapes text from, the format described by a `ConverterConfig`.
 */
class Converter {
  constructor(public config: ConverterConfig) {}

  /**
   * Escapes `text` using the configured mapper.
   *
   * @param text - Plain text to escape.
   * @param skipAscii - When true, runs matched by the configured `asciiSkipper` are copied
   *   through unchanged; when false, every character is escaped.
   * @returns The escaped text.
   */
  escape(text: string, skipAscii: boolean): string {
    const { asciiSkipper, charValues, mapper } = this.config.escape;
    const getCharValues = charValues ?? codePoints;

    return text
      // Splitting on a regex with one capture group alternates the pieces:
      // even indices are text to escape, odd indices are the captured runs to keep.
      .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
      .flatMap((segment, index) =>
        index % 2
          ? segment
          // Call the mapper with the value only, so it never sees map's index/array arguments.
          : getCharValues(segment).map(value => mapper(value)),
      )
      .join('');
  }

  /**
   * Replaces every escape sequence matched by the configured regex with its character.
   *
   * A match whose number is not a valid Unicode code point (for example `&#99999999;`) is left
   * as-is; `String.fromCodePoint` would otherwise throw a RangeError for it.
   *
   * @param escaped - Text that may contain escape sequences.
   * @returns The text with all valid escape sequences decoded.
   */
  unescape(escaped: string): string {
    const { regex, radix } = this.config.unescape;
    const maxCodePoint = 0x10FFFF;

    return escaped.replace(regex, (match) => {
      // Strip the delimiters (everything that is not an ASCII hex digit) and parse the rest.
      const codePoint = Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix);
      // The comparison is also false for NaN, so unparsable matches are kept too.
      const isValid = codePoint >= 0 && codePoint <= maxCodePoint;
      return isValid ? String.fromCodePoint(codePoint) : match;
    });
  }
}
48+
49+
/**
 * The available escape formats, keyed by converter id.
 * Key order is the order in which `Object.entries(converters)` lists them.
 */
const converters = {
  // JavaScript-style escapes computed per code point (default `charValues`).
  fullUnicode: new Converter({
    name: 'Full Unicode',
    escape: {
      asciiSkipper: SKIP_ASCII_JS,
      mapper: convertCodePointToUnicode,
    },
    unescape: {
      radix: 16,
      regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu,
    },
  }),
  // JavaScript-style escapes computed per UTF-16 code unit.
  utf16: new Converter({
    name: 'UTF-16 Code Units',
    escape: {
      asciiSkipper: SKIP_ASCII_JS,
      charValues: codeUnits,
      mapper: convertCodePointToUnicode,
    },
    unescape: {
      radix: 16,
      regex: /\\u\p{AHex}{4}/gu,
    },
  }),
  // Hexadecimal HTML character references.
  hexEntities: new Converter({
    name: 'HTML Entities (Hex)',
    escape: {
      asciiSkipper: SKIP_ASCII_HTML,
      mapper: toHexEntities,
    },
    unescape: {
      radix: 16,
      regex: /&#x\p{AHex}{1,6};/gu,
    },
  }),
  // Decimal HTML character references.
  decimalEntities: new Converter({
    name: 'HTML Entities (Decimal)',
    escape: {
      asciiSkipper: SKIP_ASCII_HTML,
      mapper: toDecimalEntities,
    },
    unescape: {
      radix: 10,
      regex: /&#\d+;/gu,
    },
  }),
} satisfies Record<string, Converter>;

/** Union of the keys of `converters`. */
export type ConverterId = keyof typeof converters;
72+
73+
/**
 * Formats a numeric value as a JavaScript-style Unicode escape.
 *
 * Values that fit in four hex digits become `\uXXXX` (zero-padded); longer ones
 * use the braced form `\u{X...}`. Hex digits are lowercase.
 *
 * @param codePoint - The numeric value to format.
 * @returns The escape sequence as literal text (backslash included).
 */
function convertCodePointToUnicode(codePoint: number): string {
  const digits = codePoint.toString(16);

  if (digits.length > 4) {
    return String.raw`\u{${digits}}`;
  }

  return String.raw`\u${digits.padStart(4, '0')}`;
}
77+
78+
/**
 * Formats a numeric value as a hexadecimal HTML character reference,
 * e.g. 65 becomes `&#x41;`. Hex digits are lowercase and unpadded.
 */
function toHexEntities(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  return `&#x${hexDigits};`;
}
481

5-
function convertUnicodeToText(unicodeStr: string): string {
6-
return unicodeStr.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec));
82+
/**
 * Formats a numeric value as a decimal HTML character reference,
 * e.g. 65 becomes `&#65;`.
 */
function toDecimalEntities(codePoint: number): string {
  const decimalDigits = String(codePoint);
  return `&#${decimalDigits};`;
}
885

9-
export { convertTextToUnicode, convertUnicodeToText };
86+
export { converters };
Lines changed: 93 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,106 @@
11
<script setup lang="ts">
2-
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
2+
import { type ConverterId, converters } from './text-to-unicode.service';
33
import { useCopy } from '@/composable/copy';
44
5+
const converterId = ref<ConverterId>('fullUnicode');
6+
const skipAscii = ref(true);
7+
58
const inputText = ref('');
6-
const unicodeFromText = computed(() => inputText.value.trim() === '' ? '' : convertTextToUnicode(inputText.value));
9+
const unicodeFromText = computed(() =>
10+
inputText.value.trim() === ''
11+
? ''
12+
: converters[converterId.value].escape(inputText.value, skipAscii.value),
13+
);
714
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
815
916
const inputUnicode = ref('');
10-
const textFromUnicode = computed(() => inputUnicode.value.trim() === '' ? '' : convertUnicodeToText(inputUnicode.value));
17+
const textFromUnicode = computed(() =>
18+
inputUnicode.value.trim() === '' ? '' : converters[converterId.value].unescape(inputUnicode.value),
19+
);
1120
const { copy: copyText } = useCopy({ source: textFromUnicode });
1221
</script>
1322

1423
<template>
15-
<c-card title="Text to Unicode">
16-
<c-input-text v-model:value="inputText" multiline placeholder="e.g. 'Hello Avengers'" label="Enter text to convert to unicode" autosize autofocus raw-text test-id="text-to-unicode-input" />
17-
<c-input-text v-model:value="unicodeFromText" label="Unicode from your text" multiline raw-text readonly mt-2 placeholder="The unicode representation of your text will be here" test-id="text-to-unicode-output" />
18-
<div mt-2 flex justify-center>
19-
<c-button :disabled="!unicodeFromText" @click="copyUnicode()">
20-
Copy unicode to clipboard
21-
</c-button>
22-
</div>
23-
</c-card>
24-
25-
<c-card title="Unicode to Text">
26-
<c-input-text v-model:value="inputUnicode" multiline placeholder="Input Unicode" label="Enter unicode to convert to text" autosize raw-text test-id="unicode-to-text-input" />
27-
<c-input-text v-model:value="textFromUnicode" label="Text from your Unicode" multiline raw-text readonly mt-2 placeholder="The text representation of your unicode will be here" test-id="unicode-to-text-output" />
28-
<div mt-2 flex justify-center>
29-
<c-button :disabled="!textFromUnicode" @click="copyText()">
30-
Copy text to clipboard
31-
</c-button>
24+
<div class="outer" flex flex-col gap-6>
25+
<div class="controls">
26+
<c-select
27+
v-model:value="converterId"
28+
searchable
29+
label="Conversion type:"
30+
:options="Object.entries(converters).map(([key, val]) => ({ label: val.config.name, value: key }))"
31+
/>
3232
</div>
33-
</c-card>
33+
<c-card class="card" title="Text to Unicode">
34+
<c-input-text
35+
v-model:value="inputText"
36+
multiline
37+
placeholder="e.g. 'Hello Avengers'"
38+
label="Enter text to convert to Unicode"
39+
autosize
40+
autofocus
41+
raw-text
42+
test-id="text-to-unicode-input"
43+
/>
44+
<c-input-text
45+
v-model:value="unicodeFromText"
46+
label="Unicode from your text"
47+
multiline
48+
raw-text
49+
readonly
50+
mt-2
51+
placeholder="The unicode representation of your text will be here"
52+
test-id="text-to-unicode-output"
53+
/>
54+
<div mt-2 flex justify-start>
55+
<n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left">
56+
<n-switch v-model:value="skipAscii" />
57+
</n-form-item>
58+
</div>
59+
<div mt-2 flex justify-center>
60+
<c-button :disabled="!unicodeFromText" @click="copyUnicode()"> Copy unicode to clipboard </c-button>
61+
</div>
62+
</c-card>
63+
<c-card class="card" title="Unicode to Text">
64+
<c-input-text
65+
v-model:value="inputUnicode"
66+
multiline
67+
placeholder="Input Unicode"
68+
label="Enter unicode to convert to text"
69+
autosize
70+
raw-text
71+
test-id="unicode-to-text-input"
72+
/>
73+
<c-input-text
74+
v-model:value="textFromUnicode"
75+
label="Text from your Unicode"
76+
multiline
77+
raw-text
78+
readonly
79+
mt-2
80+
placeholder="The text representation of your unicode will be here"
81+
test-id="unicode-to-text-output"
82+
/>
83+
<div mt-2 flex justify-center>
84+
<c-button :disabled="!textFromUnicode" @click="copyText()"> Copy text to clipboard </c-button>
85+
</div>
86+
</c-card>
87+
</div>
3488
</template>
89+
90+
<style lang="less" scoped>
91+
// Wrapping flex row. As a flex item itself it starts from a 1200px basis and may shrink but not grow.
.outer {
92+
flex: 0 1 1200px;
93+
margin-inline: 50px;
94+
display: flex;
95+
flex-direction: row;
96+
flex-wrap: wrap;
97+
}
98+
// Flex basis of 100%: this item asks for the full width of its flex container.
99+
.controls {
100+
flex: 0 1 100%;
101+
}
102+
// Flex basis of max(40%, 500px); the item may grow past it but does not shrink below it.
103+
.card {
104+
flex: 1 0 max(40%, 500px);
105+
}
106+
</style>

0 commit comments

Comments
 (0)