Skip to content

Commit 1dc965d

Browse files
committed
Always escape ASCII chars with special meaning
1 parent b0ae8d7 commit 1dc965d

File tree

4 files changed

+57
-28
lines changed

4 files changed

+57
-28
lines changed

src/tools/text-to-unicode/text-to-unicode.e2e.spec.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => {
1010
});
1111

1212
test('Text to unicode conversion', async ({ page }) => {
13-
await page.getByTestId('text-to-unicode-input').fill('it-tools');
13+
await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字');
1414
const unicode = await page.getByTestId('text-to-unicode-output').inputValue();
1515

16-
expect(unicode).toEqual('it-tools');
16+
// eslint-disable-next-line unicorn/escape-case
17+
expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
1718
});
1819

1920
test('Unicode to text conversion', async ({ page }) => {
20-
await page.getByTestId('unicode-to-text-input').fill('it-tools');
21+
// eslint-disable-next-line unicorn/escape-case
22+
await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
2123
const text = await page.getByTestId('unicode-to-text-output').inputValue();
2224

23-
expect(text).toEqual('it-tools');
25+
expect(text).toEqual('"it-tools" 文字');
2426
});
2527
});

src/tools/text-to-unicode/text-to-unicode.service.test.ts

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { describe, expect, it } from 'vitest';
2-
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
2+
import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service';
33

44
describe('text-to-unicode (legacy tests)', () => {
5-
const convertTextToUnicode = converters.decimalEntities.escape;
5+
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
66
const convertUnicodeToText = converters.decimalEntities.unescape;
77

88
describe('convertTextToUnicode', () => {
@@ -22,6 +22,23 @@ describe('text-to-unicode (legacy tests)', () => {
2222
});
2323
});
2424

25+
const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
26+
27+
describe('text-to-unicode regexes', () => {
28+
// eslint-disable-next-line prefer-regex-literals
29+
const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv');
30+
// eslint-disable-next-line prefer-regex-literals
31+
const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv');
32+
33+
it('regexes are equivalent to `v`-flag versions', () => {
34+
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
35+
// regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
36+
// sufficient in-browser support
37+
expect(ALL_PRINTABLE_ASCII.match(skipAsciiJs)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_JS));
38+
expect(ALL_PRINTABLE_ASCII.match(skipAsciiHtml)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_HTML));
39+
});
40+
});
41+
2542
describe('text-to-unicode', () => {
2643
interface TestConfig {
2744
text: string
@@ -48,6 +65,18 @@ describe('text-to-unicode', () => {
4865
decimalEntities: 'ABC',
4966
},
5067
},
68+
{
69+
text: ALL_PRINTABLE_ASCII,
70+
skipPrintableAscii: true,
71+
results: {
72+
// eslint-disable-next-line unicorn/escape-case
73+
fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
74+
// eslint-disable-next-line unicorn/escape-case
75+
utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
76+
hexEntities: String.raw` !&#x22;#$%&#x26;&#x27;()*+,-./0123456789:;&#x3c;=&#x3e;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
77+
decimalEntities: String.raw` !&#34;#$%&#38;&#39;()*+,-./0123456789:;&#60;=&#62;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
78+
},
79+
},
5180
{
5281
text: '文字',
5382
results: {
@@ -79,7 +108,7 @@ describe('text-to-unicode', () => {
79108
describe(key, () => {
80109
const converter = converters[key as ConverterId];
81110
it('Escaping', () => {
82-
expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result);
111+
expect(converter.escape(text, skipAscii)).toBe(result);
83112
});
84113
it('Unescaping', () => {
85114
expect(converter.unescape(result)).toBe(text);

src/tools/text-to-unicode/text-to-unicode.service.ts

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,35 @@
11
// regex that never matches: no position is both a word boundary (\b) and a non-boundary (\B)
22
const SKIP_NOTHING_RE = /(\b\B)/;
3-
export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g;
3+
export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g;
4+
export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g;
45

5-
function _codeUnits(text: string): number[] {
6+
function codeUnits(text: string): number[] {
67
return text.split('').map(char => char.codePointAt(0));
78
}
89

9-
function _codePoints(text: string): number[] {
10+
function codePoints(text: string): number[] {
1011
return [...text].map(char => char.codePointAt(0));
1112
}
1213

1314
export interface Converter {
1415
name: string
15-
escape(text: string, skip: RegExp): string
16+
escape(text: string, skipAscii: boolean): string
1617
unescape(text: string): string
1718
};
1819

1920
interface EscapeConfig {
20-
getCharValues?(text: string): number[]
21+
charValues?(text: string): number[]
2122
mapper(charValue: number): string
23+
/** Regular expression matching the runs of ASCII text to leave unescaped when skipping is enabled. Must have exactly 1 capture group, so that `split` keeps the skipped runs at odd indices. */
24+
asciiSkipper: RegExp
2225
};
2326

24-
function escaper({ getCharValues, mapper }: EscapeConfig) {
25-
/**
26-
* @param text text input to escape
27-
* @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group.
28-
*/
29-
return (text: string, skipper?: RegExp): string => {
30-
skipper ??= SKIP_NOTHING_RE;
31-
getCharValues ??= _codePoints;
27+
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
28+
return (text: string, skip: boolean): string => {
29+
getCharValues ??= codePoints;
3230

3331
return text
34-
.split(skipper)
32+
.split(skip ? skipper : SKIP_NOTHING_RE)
3533
.flatMap((x, i) => {
3634
if (i % 2) {
3735
return x;
@@ -59,22 +57,22 @@ export type ConverterId = keyof typeof converters;
5957
const converters = {
6058
fullUnicode: {
6159
name: 'Full Unicode',
62-
escape: escaper({ mapper: convertCodePointToUnicode }),
60+
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
6361
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
6462
},
6563
utf16: {
6664
name: 'UTF-16 Code Units',
67-
escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }),
65+
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
6866
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
6967
},
7068
hexEntities: {
7169
name: 'HTML Entities (Hex)',
72-
escape: escaper({ mapper: toHexEntities }),
70+
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
7371
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
7472
},
7573
decimalEntities: {
7674
name: 'HTML Entities (Decimal)',
77-
escape: escaper({ mapper: toDecimalEntities }),
75+
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
7876
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
7977
},
8078
} satisfies Record<string, Converter>;

src/tools/text-to-unicode/text-to-unicode.vue

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<script setup lang="ts">
2-
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
2+
import { type ConverterId, converters } from './text-to-unicode.service';
33
import { useCopy } from '@/composable/copy';
44
55
const converterId = ref<ConverterId>('fullUnicode');
@@ -9,7 +9,7 @@ const inputText = ref('');
99
const unicodeFromText = computed(() =>
1010
inputText.value.trim() === ''
1111
? ''
12-
: converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined),
12+
: converters[converterId.value].escape(inputText.value, skipAscii.value),
1313
);
1414
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
1515
@@ -52,7 +52,7 @@ const { copy: copyText } = useCopy({ source: textFromUnicode });
5252
test-id="text-to-unicode-output"
5353
/>
5454
<div mt-2 flex justify-start>
55-
<n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left">
55+
<n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left">
5656
<n-switch v-model:value="skipAscii" />
5757
</n-form-item>
5858
</div>

0 commit comments

Comments
 (0)