|
/**
 * Regex that never matches: no position is both a word boundary (`\b`) and a
 * non-boundary (`\B`). Splitting on it returns the input as a single piece.
 */
const SKIP_NOTHING_RE = /(\b\B)/;

/**
 * Matches runs of printable ASCII (space through `~`) excluding `"`, `'`
 * and `\`. The single capture group makes `String.prototype.split` keep the
 * matched runs at the odd indices of its result.
 */
export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g;

/**
 * Matches runs of printable ASCII (space through `~`) excluding `"`, `&`,
 * `'`, `<` and `>`. Has a single capture group, like `SKIP_ASCII_JS`.
 */
export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g;
4 | 5 |
|
/**
 * Returns the UTF-16 code units of `text`, one number per code unit.
 * Characters outside the Basic Multilingual Plane therefore contribute two
 * values (their surrogate pair).
 *
 * @param text input string
 * @returns code unit values in string order; empty for an empty string
 */
function codeUnits(text: string): number[] {
  // charCodeAt is typed `number` (codePointAt is `number | undefined`), so the
  // declared return type holds under strictNullChecks.
  return Array.from({ length: text.length }, (_, index) => text.charCodeAt(index));
}
|
8 | 9 |
|
/**
 * Returns the Unicode code points of `text`, one number per code point.
 * A surrogate pair yields a single value; a lone surrogate yields its own
 * code unit value.
 *
 * @param text input string
 * @returns code point values in string order; empty for an empty string
 */
function codePoints(text: string): number[] {
  const values: number[] = [];
  // String iteration walks by code point, so each `char` is a non-empty string.
  for (const char of text) {
    const value = char.codePointAt(0);
    // codePointAt is typed `number | undefined`; narrow instead of asserting.
    if (value !== undefined) {
      values.push(value);
    }
  }
  return values;
}
|
12 | 13 |
|
/** A named pair of escape / unescape text transformations. */
export interface Converter {
  /** Label identifying the converter. */
  name: string;

  /**
   * Escapes `text`.
   *
   * @param text input to escape
   * @param skipAscii flag controlling whether ASCII content is skipped;
   *   the exact set of skipped characters is up to the implementation
   * @returns the escaped text
   */
  escape(text: string, skipAscii: boolean): string;

  /**
   * Reverses the escaping applied by this converter.
   *
   * @param text input containing escape sequences
   * @returns the unescaped text
   */
  unescape(text: string): string;
}
|
18 | 19 |
|
/** Configuration consumed by `escaper` to build an escape function. */
interface EscapeConfig {
  /**
   * Splits text into the numeric character values to escape.
   * Optional; `escaper` falls back to `codePoints` when it is omitted.
   */
  charValues?(text: string): number[];

  /** Converts one numeric character value into its escaped string form. */
  mapper(charValue: number): string;

  /**
   * Regular expression for default content to skip.
   * Must have exactly 1 capture group.
   */
  asciiSkipper: RegExp;
}
|
23 | 26 |
|
24 |
| -function escaper({ getCharValues, mapper }: EscapeConfig) { |
25 |
| - /** |
26 |
| - * @param text text input to escape |
27 |
| - * @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group. |
28 |
| - */ |
29 |
| - return (text: string, skipper?: RegExp): string => { |
30 |
| - skipper ??= SKIP_NOTHING_RE; |
31 |
| - getCharValues ??= _codePoints; |
| 27 | +function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) { |
| 28 | + return (text: string, skip: boolean): string => { |
| 29 | + getCharValues ??= codePoints; |
32 | 30 |
|
33 | 31 | return text
|
34 |
| - .split(skipper) |
| 32 | + .split(skip ? skipper : SKIP_NOTHING_RE) |
35 | 33 | .flatMap((x, i) => {
|
36 | 34 | if (i % 2) {
|
37 | 35 | return x;
|
@@ -59,22 +57,22 @@ export type ConverterId = keyof typeof converters;
|
/**
 * Registry of the available converters, keyed by converter id.
 * `satisfies` checks every entry against `Converter` while keeping the
 * literal keys available for `keyof typeof converters`.
 */
const converters = {
  // \uXXXX or \u{X...} escapes, one per code point (escaper's default values).
  fullUnicode: {
    name: 'Full Unicode',
    escape: escaper({
      mapper: convertCodePointToUnicode,
      asciiSkipper: SKIP_ASCII_JS,
    }),
    unescape: unescaper({
      regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu,
      radix: 16,
    }),
  },
  // \uXXXX escapes, one per UTF-16 code unit.
  utf16: {
    name: 'UTF-16 Code Units',
    escape: escaper({
      charValues: codeUnits,
      mapper: convertCodePointToUnicode,
      asciiSkipper: SKIP_ASCII_JS,
    }),
    unescape: unescaper({
      regex: /\\u\p{AHex}{4}/gu,
      radix: 16,
    }),
  },
  // &#x...; hexadecimal character references.
  hexEntities: {
    name: 'HTML Entities (Hex)',
    escape: escaper({
      mapper: toHexEntities,
      asciiSkipper: SKIP_ASCII_HTML,
    }),
    unescape: unescaper({
      regex: /&#x\p{AHex}{1,6};/gu,
      radix: 16,
    }),
  },
  // &#...; decimal character references.
  decimalEntities: {
    name: 'HTML Entities (Decimal)',
    escape: escaper({
      mapper: toDecimalEntities,
      asciiSkipper: SKIP_ASCII_HTML,
    }),
    unescape: unescaper({
      regex: /&#\d+;/gu,
      radix: 10,
    }),
  },
} satisfies Record<string, Converter>;
|
|
0 commit comments