Skip to content

Commit a19e8da

Browse files
Implementation
1 parent 55ec47d commit a19e8da

File tree

4 files changed

+184
-170
lines changed

4 files changed

+184
-170
lines changed

.npmrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
package-lock=false

demo.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ const parse = require('regjsparser').parse;
55
const generate = require('regjsgen').generate;
66
const regenerate = require('regenerate');
77

8-
const pattern = String.raw`-`;
8+
const pattern = '\\w';
99

1010
console.log(generate(parse(pattern)));
11-
console.log(regenerate('-'.codePointAt(0)).toString())
1211

13-
const processedPattern = rewritePattern(pattern, 'u', { useUnicodeFlag: true });
12+
const processedPattern = rewritePattern(pattern, 'ui', {
13+
'unicodeFlag': 'transform'
14+
})
1415

1516
console.log(processedPattern);
1617

rewrite-pattern.js

Lines changed: 98 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,14 @@ const caseFold = (codePoint) => {
129129
};
130130

131131
const processCharacterClass = (characterClassItem, regenerateOptions) => {
132+
let transformed = config.transform.unicodeFlag;
133+
const negative = characterClassItem.negative;
132134
const set = regenerate();
133135
for (const item of characterClassItem.body) {
134136
switch (item.type) {
135137
case 'value':
136138
set.add(item.codePoint);
137-
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
139+
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
138140
const folded = caseFold(item.codePoint);
139141
if (folded) {
140142
set.add(folded);
@@ -145,19 +147,22 @@ const processCharacterClass = (characterClassItem, regenerateOptions) => {
145147
const min = item.min.codePoint;
146148
const max = item.max.codePoint;
147149
set.addRange(min, max);
148-
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
150+
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
149151
set.iuAddRange(min, max);
150152
}
151153
break;
152154
case 'characterClassEscape':
153155
set.add(getCharacterClassEscapeSet(
154156
item.value,
155-
config.unicode,
156-
config.ignoreCase
157+
config.flags.unicode,
158+
config.flags.ignoreCase
157159
));
158160
break;
159161
case 'unicodePropertyEscape':
160162
set.add(getUnicodePropertyEscapeSet(item.value, item.negative));
163+
if (config.transform.unicodePropertyEscapes) {
164+
transformed = true;
165+
}
161166
break;
162167
// The `default` clause is only here as a safeguard; it should never be
163168
// reached. Code coverage tools should ignore it.
@@ -166,10 +171,12 @@ const processCharacterClass = (characterClassItem, regenerateOptions) => {
166171
throw new Error(`Unknown term type: ${ item.type }`);
167172
}
168173
}
169-
if (characterClassItem.negative) {
170-
update(characterClassItem, `(?!${set.toString(regenerateOptions)})[\\s\\S]`)
171-
} else {
172-
update(characterClassItem, set.toString(regenerateOptions));
174+
if (transformed) {
175+
if (negative) {
176+
update(characterClassItem, `(?!${set.toString(regenerateOptions)})[\\s\\S]`)
177+
} else {
178+
update(characterClassItem, set.toString(regenerateOptions));
179+
}
173180
}
174181
return characterClassItem;
175182
};
@@ -189,14 +196,12 @@ const assertNoUnmatchedReferences = (groups) => {
189196
const processTerm = (item, regenerateOptions, groups) => {
190197
switch (item.type) {
191198
case 'dot':
192-
if (config.useDotAllFlag) {
193-
break;
194-
} else if (config.unicode) {
199+
if (config.transform.unicodeFlag) {
195200
update(
196201
item,
197-
getUnicodeDotSet(config.dotAll).toString(regenerateOptions)
202+
getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions)
198203
);
199-
} else if (config.dotAll) {
204+
} else if (config.transform.dotAllFlag) {
200205
// TODO: consider changing this at the regenerate level.
201206
update(item, '[\\s\\S]');
202207
}
@@ -205,7 +210,7 @@ const processTerm = (item, regenerateOptions, groups) => {
205210
item = processCharacterClass(item, regenerateOptions);
206211
break;
207212
case 'unicodePropertyEscape':
208-
if (config.unicodePropertyEscape) {
213+
if (config.transform.unicodePropertyEscapes) {
209214
update(
210215
item,
211216
getUnicodePropertyEscapeSet(item.value, item.negative)
@@ -214,20 +219,22 @@ const processTerm = (item, regenerateOptions, groups) => {
214219
}
215220
break;
216221
case 'characterClassEscape':
217-
update(
218-
item,
219-
getCharacterClassEscapeSet(
220-
item.value,
221-
config.unicode,
222-
config.ignoreCase
223-
).toString(regenerateOptions)
224-
);
222+
if (config.transform.unicodeFlag) {
223+
update(
224+
item,
225+
getCharacterClassEscapeSet(
226+
item.value,
227+
/* config.transform.unicodeFlag implies config.flags.unicode */ true,
228+
config.flags.ignoreCase
229+
).toString(regenerateOptions)
230+
);
231+
}
225232
break;
226233
case 'group':
227234
if (item.behavior == 'normal') {
228235
groups.lastIndex++;
229236
}
230-
if (item.name && config.namedGroup) {
237+
if (item.name && config.transform.namedGroups) {
231238
const name = item.name.value;
232239

233240
if (groups.names[name]) {
@@ -262,7 +269,7 @@ const processTerm = (item, regenerateOptions, groups) => {
262269
case 'value':
263270
const codePoint = item.codePoint;
264271
const set = regenerate(codePoint);
265-
if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
272+
if (config.flags.ignoreCase && config.transform.unicodeFlag) {
266273
const folded = caseFold(codePoint);
267274
if (folded) {
268275
set.add(folded);
@@ -300,42 +307,84 @@ const processTerm = (item, regenerateOptions, groups) => {
300307
return item;
301308
};
302309

310+
// Enable every stable RegExp feature by default.
// This object is handed to the parser as its feature set, so these syntaxes
// are always accepted at parse time.
const regjsparserFeatures = {
	'unicodePropertyEscape': true,
	'namedGroups': true,
	'lookbehind': true
};
316+
303317
/**
 * Module-level state read by the rewriting helpers. `rewritePattern`
 * overwrites every entry of `flags` and `transform` at the start of each call.
 *
 * - `flags`: which RegExp flags accompany the pattern.
 * - `transform`: which features have to be rewritten out of the pattern.
 */
const config = {
	'flags': {
		'ignoreCase': false,
		'unicode': false,
		'dotAll': false,
	},
	'transform': {
		'dotAllFlag': false,
		'unicodeFlag': false,
		'unicodePropertyEscapes': false,
		'namedGroups': false,
	},
	// True when the `u` flag is present and is NOT being transformed away,
	// i.e. the output pattern still runs with the `u` flag.
	get useUnicodeFlag() {
		return this.flags.unicode && !this.transform.unicodeFlag;
	}
};
312-
const rewritePattern = (pattern, flags, options) => {
313-
config.unicode = flags && flags.includes('u');
314-
const regjsparserFeatures = {
315-
'unicodePropertyEscape': config.unicode,
316-
'namedGroups': true,
317-
'lookbehind': options && options.lookbehind
318-
};
319-
config.ignoreCase = flags && flags.includes('i');
320-
const supportDotAllFlag = options && options.dotAllFlag;
321-
config.dotAll = supportDotAllFlag && flags && flags.includes('s');
322-
config.namedGroup = options && options.namedGroup;
323-
config.useDotAllFlag = options && options.useDotAllFlag;
324-
config.useUnicodeFlag = options && options.useUnicodeFlag;
325-
config.unicodePropertyEscape = options && options.unicodePropertyEscape;
326-
if (supportDotAllFlag && config.useDotAllFlag) {
327-
throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!');
333+
334+
/**
 * Checks a user-supplied options object and throws on the first invalid entry.
 *
 * @param {Object} [options] - Options object; a falsy value skips validation.
 * @throws {Error} If `dotAllFlag`, `unicodeFlag`, `unicodePropertyEscapes` or
 *   `namedGroups` is anything other than nullish, `false` or `'transform'`.
 * @throws {Error} If `onNamedGroup` is neither nullish nor a function.
 * @throws {Error} If the object has any other own enumerable key.
 */
const validateOptions = (options) => {
	if (!options) {
		return;
	}

	for (const key of Object.keys(options)) {
		const value = options[key];
		switch (key) {
			case 'dotAllFlag':
			case 'unicodeFlag':
			case 'unicodePropertyEscapes':
			case 'namedGroups':
				// `!= null` deliberately lets both `null` and `undefined` through.
				if (value != null && value !== false && value !== 'transform') {
					throw new Error(`.${key} must be false (default) or 'transform'.`);
				}
				break;
			case 'onNamedGroup':
				if (value != null && typeof value !== 'function') {
					throw new Error('.onNamedGroup must be a function.');
				}
				break;
			default:
				throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	}
};
358+
359+
/**
 * Reports whether `flag` occurs in `flags`.
 *
 * @param {string} [flags] - Flags value; any falsy value yields `false`.
 * @param {string} flag - The flag to look for.
 * @returns {boolean} Result of `flags.includes(flag)`, or `false` when `flags` is falsy.
 */
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};
360+
/**
 * Reports whether the option `name` is set to the string `'transform'`.
 *
 * @param {Object} [options] - Options object; any falsy value yields `false`.
 * @param {string} name - Key to look up on `options`.
 * @returns {boolean} `true` only when `options[name] === 'transform'`.
 */
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	const setting = options[name];
	return setting === 'transform';
};
361+
362+
const rewritePattern = (pattern, flags, options) => {
363+
validateOptions(options);
364+
365+
config.flags.unicode = hasFlag(flags, 'u');
366+
config.flags.ignoreCase = hasFlag(flags, 'i');
367+
config.flags.dotAll = hasFlag(flags, 's');
368+
369+
config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
370+
config.transform.unicodeFlag = config.flags.unicode && transform(options, 'unicodeFlag');
371+
// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
372+
config.transform.unicodePropertyEscapes = config.flags.unicode && (
373+
transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
374+
);
375+
config.transform.namedGroups = transform(options, 'namedGroups');
376+
329377
const regenerateOptions = {
330-
'hasUnicodeFlag': config.useUnicodeFlag,
331-
'bmpOnly': !config.unicode
378+
'hasUnicodeFlag': config.flags.unicode && !config.transform.unicodeFlag,
379+
'bmpOnly': !config.flags.unicode
332380
};
333381
const groups = {
334382
'onNamedGroup': options && options.onNamedGroup,
335383
'lastIndex': 0,
336384
'names': Object.create(null), // { [name]: index }
337385
'unmatchedReferences': Object.create(null) // { [name]: Array<reference> }
338386
};
387+
339388
const tree = parse(pattern, flags, regjsparserFeatures);
340389
// Note: `processTerm` mutates `tree` and `groups`.
341390
processTerm(tree, regenerateOptions, groups);

0 commit comments

Comments
 (0)