@@ -129,12 +129,14 @@ const caseFold = (codePoint) => {
129
129
} ;
130
130
131
131
const processCharacterClass = ( characterClassItem , regenerateOptions ) => {
132
+ let transformed = config . transform . unicodeFlag ;
133
+ const negative = characterClassItem . negative ;
132
134
const set = regenerate ( ) ;
133
135
for ( const item of characterClassItem . body ) {
134
136
switch ( item . type ) {
135
137
case 'value' :
136
138
set . add ( item . codePoint ) ;
137
- if ( config . ignoreCase && config . unicode && ! config . useUnicodeFlag ) {
139
+ if ( config . flags . ignoreCase && config . transform . unicodeFlag ) {
138
140
const folded = caseFold ( item . codePoint ) ;
139
141
if ( folded ) {
140
142
set . add ( folded ) ;
@@ -145,19 +147,22 @@ const processCharacterClass = (characterClassItem, regenerateOptions) => {
145
147
const min = item . min . codePoint ;
146
148
const max = item . max . codePoint ;
147
149
set . addRange ( min , max ) ;
148
- if ( config . ignoreCase && config . unicode && ! config . useUnicodeFlag ) {
150
+ if ( config . flags . ignoreCase && config . transform . unicodeFlag ) {
149
151
set . iuAddRange ( min , max ) ;
150
152
}
151
153
break ;
152
154
case 'characterClassEscape' :
153
155
set . add ( getCharacterClassEscapeSet (
154
156
item . value ,
155
- config . unicode ,
156
- config . ignoreCase
157
+ config . flags . unicode ,
158
+ config . flags . ignoreCase
157
159
) ) ;
158
160
break ;
159
161
case 'unicodePropertyEscape' :
160
162
set . add ( getUnicodePropertyEscapeSet ( item . value , item . negative ) ) ;
163
+ if ( config . transform . unicodePropertyEscapes ) {
164
+ transformed = true ;
165
+ }
161
166
break ;
162
167
// The `default` clause is only here as a safeguard; it should never be
163
168
// reached. Code coverage tools should ignore it.
@@ -166,10 +171,12 @@ const processCharacterClass = (characterClassItem, regenerateOptions) => {
166
171
throw new Error ( `Unknown term type: ${ item . type } ` ) ;
167
172
}
168
173
}
169
- if ( characterClassItem . negative ) {
170
- update ( characterClassItem , `(?!${ set . toString ( regenerateOptions ) } )[\\s\\S]` )
171
- } else {
172
- update ( characterClassItem , set . toString ( regenerateOptions ) ) ;
174
+ if ( transformed ) {
175
+ if ( negative ) {
176
+ update ( characterClassItem , `(?!${ set . toString ( regenerateOptions ) } )[\\s\\S]` )
177
+ } else {
178
+ update ( characterClassItem , set . toString ( regenerateOptions ) ) ;
179
+ }
173
180
}
174
181
return characterClassItem ;
175
182
} ;
@@ -189,14 +196,12 @@ const assertNoUnmatchedReferences = (groups) => {
189
196
const processTerm = ( item , regenerateOptions , groups ) => {
190
197
switch ( item . type ) {
191
198
case 'dot' :
192
- if ( config . useDotAllFlag ) {
193
- break ;
194
- } else if ( config . unicode ) {
199
+ if ( config . transform . unicodeFlag ) {
195
200
update (
196
201
item ,
197
- getUnicodeDotSet ( config . dotAll ) . toString ( regenerateOptions )
202
+ getUnicodeDotSet ( config . flags . dotAll ) . toString ( regenerateOptions )
198
203
) ;
199
- } else if ( config . dotAll ) {
204
+ } else if ( config . transform . dotAllFlag ) {
200
205
// TODO: consider changing this at the regenerate level.
201
206
update ( item , '[\\s\\S]' ) ;
202
207
}
@@ -205,7 +210,7 @@ const processTerm = (item, regenerateOptions, groups) => {
205
210
item = processCharacterClass ( item , regenerateOptions ) ;
206
211
break ;
207
212
case 'unicodePropertyEscape' :
208
- if ( config . unicodePropertyEscape ) {
213
+ if ( config . transform . unicodePropertyEscapes ) {
209
214
update (
210
215
item ,
211
216
getUnicodePropertyEscapeSet ( item . value , item . negative )
@@ -214,20 +219,22 @@ const processTerm = (item, regenerateOptions, groups) => {
214
219
}
215
220
break ;
216
221
case 'characterClassEscape' :
217
- update (
218
- item ,
219
- getCharacterClassEscapeSet (
220
- item . value ,
221
- config . unicode ,
222
- config . ignoreCase
223
- ) . toString ( regenerateOptions )
224
- ) ;
222
+ if ( config . transform . unicodeFlag ) {
223
+ update (
224
+ item ,
225
+ getCharacterClassEscapeSet (
226
+ item . value ,
227
+ /* config.transform.unicodeFlag implies config.flags.unicode */ true ,
228
+ config . flags . ignoreCase
229
+ ) . toString ( regenerateOptions )
230
+ ) ;
231
+ }
225
232
break ;
226
233
case 'group' :
227
234
if ( item . behavior == 'normal' ) {
228
235
groups . lastIndex ++ ;
229
236
}
230
- if ( item . name && config . namedGroup ) {
237
+ if ( item . name && config . transform . namedGroups ) {
231
238
const name = item . name . value ;
232
239
233
240
if ( groups . names [ name ] ) {
@@ -262,7 +269,7 @@ const processTerm = (item, regenerateOptions, groups) => {
262
269
case 'value' :
263
270
const codePoint = item . codePoint ;
264
271
const set = regenerate ( codePoint ) ;
265
- if ( config . ignoreCase && config . unicode && ! config . useUnicodeFlag ) {
272
+ if ( config . flags . ignoreCase && config . transform . unicodeFlag ) {
266
273
const folded = caseFold ( codePoint ) ;
267
274
if ( folded ) {
268
275
set . add ( folded ) ;
@@ -300,42 +307,84 @@ const processTerm = (item, regenerateOptions, groups) => {
300
307
return item ;
301
308
} ;
302
309
310
// Feature switches passed as the third argument to `parse` in
// `rewritePattern`. Every stable RegExp feature is enabled by default.
const regjsparserFeatures = {
	'unicodePropertyEscape': true,
	'namedGroups': true,
	'lookbehind': true
};
316
+
303
317
// Module-level state describing the pattern currently being rewritten.
// `rewritePattern` overwrites the `flags` and `transform` fields on every
// call, before it starts processing terms.
const config = {
	// Which flags the regular expression was written with.
	'flags': {
		'ignoreCase': false,
		'unicode': false,
		'dotAll': false,
	},
	// Which features must be rewritten in the output pattern.
	'transform': {
		'dotAllFlag': false,
		'unicodeFlag': false,
		'unicodePropertyEscapes': false,
		'namedGroups': false,
	},
	// True when the pattern has the `u` flag and that flag is not being
	// transformed.
	get useUnicodeFlag() {
		return this.flags.unicode && !this.transform.unicodeFlag;
	}
};
312
- const rewritePattern = ( pattern , flags , options ) => {
313
- config . unicode = flags && flags . includes ( 'u' ) ;
314
- const regjsparserFeatures = {
315
- 'unicodePropertyEscape' : config . unicode ,
316
- 'namedGroups' : true ,
317
- 'lookbehind' : options && options . lookbehind
318
- } ;
319
- config . ignoreCase = flags && flags . includes ( 'i' ) ;
320
- const supportDotAllFlag = options && options . dotAllFlag ;
321
- config . dotAll = supportDotAllFlag && flags && flags . includes ( 's' ) ;
322
- config . namedGroup = options && options . namedGroup ;
323
- config . useDotAllFlag = options && options . useDotAllFlag ;
324
- config . useUnicodeFlag = options && options . useUnicodeFlag ;
325
- config . unicodePropertyEscape = options && options . unicodePropertyEscape ;
326
- if ( supportDotAllFlag && config . useDotAllFlag ) {
327
- throw new Error ( '`useDotAllFlag` and `dotAllFlag` cannot both be true!' ) ;
333
+
334
/**
 * Checks the `options` object given to `rewritePattern` and rejects anything
 * that is not understood.
 *
 * - `dotAllFlag`, `unicodeFlag`, `unicodePropertyEscapes` and `namedGroups`
 *   must be `false`, `'transform'`, or nullish.
 * - `onNamedGroup` must be a function or nullish.
 * - Any other key is rejected.
 *
 * @param {Object} [options] Caller-supplied options; a falsy value skips
 *   validation entirely.
 * @throws {Error} When a key is unknown or a value is not an accepted one.
 */
const validateOptions = (options) => {
	if (!options) {
		return;
	}

	for (const key of Object.keys(options)) {
		const value = options[key];
		switch (key) {
			case 'dotAllFlag':
			case 'unicodeFlag':
			case 'unicodePropertyEscapes':
			case 'namedGroups':
				// `!= null` deliberately lets both `null` and `undefined` through.
				if (value != null && value !== false && value !== 'transform') {
					throw new Error(`.${key} must be false (default) or 'transform'.`);
				}
				break;
			case 'onNamedGroup':
				if (value != null && typeof value !== 'function') {
					throw new Error('.onNamedGroup must be a function.');
				}
				break;
			default:
				throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	}
};
358
+
359
// Reports whether `flags` contains `flag`. A falsy `flags` value (missing or
// empty) is treated as containing no flags.
const hasFlag = (flags, flag) => flags ? flags.includes(flag) : false;
360
// Reports whether option `name` is set to exactly the string 'transform'.
// A falsy `options` value means nothing is transformed.
const transform = (options, name) => options ? options[name] === 'transform' : false;
361
+
362
+ const rewritePattern = ( pattern , flags , options ) => {
363
+ validateOptions ( options ) ;
364
+
365
+ config . flags . unicode = hasFlag ( flags , 'u' ) ;
366
+ config . flags . ignoreCase = hasFlag ( flags , 'i' ) ;
367
+ config . flags . dotAll = hasFlag ( flags , 's' ) ;
368
+
369
+ config . transform . dotAllFlag = config . flags . dotAll && transform ( options , 'dotAllFlag' ) ;
370
+ config . transform . unicodeFlag = config . flags . unicode && transform ( options , 'unicodeFlag' ) ;
371
+ // unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
372
+ config . transform . unicodePropertyEscapes = config . flags . unicode && (
373
+ transform ( options , 'unicodeFlag' ) || transform ( options , 'unicodePropertyEscapes' )
374
+ ) ;
375
+ config . transform . namedGroups = transform ( options , 'namedGroups' ) ;
376
+
329
377
const regenerateOptions = {
330
- 'hasUnicodeFlag' : config . useUnicodeFlag ,
331
- 'bmpOnly' : ! config . unicode
378
+ 'hasUnicodeFlag' : config . flags . unicode && ! config . transform . unicodeFlag ,
379
+ 'bmpOnly' : ! config . flags . unicode
332
380
} ;
333
381
const groups = {
334
382
'onNamedGroup' : options && options . onNamedGroup ,
335
383
'lastIndex' : 0 ,
336
384
'names' : Object . create ( null ) , // { [name]: index }
337
385
'unmatchedReferences' : Object . create ( null ) // { [name]: Array<reference> }
338
386
} ;
387
+
339
388
const tree = parse ( pattern , flags , regjsparserFeatures ) ;
340
389
// Note: `processTerm` mutates `tree` and `groups`.
341
390
processTerm ( tree , regenerateOptions , groups ) ;
0 commit comments