Skip to content

Commit 584de91

Browse files
Jarred-Sumner authored and paperclover committed
Revert "decode regex if needed (oven-sh#5167)"
This reverts commit 32664df.
1 parent 1bea5f9 commit 584de91

File tree

5 files changed

+126
-167
lines changed

5 files changed

+126
-167
lines changed

src/js_ast.zig

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2538,10 +2538,7 @@ pub const E = struct {
25382538
};
25392539

25402540
pub const RegExp = struct {
2541-
data: union(enum) {
2542-
raw: string,
2543-
decoded: bun.BabyList(u16),
2544-
},
2541+
value: string,
25452542

25462543
// This exists for JavaScript bindings
25472544
// The RegExp constructor expects flags as a second argument.
@@ -2551,7 +2548,7 @@ pub const E = struct {
25512548
// ^
25522549
flags_offset: ?u16 = null,
25532550

2554-
pub var empty = RegExp{ .data = .{ .raw = "" } };
2551+
pub var empty = RegExp{ .value = "" };
25552552

25562553
pub fn pattern(this: RegExp) string {
25572554

src/js_lexer.zig

Lines changed: 26 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ fn NewLexer_(
295295
this.comments_to_preserve_before.clearAndFree();
296296
}
297297

298-
pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, comptime BufType: type, buf_: *BufType) !void {
298+
fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, comptime BufType: type, buf_: *BufType) !void {
299299
var buf = buf_.*;
300300
defer buf_.* = buf;
301301
if (comptime is_json) lexer.is_ascii_only = false;
@@ -2075,11 +2075,9 @@ fn NewLexer_(
20752075
if (comptime is_json) unreachable;
20762076
}
20772077

2078-
// returns true of the regex contents need to be decoded
2079-
pub fn scanRegExp(lexer: *LexerType) !bool {
2078+
pub fn scanRegExp(lexer: *LexerType) !void {
20802079
lexer.assertNotJSON();
20812080
lexer.regex_flags_start = null;
2082-
var decode = lexer.code_point >= 0x80;
20832081
while (true) {
20842082
switch (lexer.code_point) {
20852083
'/' => {
@@ -2123,48 +2121,20 @@ fn NewLexer_(
21232121
},
21242122
}
21252123
}
2126-
2127-
return decode;
2124+
return;
21282125
},
21292126
'[' => {
21302127
lexer.step();
2131-
if (lexer.code_point >= 0x80) decode = true;
21322128
while (lexer.code_point != ']') {
2133-
try lexer.scanRegExpValidateAndStep(&decode);
2129+
try lexer.scanRegExpValidateAndStep();
21342130
}
21352131
lexer.step();
2136-
if (lexer.code_point >= 0x80) decode = true;
21372132
},
21382133
else => {
2139-
try lexer.scanRegExpValidateAndStep(&decode);
2134+
try lexer.scanRegExpValidateAndStep();
21402135
},
21412136
}
21422137
}
2143-
2144-
return decode;
2145-
}
2146-
2147-
fn scanRegExpValidateAndStep(lexer: *LexerType, decode: *bool) !void {
2148-
lexer.assertNotJSON();
2149-
2150-
if (lexer.code_point == '\\') {
2151-
lexer.step();
2152-
if (lexer.code_point >= 0x80) decode.* = true;
2153-
}
2154-
2155-
switch (lexer.code_point) {
2156-
'\r', '\n', 0x2028, 0x2029 => {
2157-
// Newlines aren't allowed in regular expressions
2158-
try lexer.syntaxError();
2159-
},
2160-
-1 => { // EOF
2161-
try lexer.syntaxError();
2162-
},
2163-
else => {
2164-
lexer.step();
2165-
if (lexer.code_point >= 0x80) decode.* = true;
2166-
},
2167-
}
21682138
}
21692139

21702140
// TODO: use wtf-8 encoding.
@@ -2622,6 +2592,27 @@ fn NewLexer_(
26222592
try lexer.nextInsideJSXElement();
26232593
}
26242594

2595+
fn scanRegExpValidateAndStep(lexer: *LexerType) !void {
2596+
lexer.assertNotJSON();
2597+
2598+
if (lexer.code_point == '\\') {
2599+
lexer.step();
2600+
}
2601+
2602+
switch (lexer.code_point) {
2603+
'\r', '\n', 0x2028, 0x2029 => {
2604+
// Newlines aren't allowed in regular expressions
2605+
try lexer.syntaxError();
2606+
},
2607+
-1 => { // EOF
2608+
try lexer.syntaxError();
2609+
},
2610+
else => {
2611+
lexer.step();
2612+
},
2613+
}
2614+
}
2615+
26252616
pub fn rescanCloseBraceAsTemplateToken(lexer: *LexerType) !void {
26262617
lexer.assertNotJSON();
26272618

src/js_parser.zig

Lines changed: 3 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -13095,39 +13095,13 @@ fn NewParser_(
1309513095
return p.newExpr(E.BigInt{ .value = value }, loc);
1309613096
},
1309713097
.t_slash, .t_slash_equals => {
13098-
const needs_decode = try p.lexer.scanRegExp();
13098+
try p.lexer.scanRegExp();
1309913099
// always set regex_flags_start to null to make sure we don't accidentally use the wrong value later
1310013100
defer p.lexer.regex_flags_start = null;
13101-
13102-
const raw = p.lexer.raw();
13103-
13104-
if (!needs_decode) {
13105-
try p.lexer.next();
13106-
return p.newExpr(
13107-
E.RegExp{
13108-
.data = .{
13109-
.raw = raw,
13110-
},
13111-
.flags_offset = p.lexer.regex_flags_start,
13112-
},
13113-
loc,
13114-
);
13115-
}
13116-
13117-
var buf = std.ArrayList(u16).initCapacity(p.allocator, raw.len) catch unreachable;
13118-
try p.lexer.decodeEscapeSequences(p.lexer.start, raw, @TypeOf(buf), &buf);
13119-
13101+
const value = p.lexer.raw();
1312013102
try p.lexer.next();
1312113103

13122-
return p.newExpr(
13123-
E.RegExp{
13124-
.data = .{
13125-
.decoded = bun.BabyList(u16).init(buf.items),
13126-
},
13127-
.flags_offset = p.lexer.regex_flags_start,
13128-
},
13129-
loc,
13130-
);
13104+
return p.newExpr(E.RegExp{ .value = value, .flags_offset = p.lexer.regex_flags_start }, loc);
1313113105
},
1313213106
.t_void => {
1313313107
try p.lexer.next();

0 commit comments

Comments
 (0)