Skip to content

Commit 287c4b7

Browse files
JoshuaKGoldbergfasttimemdjermanovic
authored
feat: no-misleading-character-class granular errors (#17515)
* feat: `no-misleading-character-class` granular errors * fix: column offsets * fix: missing CallExpression * Apply suggestions from code review Co-authored-by: Francesco Trotta <[email protected]> * All tests passing again * Edge case: quadruple back slashes * Apply suggestions from code review Co-authored-by: Francesco Trotta <[email protected]> * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * Adjusted for repeat characters * Separated kinds into a Set * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * Adjust unit tests for accepted changes * Split into adjustment function for sequence pairs * Reduced complexity because of edge cases, as reqiested * Use chars array as suggested - mostly there * Checked that slices include first and last * Used sourceCode.getText(node) over node.raw, and added comments * Update lib/rules/no-misleading-character-class.js * Drastically limit applicability * Restricted regexp literals on \u * Update lib/rules/no-misleading-character-class.js Co-authored-by: Milos Djermanovic <[email protected]> * Corrected erroneous escaped u exclusion * Update lib/rules/no-misleading-character-class.js Co-authored-by: Francesco Trotta <[email protected]> * If a report range is missing, skip other reports * Update lib/rules/no-misleading-character-class.js Co-authored-by: Milos Djermanovic <[email protected]> * Merge branch 'main' * Implement fasttime suggestion for a Map * nit: for loop * Unify zwj reports * Added a couple of multiline tests * use .at(-1) --------- Co-authored-by: Francesco Trotta <[email protected]> Co-authored-by: Milos Djermanovic <[email protected]>
1 parent d31c180 commit 287c4b7

File tree

2 files changed

+777
-152
lines changed

2 files changed

+777
-152
lines changed

lib/rules/no-misleading-character-class.js

+186-70
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ function *iterateCharacterSequence(nodes) {
6262
}
6363
}
6464

65-
6665
/**
6766
* Checks whether the given character node is a Unicode code point escape or not.
6867
* @param {Character} char the character node to check.
@@ -73,80 +72,120 @@ function isUnicodeCodePointEscape(char) {
7372
}
7473

7574
/**
76-
* Each function returns `true` if it detects that kind of problem.
77-
* @type {Record<string, (chars: Character[]) => boolean>}
75+
* Each function returns matched characters if it detects that kind of problem.
76+
* @type {Record<string, (char: Character, index: number, chars: Character[]) => Character[] | null>}
7877
*/
79-
const hasCharacterSequence = {
80-
surrogatePairWithoutUFlag(chars) {
81-
return chars.some((c, i) => {
82-
if (i === 0) {
83-
return false;
84-
}
85-
const c1 = chars[i - 1];
86-
87-
return (
88-
isSurrogatePair(c1.value, c.value) &&
89-
!isUnicodeCodePointEscape(c1) &&
90-
!isUnicodeCodePointEscape(c)
91-
);
92-
});
78+
const characterSequenceIndexFilters = {
79+
surrogatePairWithoutUFlag(char, index, chars) {
80+
if (index === 0) {
81+
return null;
82+
}
83+
84+
const previous = chars[index - 1];
85+
86+
if (
87+
isSurrogatePair(previous.value, char.value) &&
88+
!isUnicodeCodePointEscape(previous) &&
89+
!isUnicodeCodePointEscape(char)
90+
) {
91+
return [previous, char];
92+
}
93+
94+
return null;
9395
},
9496

95-
surrogatePair(chars) {
96-
return chars.some((c, i) => {
97-
if (i === 0) {
98-
return false;
99-
}
100-
const c1 = chars[i - 1];
101-
102-
return (
103-
isSurrogatePair(c1.value, c.value) &&
104-
(
105-
isUnicodeCodePointEscape(c1) ||
106-
isUnicodeCodePointEscape(c)
107-
)
108-
);
109-
});
97+
surrogatePair(char, index, chars) {
98+
if (index === 0) {
99+
return null;
100+
}
101+
102+
const previous = chars[index - 1];
103+
104+
if (
105+
isSurrogatePair(previous.value, char.value) &&
106+
(
107+
isUnicodeCodePointEscape(previous) ||
108+
isUnicodeCodePointEscape(char)
109+
)
110+
) {
111+
return [previous, char];
112+
}
113+
114+
return null;
110115
},
111116

112-
combiningClass(chars) {
113-
return chars.some((c, i) => (
114-
i !== 0 &&
115-
isCombiningCharacter(c.value) &&
116-
!isCombiningCharacter(chars[i - 1].value)
117-
));
117+
combiningClass(char, index, chars) {
118+
if (
119+
index !== 0 &&
120+
isCombiningCharacter(char.value) &&
121+
!isCombiningCharacter(chars[index - 1].value)
122+
) {
123+
return [chars[index - 1], char];
124+
}
125+
126+
return null;
118127
},
119128

120-
emojiModifier(chars) {
121-
return chars.some((c, i) => (
122-
i !== 0 &&
123-
isEmojiModifier(c.value) &&
124-
!isEmojiModifier(chars[i - 1].value)
125-
));
129+
emojiModifier(char, index, chars) {
130+
if (
131+
index !== 0 &&
132+
isEmojiModifier(char.value) &&
133+
!isEmojiModifier(chars[index - 1].value)
134+
) {
135+
return [chars[index - 1], char];
136+
}
137+
138+
return null;
126139
},
127140

128-
regionalIndicatorSymbol(chars) {
129-
return chars.some((c, i) => (
130-
i !== 0 &&
131-
isRegionalIndicatorSymbol(c.value) &&
132-
isRegionalIndicatorSymbol(chars[i - 1].value)
133-
));
141+
regionalIndicatorSymbol(char, index, chars) {
142+
if (
143+
index !== 0 &&
144+
isRegionalIndicatorSymbol(char.value) &&
145+
isRegionalIndicatorSymbol(chars[index - 1].value)
146+
) {
147+
return [chars[index - 1], char];
148+
}
149+
150+
return null;
134151
},
135152

136-
zwj(chars) {
137-
const lastIndex = chars.length - 1;
153+
zwj(char, index, chars) {
154+
if (
155+
index !== 0 &&
156+
index !== chars.length - 1 &&
157+
char.value === 0x200d &&
158+
chars[index - 1].value !== 0x200d &&
159+
chars[index + 1].value !== 0x200d
160+
) {
161+
return chars.slice(index - 1, index + 2);
162+
}
138163

139-
return chars.some((c, i) => (
140-
i !== 0 &&
141-
i !== lastIndex &&
142-
c.value === 0x200d &&
143-
chars[i - 1].value !== 0x200d &&
144-
chars[i + 1].value !== 0x200d
145-
));
164+
return null;
146165
}
147166
};
148167

149-
const kinds = Object.keys(hasCharacterSequence);
168+
const kinds = Object.keys(characterSequenceIndexFilters);
169+
170+
/**
171+
* Collects the indices where the filter returns an array.
172+
* @param {Character[]} chars Characters to run the filter on.
173+
* @param {(char: Character, index: number, chars: Character[]) => Character[] | null} filter Finds matches for an index.
174+
* @returns {Character[][]} Indices where the filter returned true.
175+
*/
176+
function accumulate(chars, filter) {
177+
const matchingChars = [];
178+
179+
chars.forEach((char, index) => {
180+
const matches = filter(char, index, chars);
181+
182+
if (matches) {
183+
matchingChars.push(matches);
184+
}
185+
});
186+
187+
return matchingChars;
188+
}
150189

151190
//------------------------------------------------------------------------------
152191
// Rule Definition
@@ -181,6 +220,62 @@ module.exports = {
181220
const sourceCode = context.sourceCode;
182221
const parser = new RegExpParser();
183222

223+
/**
224+
* Generates a granular loc for context.report, if directly calculable.
225+
* @param {Character[]} chars Individual characters being reported on.
226+
* @param {Node} node Parent string node to report within.
227+
* @returns {Object | null} Granular loc for context.report, if directly calculable.
228+
* @see https://github.com/eslint/eslint/pull/17515
229+
*/
230+
function generateReportLocation(chars, node) {
231+
232+
// Limit to to literals and expression-less templates with raw values === their value.
233+
switch (node.type) {
234+
case "TemplateLiteral":
235+
if (node.expressions.length || node.quasis[0].value.raw !== node.quasis[0].value.cooked) {
236+
return null;
237+
}
238+
break;
239+
240+
case "Literal":
241+
if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
242+
return null;
243+
}
244+
break;
245+
246+
default:
247+
return null;
248+
}
249+
250+
return {
251+
start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
252+
end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end)
253+
};
254+
}
255+
256+
/**
257+
* Finds the report loc(s) for a range of matches.
258+
* @param {Character[][]} matches Characters that should trigger a report.
259+
* @param {Node} node The node to report.
260+
* @returns {Object | null} Node loc(s) for context.report.
261+
*/
262+
function getNodeReportLocations(matches, node) {
263+
const locs = [];
264+
265+
for (const chars of matches) {
266+
const loc = generateReportLocation(chars, node);
267+
268+
// If a report can't match to a range, don't report any others
269+
if (!loc) {
270+
return [node.loc];
271+
}
272+
273+
locs.push(loc);
274+
}
275+
276+
return locs;
277+
}
278+
184279
/**
185280
* Verify a given regular expression.
186281
* @param {Node} node The node to report.
@@ -208,21 +303,26 @@ module.exports = {
208303
return;
209304
}
210305

211-
const foundKinds = new Set();
306+
const foundKindMatches = new Map();
212307

213308
visitRegExpAST(patternNode, {
214309
onCharacterClassEnter(ccNode) {
215310
for (const chars of iterateCharacterSequence(ccNode.elements)) {
216311
for (const kind of kinds) {
217-
if (hasCharacterSequence[kind](chars)) {
218-
foundKinds.add(kind);
312+
const matches = accumulate(chars, characterSequenceIndexFilters[kind]);
313+
314+
if (foundKindMatches.has(kind)) {
315+
foundKindMatches.get(kind).push(...matches);
316+
} else {
317+
foundKindMatches.set(kind, matches);
219318
}
319+
220320
}
221321
}
222322
}
223323
});
224324

225-
for (const kind of foundKinds) {
325+
for (const [kind, matches] of foundKindMatches) {
226326
let suggest;
227327

228328
if (kind === "surrogatePairWithoutUFlag") {
@@ -232,11 +332,27 @@ module.exports = {
232332
}];
233333
}
234334

235-
context.report({
236-
node,
237-
messageId: kind,
238-
suggest
239-
});
335+
const locs = getNodeReportLocations(matches, node);
336+
337+
// Grapheme zero-width joiners (e.g. in 👨‍👩‍👦) visually show as one emoji
338+
if (kind === "zwj" && locs.length > 1) {
339+
context.report({
340+
loc: {
341+
start: locs[0].start,
342+
end: locs[1].end
343+
},
344+
messageId: kind,
345+
suggest
346+
});
347+
} else {
348+
for (const loc of locs) {
349+
context.report({
350+
loc,
351+
messageId: kind,
352+
suggest
353+
});
354+
}
355+
}
240356
}
241357
}
242358

@@ -267,7 +383,7 @@ module.exports = {
267383
const flags = getStringIfConstant(flagsNode, scope);
268384

269385
if (typeof pattern === "string") {
270-
verify(refNode, pattern, flags || "", fixer => {
386+
verify(patternNode, pattern, flags || "", fixer => {
271387

272388
if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
273389
return null;

0 commit comments

Comments
 (0)