Skip to content

Commit a20b623

Browse files
committed
WIP
1 parent d92e0b2 commit a20b623

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

web/pdf_find_controller.js

+11-4
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ function normalize(text) {
6060
// Compile the regular expression for text normalization once.
6161
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
6262
normalizationRegex = new RegExp(
63-
`([${replace}])|(-\\n)|(\\n)|(\\p{Mn}+)`,
63+
`([${replace}])|(-\\n)|(\\n)|(\\p{Mn}+)|(\\p{Cf}+)`,
6464
"gum"
6565
);
6666
}
@@ -109,7 +109,7 @@ function normalize(text) {
109109

110110
normalized = normalized.replace(
111111
normalizationRegex,
112-
(match, p1, p2, p3, p4, i) => {
112+
(match, p1, p2, p3, p4, p5, i) => {
113113
i -= shiftOrigin;
114114
if (p1) {
115115
// Fractions...
@@ -142,6 +142,13 @@ function normalize(text) {
142142
return " ";
143143
}
144144

145+
if (p5) {
146+
// Invisible formatting indicators.
147+
positions.push([i - shift, p5.length + shift]);
148+
shift += p5.length;
149+
return "";
150+
}
151+
145152
// Diacritics.
146153
hasDiacritics = true;
147154
let jj = match.length;
@@ -477,10 +484,10 @@ class PDFFindController {
477484
});
478485
}
479486

480-
// Replace spaces by \s+ to be sure to match any spaces.
487+
// Replace spaces by [\s\p{Zs}]+ to be sure to match any spaces.
481488
// We must do it after the if (matchDiacritics) block to avoid
482489
// wrong things with the "s".
483-
query = query.replace(whitespacesRegExp, "\\s+");
490+
query = query.replace(whitespacesRegExp, "[\\s\\p{Zs}]+");
484491

485492
return query;
486493
}

0 commit comments

Comments
 (0)