@@ -60,7 +60,7 @@ function normalize(text) {
60
60
// Compile the regular expression for text normalization once.
61
61
const replace = Object . keys ( CHARACTERS_TO_NORMALIZE ) . join ( "" ) ;
62
62
normalizationRegex = new RegExp (
63
- `([${ replace } ])|(-\\n)|(\\n)|(\\p{Mn}+)` ,
63
+ `([${ replace } ])|(-\\n)|(\\n)|(\\p{Mn}+)|(\\p{Cf}+) ` ,
64
64
"gum"
65
65
) ;
66
66
}
@@ -109,7 +109,7 @@ function normalize(text) {
109
109
110
110
normalized = normalized . replace (
111
111
normalizationRegex ,
112
- ( match , p1 , p2 , p3 , p4 , i ) => {
112
+ ( match , p1 , p2 , p3 , p4 , p5 , i ) => {
113
113
i -= shiftOrigin ;
114
114
if ( p1 ) {
115
115
// Fractions...
@@ -142,6 +142,13 @@ function normalize(text) {
142
142
return " " ;
143
143
}
144
144
145
+ if ( p5 ) {
146
+ // Invisible formatting indicators.
147
+ positions . push ( [ i - shift , p5 . length + shift ] ) ;
148
+ shift += p5 . length ;
149
+ return "" ;
150
+ }
151
+
145
152
// Diacritics.
146
153
hasDiacritics = true ;
147
154
let jj = match . length ;
@@ -477,10 +484,10 @@ class PDFFindController {
477
484
} ) ;
478
485
}
479
486
480
- // Replace spaces by \s + to be sure to match any spaces.
487
+ // Replace spaces by [\s\p{Zs}] + to be sure to match any spaces.
481
488
// We must do it after the if (matchDiacritics) block to avoid
482
489
// wrong things with the "s".
483
- query = query . replace ( whitespacesRegExp , "\\s+" ) ;
490
+ query = query . replace ( whitespacesRegExp , "[ \\s\\p{Zs}] +" ) ;
484
491
485
492
return query ;
486
493
}
0 commit comments