Skip to content

Commit d92e0b2

Browse files
committed
Support search with or without diacritics
- get the original index using a dichotomic (binary) search instead of a linear one; - remove diacritics from the text using NFD decomposition and a Unicode regex; - convert the query string into a RegExp; - replace whitespace in the query with \s+;
1 parent f5b79be commit d92e0b2

11 files changed

+345
-176
lines changed

l10n/en-US/viewer.properties

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ find_next.title=Find the next occurrence of the phrase
168168
find_next_label=Next
169169
find_highlight=Highlight all
170170
find_match_case_label=Match case
171+
find_match_diacritics_label=Match Diacritics
171172
find_entire_word_label=Whole words
172173
find_reached_top=Reached top of document, continued from bottom
173174
find_reached_bottom=Reached end of document, continued from top

test/pdfs/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@
343343
!issue4650.pdf
344344
!issue6721_reduced.pdf
345345
!issue3025.pdf
346+
!french_diacritics.pdf
346347
!issue2099-1.pdf
347348
!issue3371.pdf
348349
!issue2956.pdf

test/pdfs/french_diacritics.pdf

10.3 KB
Binary file not shown.

test/unit/pdf_find_controller_spec.js

+125
Original file line numberDiff line numberDiff line change
@@ -271,5 +271,130 @@ describe("pdf_find_controller", function () {
271271
pageMatches: [[19, 48, 66]],
272272
pageMatchesLength: [[8, 8, 8]],
273273
});
274+
275+
await testSearch({
276+
eventBus,
277+
pdfFindController,
278+
parameters: {
279+
query: "1/2",
280+
caseSensitive: false,
281+
entireWord: false,
282+
phraseSearch: true,
283+
findPrevious: false,
284+
},
285+
matchesPerPage: [2],
286+
selectedMatch: {
287+
pageIndex: 0,
288+
matchIndex: 0,
289+
},
290+
pageMatches: [[28, 57]],
291+
pageMatchesLength: [[1, 1]],
292+
});
293+
294+
await testSearch({
295+
eventBus,
296+
pdfFindController,
297+
parameters: {
298+
query: "½",
299+
caseSensitive: false,
300+
entireWord: false,
301+
phraseSearch: true,
302+
findPrevious: false,
303+
},
304+
matchesPerPage: [2],
305+
selectedMatch: {
306+
pageIndex: 0,
307+
matchIndex: 0,
308+
},
309+
pageMatches: [[28, 57]],
310+
pageMatchesLength: [[1, 1]],
311+
});
312+
});
313+
314+
it("performs a normal search, where the text with diacritics is normalized", async function () {
315+
const { eventBus, pdfFindController } = await initPdfFindController(
316+
"french_diacritics.pdf"
317+
);
318+
319+
await testSearch({
320+
eventBus,
321+
pdfFindController,
322+
parameters: {
323+
query: "a",
324+
caseSensitive: false,
325+
entireWord: false,
326+
phraseSearch: true,
327+
findPrevious: false,
328+
matchDiacritics: false,
329+
},
330+
matchesPerPage: [6],
331+
selectedMatch: {
332+
pageIndex: 0,
333+
matchIndex: 0,
334+
},
335+
pageMatches: [[0, 2, 4, 6, 8, 10]],
336+
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
337+
});
338+
339+
await testSearch({
340+
eventBus,
341+
pdfFindController,
342+
parameters: {
343+
query: "u",
344+
caseSensitive: false,
345+
entireWord: false,
346+
phraseSearch: true,
347+
findPrevious: false,
348+
matchDiacritics: false,
349+
},
350+
matchesPerPage: [6],
351+
selectedMatch: {
352+
pageIndex: 0,
353+
matchIndex: 0,
354+
},
355+
pageMatches: [[44, 46, 48, 50, 52, 54]],
356+
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
357+
});
358+
359+
await testSearch({
360+
eventBus,
361+
pdfFindController,
362+
parameters: {
363+
query: "ë",
364+
caseSensitive: false,
365+
entireWord: false,
366+
phraseSearch: true,
367+
findPrevious: false,
368+
matchDiacritics: true,
369+
},
370+
matchesPerPage: [2],
371+
selectedMatch: {
372+
pageIndex: 0,
373+
matchIndex: 0,
374+
},
375+
pageMatches: [[28, 30]],
376+
pageMatchesLength: [[1, 1]],
377+
});
378+
});
379+
380+
it("performs a search where one of the results contains an hyphen", async function () {
381+
const { eventBus, pdfFindController } = await initPdfFindController();
382+
383+
await testSearch({
384+
eventBus,
385+
pdfFindController,
386+
parameters: {
387+
query: "optimiz",
388+
caseSensitive: false,
389+
entireWord: false,
390+
phraseSearch: true,
391+
findPrevious: false,
392+
},
393+
matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
394+
selectedMatch: {
395+
pageIndex: 0,
396+
matchIndex: 0,
397+
},
398+
});
274399
});
275400
});

web/app.js

+3
Original file line numberDiff line numberDiff line change
@@ -2605,6 +2605,7 @@ function webViewerFind(evt) {
26052605
entireWord: evt.entireWord,
26062606
highlightAll: evt.highlightAll,
26072607
findPrevious: evt.findPrevious,
2608+
matchDiacritics: evt.matchDiacritics,
26082609
});
26092610
}
26102611

@@ -2616,6 +2617,7 @@ function webViewerFindFromUrlHash(evt) {
26162617
entireWord: false,
26172618
highlightAll: true,
26182619
findPrevious: false,
2620+
matchDiacritics: true,
26192621
});
26202622
}
26212623

@@ -2820,6 +2822,7 @@ function webViewerKeyDown(evt) {
28202822
entireWord: findState.entireWord,
28212823
highlightAll: findState.highlightAll,
28222824
findPrevious: cmd === 5 || cmd === 12,
2825+
matchDiacritics: findState.matchDiacritics,
28232826
});
28242827
}
28252828
handled = true;

web/firefoxcom.js

+2
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ class MozL10n {
218218
"findcasesensitivitychange",
219219
"findentirewordchange",
220220
"findbarclose",
221+
"finddiacriticmatchingchange",
221222
];
222223
const handleEvent = function ({ type, detail }) {
223224
if (!PDFViewerApplication.initialized) {
@@ -236,6 +237,7 @@ class MozL10n {
236237
entireWord: !!detail.entireWord,
237238
highlightAll: !!detail.highlightAll,
238239
findPrevious: !!detail.findPrevious,
240+
matchDiacritics: !!detail.matchDiacritics,
239241
});
240242
};
241243

web/pdf_find_bar.js

+6
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class PDFFindBar {
3333
this.highlightAll = options.highlightAllCheckbox;
3434
this.caseSensitive = options.caseSensitiveCheckbox;
3535
this.entireWord = options.entireWordCheckbox;
36+
this.matchDiacritics = options.matchDiacriticsCheckbox;
3637
this.findMsg = options.findMsg;
3738
this.findResultsCount = options.findResultsCount;
3839
this.findPreviousButton = options.findPreviousButton;
@@ -82,6 +83,10 @@ class PDFFindBar {
8283
this.dispatchEvent("entirewordchange");
8384
});
8485

86+
this.matchDiacritics.addEventListener("click", () => {
87+
this.dispatchEvent("diacriticmatchingchange");
88+
});
89+
8590
this.eventBus._on("resize", this._adjustWidth.bind(this));
8691
}
8792

@@ -99,6 +104,7 @@ class PDFFindBar {
99104
entireWord: this.entireWord.checked,
100105
highlightAll: this.highlightAll.checked,
101106
findPrevious: findPrev,
107+
matchDiacritics: this.matchDiacritics.checked,
102108
});
103109
}
104110

0 commit comments

Comments
 (0)