Skip to content

Commit 73ad20e

Browse files
committed
Support search with or without diacritics
- get the original index using a dichotomic (binary) search instead of a linear one; - remove diacritics from the text using NFD decomposition and a Unicode regex; - convert the query string into a RegExp; - replace whitespace in the query with \s+; - remove pdf_find_utils.js.
1 parent f6f3351 commit 73ad20e

15 files changed

+318
-310
lines changed

l10n/en-US/viewer.properties

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ find_next.title=Find the next occurrence of the phrase
168168
find_next_label=Next
169169
find_highlight=Highlight all
170170
find_match_case_label=Match case
171+
find_match_diacritics_label=Match Diacritics
171172
find_entire_word_label=Whole words
172173
find_reached_top=Reached top of document, continued from bottom
173174
find_reached_bottom=Reached end of document, continued from top

test/pdfs/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@
321321
!issue4650.pdf
322322
!issue6721_reduced.pdf
323323
!issue3025.pdf
324+
!french_diacritics.pdf
324325
!issue2099-1.pdf
325326
!issue3371.pdf
326327
!issue2956.pdf

test/pdfs/french_diacritics.pdf

10.3 KB
Binary file not shown.

test/unit/clitests.json

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
"node_stream_spec.js",
3131
"parser_spec.js",
3232
"pdf_find_controller_spec.js",
33-
"pdf_find_utils_spec.js",
3433
"pdf_history_spec.js",
3534
"primitives_spec.js",
3635
"stream_spec.js",

test/unit/jasmine-boot.js

-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ async function initializePDFJS(callback) {
7575
"pdfjs-test/unit/network_utils_spec.js",
7676
"pdfjs-test/unit/parser_spec.js",
7777
"pdfjs-test/unit/pdf_find_controller_spec.js",
78-
"pdfjs-test/unit/pdf_find_utils_spec.js",
7978
"pdfjs-test/unit/pdf_history_spec.js",
8079
"pdfjs-test/unit/primitives_spec.js",
8180
"pdfjs-test/unit/scripting_spec.js",

test/unit/pdf_find_controller_spec.js

+125-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ function testSearch({
108108
return;
109109
}
110110
eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount);
111-
112111
expect(evt.matchesCount.total).toBe(totalMatches);
113112
for (let i = 0; i < totalPages; i++) {
114113
expect(pdfFindController.pageMatches[i].length).toEqual(
@@ -271,5 +270,130 @@ describe("pdf_find_controller", function () {
271270
pageMatches: [[19, 48, 66]],
272271
pageMatchesLength: [[8, 8, 8]],
273272
});
273+
274+
await testSearch({
275+
eventBus,
276+
pdfFindController,
277+
parameters: {
278+
query: "1/2",
279+
caseSensitive: false,
280+
entireWord: false,
281+
phraseSearch: true,
282+
findPrevious: false,
283+
},
284+
matchesPerPage: [2],
285+
selectedMatch: {
286+
pageIndex: 0,
287+
matchIndex: 0,
288+
},
289+
pageMatches: [[28, 57]],
290+
pageMatchesLength: [[1, 1]],
291+
});
292+
293+
await testSearch({
294+
eventBus,
295+
pdfFindController,
296+
parameters: {
297+
query: "½",
298+
caseSensitive: false,
299+
entireWord: false,
300+
phraseSearch: true,
301+
findPrevious: false,
302+
},
303+
matchesPerPage: [2],
304+
selectedMatch: {
305+
pageIndex: 0,
306+
matchIndex: 0,
307+
},
308+
pageMatches: [[28, 57]],
309+
pageMatchesLength: [[1, 1]],
310+
});
311+
});
312+
313+
it("performs a normal search, where the text with diacritics is normalized", async function () {
314+
const { eventBus, pdfFindController } = await initPdfFindController(
315+
"french_diacritics.pdf"
316+
);
317+
318+
await testSearch({
319+
eventBus,
320+
pdfFindController,
321+
parameters: {
322+
query: "a",
323+
caseSensitive: false,
324+
entireWord: false,
325+
phraseSearch: true,
326+
findPrevious: false,
327+
matchDiacritics: false,
328+
},
329+
matchesPerPage: [6],
330+
selectedMatch: {
331+
pageIndex: 0,
332+
matchIndex: 0,
333+
},
334+
pageMatches: [[0, 2, 4, 6, 8, 10]],
335+
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
336+
});
337+
338+
await testSearch({
339+
eventBus,
340+
pdfFindController,
341+
parameters: {
342+
query: "u",
343+
caseSensitive: false,
344+
entireWord: false,
345+
phraseSearch: true,
346+
findPrevious: false,
347+
matchDiacritics: false,
348+
},
349+
matchesPerPage: [6],
350+
selectedMatch: {
351+
pageIndex: 0,
352+
matchIndex: 0,
353+
},
354+
pageMatches: [[44, 46, 48, 50, 52, 54]],
355+
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
356+
});
357+
358+
await testSearch({
359+
eventBus,
360+
pdfFindController,
361+
parameters: {
362+
query: "ë",
363+
caseSensitive: false,
364+
entireWord: false,
365+
phraseSearch: true,
366+
findPrevious: false,
367+
matchDiacritics: true,
368+
},
369+
matchesPerPage: [2],
370+
selectedMatch: {
371+
pageIndex: 0,
372+
matchIndex: 0,
373+
},
374+
pageMatches: [[28, 30]],
375+
pageMatchesLength: [[1, 1]],
376+
});
377+
});
378+
379+
it("performs a search where one of the results contains an hyphen", async function () {
380+
const { eventBus, pdfFindController } = await initPdfFindController();
381+
382+
await testSearch({
383+
eventBus,
384+
pdfFindController,
385+
parameters: {
386+
query: "optimiz",
387+
caseSensitive: false,
388+
entireWord: false,
389+
phraseSearch: true,
390+
findPrevious: false,
391+
},
392+
matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
393+
selectedMatch: {
394+
pageIndex: 0,
395+
matchIndex: 0,
396+
},
397+
});
274398
});
275399
});

test/unit/pdf_find_utils_spec.js

-56
This file was deleted.

web/app.js

+3
Original file line numberDiff line numberDiff line change
@@ -2608,6 +2608,7 @@ function webViewerFind(evt) {
26082608
entireWord: evt.entireWord,
26092609
highlightAll: evt.highlightAll,
26102610
findPrevious: evt.findPrevious,
2611+
matchDiacritics: evt.matchDiacritics,
26112612
});
26122613
}
26132614

@@ -2619,6 +2620,7 @@ function webViewerFindFromUrlHash(evt) {
26192620
entireWord: false,
26202621
highlightAll: true,
26212622
findPrevious: false,
2623+
matchDiacritics: true,
26222624
});
26232625
}
26242626

@@ -2825,6 +2827,7 @@ function webViewerKeyDown(evt) {
28252827
entireWord: findState.entireWord,
28262828
highlightAll: findState.highlightAll,
28272829
findPrevious: cmd === 5 || cmd === 12,
2830+
matchDiacritics: findState.matchDiacritics,
28282831
});
28292832
}
28302833
handled = true;

web/firefoxcom.js

+2
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ class MozL10n {
218218
"findcasesensitivitychange",
219219
"findentirewordchange",
220220
"findbarclose",
221+
"finddiacriticmatchingchange",
221222
];
222223
const handleEvent = function ({ type, detail }) {
223224
if (!PDFViewerApplication.initialized) {
@@ -236,6 +237,7 @@ class MozL10n {
236237
entireWord: !!detail.entireWord,
237238
highlightAll: !!detail.highlightAll,
238239
findPrevious: !!detail.findPrevious,
240+
matchDiacritics: !!detail.matchDiacritics,
239241
});
240242
};
241243

web/pdf_find_bar.js

+6
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class PDFFindBar {
3333
this.highlightAll = options.highlightAllCheckbox;
3434
this.caseSensitive = options.caseSensitiveCheckbox;
3535
this.entireWord = options.entireWordCheckbox;
36+
this.matchDiacritics = options.matchDiacriticsCheckbox;
3637
this.findMsg = options.findMsg;
3738
this.findResultsCount = options.findResultsCount;
3839
this.findPreviousButton = options.findPreviousButton;
@@ -82,6 +83,10 @@ class PDFFindBar {
8283
this.dispatchEvent("entirewordchange");
8384
});
8485

86+
this.matchDiacritics.addEventListener("click", () => {
87+
this.dispatchEvent("diacriticmatchingchange");
88+
});
89+
8590
this.eventBus._on("resize", this._adjustWidth.bind(this));
8691
}
8792

@@ -99,6 +104,7 @@ class PDFFindBar {
99104
entireWord: this.entireWord.checked,
100105
highlightAll: this.highlightAll.checked,
101106
findPrevious: findPrev,
107+
matchDiacritics: this.matchDiacritics.checked,
102108
});
103109
}
104110

0 commit comments

Comments
 (0)