Skip to content

Commit 4e025e1

Browse files
Merge pull request #15105 from calixteman/15094
Always flush the current item with MarkedContent stuff when getting text (#15094)
2 parents f208107 + 3789dab commit 4e025e1

File tree

2 files changed: +28 −3 lines changed

src/core/evaluator.js

+3 −2
Original file line number | Diff line number | Diff line change
@@ -3290,6 +3290,7 @@ class PartialEvaluator {
32903290
);
32913291
return;
32923292
case OPS.beginMarkedContent:
3293+
flushTextContentItem();
32933294
if (includeMarkedContent) {
32943295
textContent.items.push({
32953296
type: "beginMarkedContent",
@@ -3298,8 +3299,8 @@ class PartialEvaluator {
32983299
}
32993300
break;
33003301
case OPS.beginMarkedContentProps:
3302+
flushTextContentItem();
33013303
if (includeMarkedContent) {
3302-
flushTextContentItem();
33033304
let mcid = null;
33043305
if (args[1] instanceof Dict) {
33053306
mcid = args[1].get("MCID");
@@ -3314,8 +3315,8 @@ class PartialEvaluator {
33143315
}
33153316
break;
33163317
case OPS.endMarkedContent:
3318+
flushTextContentItem();
33173319
if (includeMarkedContent) {
3318-
flushTextContentItem();
33193320
textContent.items.push({
33203321
type: "endMarkedContent",
33213322
});

test/unit/api_spec.js

+25 −1
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,9 @@ describe("api", function () {
7878
}
7979

8080
function mergeText(items) {
81-
return items.map(chunk => chunk.str + (chunk.hasEOL ? "\n" : "")).join("");
81+
return items
82+
.map(chunk => (chunk.str ?? "") + (chunk.hasEOL ? "\n" : ""))
83+
.join("");
8284
}
8385

8486
describe("getDocument", function () {
@@ -2275,6 +2277,28 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
22752277
await loadingTask.destroy();
22762278
});
22772279

2280+
it("gets text content with or without includeMarkedContent, and compare (issue 15094)", async function () {
2281+
if (isNodeJS) {
2282+
pending("Linked test-cases are not supported in Node.js.");
2283+
}
2284+
2285+
const loadingTask = getDocument(buildGetDocumentParams("pdf.pdf"));
2286+
const pdfDoc = await loadingTask.promise;
2287+
const pdfPage = await pdfDoc.getPage(568);
2288+
let { items } = await pdfPage.getTextContent({
2289+
includeMarkedContent: false,
2290+
});
2291+
const textWithoutMC = mergeText(items);
2292+
({ items } = await pdfPage.getTextContent({
2293+
includeMarkedContent: true,
2294+
}));
2295+
const textWithMC = mergeText(items);
2296+
2297+
expect(textWithoutMC).toEqual(textWithMC);
2298+
2299+
await loadingTask.destroy();
2300+
});
2301+
22782302
it("gets empty structure tree", async function () {
22792303
const tree = await page.getStructTree();
22802304

0 commit comments

Comments
 (0)