Skip to content

Commit 03c8c89

Browse files
Merge pull request #13171 from brendandahl/struct-tree
[api-minor] Add support for basic structure tree for accessibility.
2 parents b0473eb + fc9501a commit 03c8c89

22 files changed

+911
-14
lines changed

src/core/document.js

+20
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ import { calculateMD5 } from "./crypto.js";
5858
import { Linearization } from "./parser.js";
5959
import { OperatorList } from "./operator_list.js";
6060
import { PartialEvaluator } from "./evaluator.js";
61+
import { StructTreePage } from "./struct_tree.js";
6162
import { XFAFactory } from "./xfa/factory.js";
6263

6364
const DEFAULT_USER_UNIT = 1.0;
@@ -104,6 +105,10 @@ class Page {
104105
// Returns a new object id of the form `p<pageIndex>_<n>`. Each call
// pre-increments `idCounters.obj`, and the incremented value is used as <n>.
static createObjId() {
105106
return `p${pageIndex}_${++idCounters.obj}`;
106107
}
108+
109+
// Returns the string "page" followed by `ref.toString()`.
// NOTE(review): `ref` is taken from an enclosing scope that is not visible
// in this hunk; presumably it is the page's own reference -- confirm.
static getPageObjId() {
110+
return `page${ref.toString()}`;
111+
}
107112
};
108113
}
109114

@@ -406,6 +411,7 @@ class Page {
406411
handler,
407412
task,
408413
normalizeWhitespace,
414+
includeMarkedContent,
409415
sink,
410416
combineTextItems,
411417
}) {
@@ -437,12 +443,22 @@ class Page {
437443
task,
438444
resources: this.resources,
439445
normalizeWhitespace,
446+
includeMarkedContent,
440447
combineTextItems,
441448
sink,
442449
});
443450
});
444451
}
445452

453+
/**
 * Builds the structure tree for this page.
 *
 * Requests the catalog's `structTreeRoot` through
 * `pdfManager.ensureCatalog`, wraps it together with this page's
 * `pageDict` in a `StructTreePage`, calls `parse()` on that object and
 * returns it.
 *
 * NOTE(review): the catalog getter can produce `null` when the document has
 * no usable structure tree root; this method passes that value straight to
 * `StructTreePage`, so `StructTreePage` is assumed to cope with it -- confirm.
 *
 * @returns {Promise<StructTreePage>} The `StructTreePage` after `parse()`.
 */
async getStructTree() {
454+
const structTreeRoot = await this.pdfManager.ensureCatalog(
455+
"structTreeRoot"
456+
);
457+
const tree = new StructTreePage(structTreeRoot, this.pageDict);
458+
tree.parse();
459+
return tree;
460+
}
461+
446462
getAnnotationsData(intent) {
447463
return this._parsedAnnotations.then(function (annotations) {
448464
const annotationsData = [];
@@ -604,6 +620,10 @@ class PDFDocument {
604620
// Abstract stub: only calls `unreachable` with a message stating that the
// abstract `createObjId` was called.
static createObjId() {
605621
unreachable("Abstract method `createObjId` called.");
606622
}
623+
624+
// Abstract stub: only calls `unreachable` with a message stating that the
// abstract `getPageObjId` was called.
static getPageObjId() {
625+
unreachable("Abstract method `getPageObjId` called.");
626+
}
607627
};
608628
}
609629

src/core/evaluator.js

+38-1
Original file line numberDiff line numberDiff line change
@@ -1913,7 +1913,10 @@ class PartialEvaluator {
19131913
return;
19141914
}
19151915
// Other marked content types aren't supported yet.
1916-
args = [args[0].name];
1916+
args = [
1917+
args[0].name,
1918+
args[1] instanceof Dict ? args[1].get("MCID") : null,
1919+
];
19171920

19181921
break;
19191922
case OPS.beginMarkedContent:
@@ -1973,6 +1976,7 @@ class PartialEvaluator {
19731976
stateManager = null,
19741977
normalizeWhitespace = false,
19751978
combineTextItems = false,
1979+
includeMarkedContent = false,
19761980
sink,
19771981
seenStyles = new Set(),
19781982
}) {
@@ -2573,6 +2577,7 @@ class PartialEvaluator {
25732577
stateManager: xObjStateManager,
25742578
normalizeWhitespace,
25752579
combineTextItems,
2580+
includeMarkedContent,
25762581
sink: sinkWrapper,
25772582
seenStyles,
25782583
})
@@ -2650,6 +2655,38 @@ class PartialEvaluator {
26502655
})
26512656
);
26522657
return;
2658+
case OPS.beginMarkedContent:
2659+
if (includeMarkedContent) {
2660+
textContent.items.push({
2661+
type: "beginMarkedContent",
2662+
tag: isName(args[0]) ? args[0].name : null,
2663+
});
2664+
}
2665+
break;
2666+
case OPS.beginMarkedContentProps:
2667+
if (includeMarkedContent) {
2668+
flushTextContentItem();
2669+
let mcid = null;
2670+
if (isDict(args[1])) {
2671+
mcid = args[1].get("MCID");
2672+
}
2673+
textContent.items.push({
2674+
type: "beginMarkedContentProps",
2675+
id: Number.isInteger(mcid)
2676+
? `${self.idFactory.getPageObjId()}_mcid${mcid}`
2677+
: null,
2678+
tag: isName(args[0]) ? args[0].name : null,
2679+
});
2680+
}
2681+
break;
2682+
case OPS.endMarkedContent:
2683+
if (includeMarkedContent) {
2684+
flushTextContentItem();
2685+
textContent.items.push({
2686+
type: "endMarkedContent",
2687+
});
2688+
}
2689+
break;
26532690
} // switch
26542691
if (textContent.items.length >= sink.desiredSize) {
26552692
// Wait for ready, if we reach highWaterMark.

src/core/obj.js

+28-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ import { CipherTransformFactory } from "./crypto.js";
6060
import { ColorSpace } from "./colorspace.js";
6161
import { GlobalImageCache } from "./image_utils.js";
6262
import { MetadataParser } from "./metadata_parser.js";
63+
import { StructTreeRoot } from "./struct_tree.js";
6364

6465
function fetchDestination(dest) {
6566
return isDict(dest) ? dest.get("D") : dest;
@@ -200,6 +201,32 @@ class Catalog {
200201
return markInfo;
201202
}
202203

204+
get structTreeRoot() {
205+
let structTree = null;
206+
try {
207+
structTree = this._readStructTreeRoot();
208+
} catch (ex) {
209+
if (ex instanceof MissingDataException) {
210+
throw ex;
211+
}
212+
warn("Unable read to structTreeRoot info.");
213+
}
214+
return shadow(this, "structTreeRoot", structTree);
215+
}
216+
217+
/**
218+
* Reads the "StructTreeRoot" entry of the catalog dictionary. Returns
* `null` when that entry is not a dictionary; otherwise wraps it in a
* `StructTreeRoot`, calls `init()` on it and returns that object.
*
* @private
219+
*/
220+
_readStructTreeRoot() {
221+
const obj = this._catDict.get("StructTreeRoot");
222+
if (!isDict(obj)) {
223+
return null;
224+
}
225+
const root = new StructTreeRoot(obj);
226+
root.init();
227+
return root;
228+
}
229+
203230
get toplevelPagesDict() {
204231
const pagesObj = this._catDict.get("Pages");
205232
if (!isDict(pagesObj)) {
@@ -2626,4 +2653,4 @@ const ObjectLoader = (function () {
26262653
return ObjectLoader;
26272654
})();
26282655

2629-
export { Catalog, FileSpec, ObjectLoader, XRef };
2656+
export { Catalog, FileSpec, NumberTree, ObjectLoader, XRef };

0 commit comments

Comments
 (0)