Skip to content

Commit 7579644

Browse files
committed
[Annotations] Some annotations can have their values stored in the xfa:datasets
- Aims to fix #14685. - Adds a basic object for reading values from the parsed datasets. - These annotations don't have an appearance, so one must be created when printing or saving.
1 parent d6592b5 commit 7579644

File tree

6 files changed

+178
-13
lines changed

6 files changed

+178
-13
lines changed

src/core/annotation.js

+37-12
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,16 @@ class AnnotationFactory {
7272
static create(xref, ref, pdfManager, idFactory, collectFields) {
7373
return Promise.all([
7474
pdfManager.ensureCatalog("acroForm"),
75+
pdfManager.ensureDoc("xfaDatasets"),
7576
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
76-
]).then(([acroForm, pageIndex]) =>
77+
]).then(([acroForm, xfaDatasets, pageIndex]) =>
7778
pdfManager.ensure(this, "_create", [
7879
xref,
7980
ref,
8081
pdfManager,
8182
idFactory,
8283
acroForm,
84+
xfaDatasets,
8385
collectFields,
8486
pageIndex,
8587
])
@@ -95,6 +97,7 @@ class AnnotationFactory {
9597
pdfManager,
9698
idFactory,
9799
acroForm,
100+
xfaDatasets,
98101
collectFields,
99102
pageIndex = -1
100103
) {
@@ -119,6 +122,7 @@ class AnnotationFactory {
119122
id,
120123
pdfManager,
121124
acroForm: acroForm instanceof Dict ? acroForm : Dict.empty,
125+
xfaDatasets,
122126
collectFields,
123127
pageIndex,
124128
};
@@ -1237,7 +1241,7 @@ class WidgetAnnotation extends Annotation {
12371241
);
12381242
}
12391243

1240-
const fieldValue = getInheritableProperty({
1244+
let fieldValue = getInheritableProperty({
12411245
dict,
12421246
key: "V",
12431247
getArray: true,
@@ -1251,6 +1255,15 @@ class WidgetAnnotation extends Annotation {
12511255
});
12521256
data.defaultFieldValue = this._decodeFormValue(defaultFieldValue);
12531257

1258+
if (fieldValue === undefined && params.xfaDatasets) {
1259+
// Try to figure out if we have something in the xfa dataset.
1260+
const path = stringToPDFString(dict.get("T") || "");
1261+
if (path) {
1262+
data.hasValueFromXFA = true;
1263+
data.fieldValue = fieldValue = params.xfaDatasets.getValue(path);
1264+
}
1265+
}
1266+
12541267
// When no "V" entry exists, let the fieldValue fallback to the "DV" entry
12551268
// (fixes issue13823.pdf).
12561269
if (fieldValue === undefined && data.defaultFieldValue !== null) {
@@ -1401,17 +1414,20 @@ class WidgetAnnotation extends Annotation {
14011414
}
14021415

14031416
async save(evaluator, task, annotationStorage) {
1404-
if (!annotationStorage) {
1405-
return null;
1406-
}
1407-
const storageEntry = annotationStorage.get(this.data.id);
1408-
const value = storageEntry && storageEntry.value;
1417+
const storageEntry = annotationStorage
1418+
? annotationStorage.get(this.data.id)
1419+
: undefined;
1420+
let value = storageEntry && storageEntry.value;
14091421
if (value === this.data.fieldValue || value === undefined) {
1410-
return null;
1422+
if (!this.data.hasValueFromXFA) {
1423+
return null;
1424+
}
1425+
value = value || this.data.fieldValue;
14111426
}
14121427

14131428
// Value can be an array (with choice list and multiple selections)
14141429
if (
1430+
!this.data.hasValueFromXFA &&
14151431
Array.isArray(value) &&
14161432
Array.isArray(this.data.fieldValue) &&
14171433
value.length === this.data.fieldValue.length &&
@@ -1493,14 +1509,23 @@ class WidgetAnnotation extends Annotation {
14931509

14941510
async _getAppearance(evaluator, task, annotationStorage) {
14951511
const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD);
1496-
if (!annotationStorage || isPassword) {
1512+
if (isPassword) {
14971513
return null;
14981514
}
1499-
const storageEntry = annotationStorage.get(this.data.id);
1515+
const storageEntry = annotationStorage
1516+
? annotationStorage.get(this.data.id)
1517+
: undefined;
15001518
let value = storageEntry && storageEntry.value;
15011519
if (value === undefined) {
1502-
// The annotation hasn't been rendered so use the appearance
1503-
return null;
1520+
if (!this.data.hasValueFromXFA || this.appearance) {
1521+
// The annotation hasn't been rendered so use the appearance.
1522+
return null;
1523+
}
1524+
// The annotation has its value in XFA datasets but not in the V field.
1525+
value = this.data.fieldValue;
1526+
if (!value) {
1527+
return "";
1528+
}
15041529
}
15051530

15061531
value = value.trim();

src/core/dataset_reader.js

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/* Copyright 2022 Mozilla Foundation
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
import { parseXFAPath } from "./core_utils.js";
17+
import { SimpleXMLParser } from "./xml_parser.js";
18+
19+
/**
 * XML parser that captures the `xfa:datasets` element of a document and then
 * aborts parsing, since nothing after that element is of interest.
 *
 * The captured element (or `null` if none was closed) is exposed as `node`.
 */
class DataSetXMLParser extends SimpleXMLParser {
  constructor(options) {
    super(options);
    // Holds the `xfa:datasets` element once its end tag has been seen.
    this.node = null;
  }

  /**
   * Records the element returned by the base implementation when the tag
   * being closed is `xfa:datasets`.
   *
   * @param {string} name - Name of the element whose end tag was reached.
   * @throws {Error} Deliberately, right after capturing the datasets
   *   element, to stop the parser early; callers are expected to catch it.
   */
  onEndElement(name) {
    const closedElement = super.onEndElement(name);
    if (!closedElement || name !== "xfa:datasets") {
      return;
    }
    this.node = closedElement;

    // Everything we need has been collected: bail out of the parse.
    throw new Error();
  }
}
35+
36+
/**
 * Read-only accessor over XFA datasets XML.
 *
 * The root node is taken either from a standalone datasets string
 * (`data.datasets`) or extracted from a full XDP string (`data.xdp`).
 */
class DatasetReader {
  /**
   * @param {Object} data
   * @param {string} [data.datasets] - Datasets XML; used when truthy.
   * @param {string} [data.xdp] - XDP string, parsed only when
   *   `data.datasets` is falsy.
   */
  constructor(data) {
    if (!data.datasets) {
      const xdpParser = new DataSetXMLParser({ hasAttributes: true });
      try {
        xdpParser.parseFromString(data.xdp);
      } catch (_) {
        // DataSetXMLParser throws on purpose once it has found the
        // datasets element, so any error here is intentionally ignored.
      }
      this.node = xdpParser.node;
      return;
    }

    const datasetsParser = new SimpleXMLParser({ hasAttributes: true });
    this.node = datasetsParser.parseFromString(data.datasets).documentElement;
  }

  /**
   * Looks up the node addressed by `path` and returns its content.
   *
   * @param {string} path - Path handed to `parseXFAPath`.
   * @returns {string|Array<string>} An empty string when there is no root
   *   node, no path or no matching node; an array with the text content of
   *   every child when the first child is named "value"; otherwise the text
   *   content of the matching node.
   */
  getValue(path) {
    const root = this.node;
    if (!root || !path) {
      return "";
    }

    const target = root.searchNode(parseXFAPath(path), 0);
    if (!target) {
      return "";
    }

    const firstChild = target.firstChild;
    const hasValueChildren = firstChild && firstChild.nodeName === "value";
    return hasValueChildren
      ? target.children.map(child => child.textContent)
      : target.textContent;
  }
}
69+
70+
export { DatasetReader };

src/core/document.js

+42
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import { BaseStream } from "./base_stream.js";
4747
import { calculateMD5 } from "./crypto.js";
4848
import { Catalog } from "./catalog.js";
4949
import { clearGlobalCaches } from "./cleanup_helper.js";
50+
import { DatasetReader } from "./dataset_reader.js";
5051
import { Linearization } from "./parser.js";
5152
import { NullStream } from "./stream.js";
5253
import { ObjectLoader } from "./object_loader.js";
@@ -820,6 +821,47 @@ class PDFDocument {
820821
});
821822
}
822823

824+
get xfaDatasets() {
825+
const acroForm = this.catalog.acroForm;
826+
if (!acroForm) {
827+
return shadow(this, "xfaDatasets", null);
828+
}
829+
830+
const xfa = acroForm.get("XFA");
831+
if (xfa instanceof BaseStream && !xfa.isEmpty) {
832+
try {
833+
const xdp = stringToUTF8String(xfa.getString());
834+
return shadow(this, "xfaDatasets", new DatasetReader({ xdp }));
835+
} catch (_) {
836+
warn("XFA - Invalid utf-8 string.");
837+
return shadow(this, "xfaDatasets", null);
838+
}
839+
}
840+
841+
if (!Array.isArray(xfa) || xfa.length === 0) {
842+
return null;
843+
}
844+
845+
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
846+
if (xfa[i] !== "datasets") {
847+
continue;
848+
}
849+
const data = this.xref.fetchIfRef(xfa[i + 1]);
850+
if (!(data instanceof BaseStream) || data.isEmpty) {
851+
continue;
852+
}
853+
try {
854+
const datasets = stringToUTF8String(data.getString());
855+
return shadow(this, "xfaDatasets", new DatasetReader({ datasets }));
856+
} catch (_) {
857+
warn("XFA - Invalid utf-8 string.");
858+
return shadow(this, "xfaDatasets", null);
859+
}
860+
}
861+
862+
return shadow(this, "xfaDatasets", null);
863+
}
864+
823865
get xfaData() {
824866
const acroForm = this.catalog.acroForm;
825867
if (!acroForm) {

src/core/xml_parser.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ class SimpleDOMNode {
328328
.join("");
329329
}
330330

331+
get children() {
332+
return this.childNodes;
333+
}
334+
331335
hasChildNodes() {
332336
return this.childNodes && this.childNodes.length > 0;
333337
}
@@ -492,11 +496,12 @@ class SimpleXMLParser extends XMLParserBase {
492496
this._currentFragment = this._stack.pop() || [];
493497
const lastElement = this._currentFragment[this._currentFragment.length - 1];
494498
if (!lastElement) {
495-
return;
499+
return null;
496500
}
497501
for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
498502
lastElement.childNodes[i].parentNode = lastElement;
499503
}
504+
return lastElement;
500505
}
501506

502507
onError(code) {

test/pdfs/issue14685.pdf.link

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://github.com/mozilla/pdf.js/files/8283456/1647183160545.pdf

test/test_manifest.json

+22
Original file line numberDiff line numberDiff line change
@@ -6332,5 +6332,27 @@
63326332
"md5": "5d1bfcc3b3130bfa7e33e43990e2213a",
63336333
"rounds": 1,
63346334
"type": "text"
6335+
},
6336+
{ "id": "issue14685",
6337+
"file": "pdfs/issue14685.pdf",
6338+
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
6339+
"rounds": 1,
6340+
"link": true,
6341+
"lastPage": 1,
6342+
"type": "eq"
6343+
},
6344+
{ "id": "issue14685-print",
6345+
"file": "pdfs/issue14685.pdf",
6346+
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
6347+
"rounds": 1,
6348+
"link": true,
6349+
"lastPage": 1,
6350+
"type": "eq",
6351+
"print": true,
6352+
"annotationStorage": {
6353+
"150R": {
6354+
"value": "Hello PDF.js World"
6355+
}
6356+
}
63356357
}
63366358
]

0 commit comments

Comments
 (0)