Skip to content

Commit 68b99c5

Browse files
Save form data in XFA datasets when pdf is a mix of acroforms and xfa (#12344)
* Move display/xml_parser.js in shared to use it in worker * Save form data in XFA datasets when pdf is a mix of acroforms and xfa Co-authored-by: Brendan Dahl <[email protected]>
1 parent 622e2fb commit 68b99c5

File tree

11 files changed

+416
-19
lines changed

11 files changed

+416
-19
lines changed

src/core/annotation.js

+25-6
Original file line numberDiff line numberDiff line change
@@ -1073,18 +1073,23 @@ class WidgetAnnotation extends Annotation {
10731073
return null;
10741074
}
10751075

1076+
const value = annotationStorage[this.data.id];
10761077
const bbox = [
10771078
0,
10781079
0,
10791080
this.data.rect[2] - this.data.rect[0],
10801081
this.data.rect[3] - this.data.rect[1],
10811082
];
10821083

1084+
const xfa = {
1085+
path: stringToPDFString(dict.get("T") || ""),
1086+
value,
1087+
};
1088+
10831089
const newRef = evaluator.xref.getNewRef();
10841090
const AP = new Dict(evaluator.xref);
10851091
AP.set("N", newRef);
10861092

1087-
const value = annotationStorage[this.data.id];
10881093
const encrypt = evaluator.xref.encrypt;
10891094
let originalTransform = null;
10901095
let newTransform = null;
@@ -1120,9 +1125,9 @@ class WidgetAnnotation extends Annotation {
11201125
return [
11211126
// data for the original object
11221127
// V field changed + reference for new AP
1123-
{ ref: this.ref, data: bufferOriginal.join("") },
1128+
{ ref: this.ref, data: bufferOriginal.join(""), xfa },
11241129
// data for the new AP
1125-
{ ref: newRef, data: bufferNew.join("") },
1130+
{ ref: newRef, data: bufferNew.join(""), xfa: null },
11261131
];
11271132
}
11281133

@@ -1521,6 +1526,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
15211526
return null;
15221527
}
15231528

1529+
const xfa = {
1530+
path: stringToPDFString(dict.get("T") || ""),
1531+
value: value ? this.data.exportValue : "",
1532+
};
1533+
15241534
const name = Name.get(value ? this.data.exportValue : "Off");
15251535
dict.set("V", name);
15261536
dict.set("AS", name);
@@ -1539,7 +1549,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
15391549
writeDict(dict, buffer, originalTransform);
15401550
buffer.push("\nendobj\n");
15411551

1542-
return [{ ref: this.ref, data: buffer.join("") }];
1552+
return [{ ref: this.ref, data: buffer.join(""), xfa }];
15431553
}
15441554

15451555
async _saveRadioButton(evaluator, task, annotationStorage) {
@@ -1555,6 +1565,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
15551565
return null;
15561566
}
15571567

1568+
const xfa = {
1569+
path: stringToPDFString(dict.get("T") || ""),
1570+
value: value ? this.data.buttonValue : "",
1571+
};
1572+
15581573
const name = Name.get(value ? this.data.buttonValue : "Off");
15591574
let parentBuffer = null;
15601575
const encrypt = evaluator.xref.encrypt;
@@ -1593,9 +1608,13 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
15931608
writeDict(dict, buffer, originalTransform);
15941609
buffer.push("\nendobj\n");
15951610

1596-
const newRefs = [{ ref: this.ref, data: buffer.join("") }];
1611+
const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }];
15971612
if (parentBuffer !== null) {
1598-
newRefs.push({ ref: this.parent, data: parentBuffer.join("") });
1613+
newRefs.push({
1614+
ref: this.parent,
1615+
data: parentBuffer.join(""),
1616+
xfa: null,
1617+
});
15991618
}
16001619

16011620
return newRefs;

src/core/worker.js

+27-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ import {
3232
VerbosityLevel,
3333
warn,
3434
} from "../shared/util.js";
35-
import { clearPrimitiveCaches, Ref } from "./primitives.js";
35+
import { clearPrimitiveCaches, Dict, isDict, Ref } from "./primitives.js";
3636
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
3737
import { incrementalUpdate } from "./writer.js";
3838
import { isNodeJS } from "../shared/is_node.js";
@@ -521,7 +521,10 @@ class WorkerMessageHandler {
521521
filename,
522522
}) {
523523
pdfManager.requestLoadedStream();
524-
const promises = [pdfManager.onLoadedStream()];
524+
const promises = [
525+
pdfManager.onLoadedStream(),
526+
pdfManager.ensureCatalog("acroForm"),
527+
];
525528
const document = pdfManager.pdfDocument;
526529
for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
527530
promises.push(
@@ -532,7 +535,7 @@ class WorkerMessageHandler {
532535
);
533536
}
534537

535-
return Promise.all(promises).then(([stream, ...refs]) => {
538+
return Promise.all(promises).then(([stream, acroForm, ...refs]) => {
536539
let newRefs = [];
537540
for (const ref of refs) {
538541
newRefs = ref
@@ -545,6 +548,20 @@ class WorkerMessageHandler {
545548
return stream.bytes;
546549
}
547550

551+
acroForm = isDict(acroForm) ? acroForm : Dict.empty;
552+
const xfa = acroForm.get("XFA") || [];
553+
let xfaDatasets = null;
554+
if (Array.isArray(xfa)) {
555+
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
556+
if (xfa[i] === "datasets") {
557+
xfaDatasets = xfa[i + 1];
558+
}
559+
}
560+
} else {
561+
// TODO: Support XFA streams.
562+
warn("Unsupported XFA type.");
563+
}
564+
548565
const xref = document.xref;
549566
let newXrefInfo = Object.create(null);
550567
if (xref.trailer) {
@@ -572,7 +589,13 @@ class WorkerMessageHandler {
572589
}
573590
xref.resetNewRef();
574591

575-
return incrementalUpdate(stream.bytes, newXrefInfo, newRefs);
592+
return incrementalUpdate(
593+
stream.bytes,
594+
newXrefInfo,
595+
newRefs,
596+
xref,
597+
xfaDatasets
598+
);
576599
});
577600
});
578601

src/core/writer.js

+56-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@
1414
*/
1515
/* eslint no-var: error */
1616

17-
import { bytesToString, escapeString } from "../shared/util.js";
17+
import {
18+
bytesToString,
19+
escapeString,
20+
parseXFAPath,
21+
warn,
22+
} from "../shared/util.js";
1823
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
24+
import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js";
1925
import { calculateMD5 } from "./crypto.js";
2026

2127
function writeDict(dict, buffer, transform) {
@@ -123,7 +129,55 @@ function computeMD5(filesize, xrefInfo) {
123129
return bytesToString(calculateMD5(array));
124130
}
125131

126-
function incrementalUpdate(originalData, xrefInfo, newRefs) {
132+
/**
 * Propagate saved form values into the XFA `datasets` XML and queue the
 * rewritten datasets object for writing.
 *
 * For every entry of `newRefs` carrying an `xfa` object (`{ path, value }`),
 * the node addressed by `path` is looked up in the parsed datasets XML and
 * its children are replaced by a single text node holding `value`. The
 * serialized XML (encrypted when `xref.encrypt` is set) is then appended to
 * `newRefs` as a new version of the `datasetsRef` object.
 *
 * @param {Object} [datasetsRef] - Reference (with `num` and `gen`) of the
 *   XFA datasets stream. When missing, nothing is done.
 * @param {Array<Object>} newRefs - Objects to be written; mutated in place
 *   (one entry is appended when the datasets are updated).
 * @param {Object} [xref] - Cross-reference object providing `fetchIfRef`
 *   and `encrypt`. When missing, nothing is done.
 */
function updateXFA(datasetsRef, newRefs, xref) {
  // Treat `undefined` like `null`: callers that still use the older
  // three-argument form of `incrementalUpdate` pass neither argument, and a
  // strict `=== null` test would let them fall through to a TypeError.
  if (!datasetsRef || !xref) {
    return;
  }

  const datasets = xref.fetchIfRef(datasetsRef);
  const str = bytesToString(datasets.getBytes());
  const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString(
    str
  );

  // Defensive: presumably the parser yields no document element for
  // malformed XML (verify against SimpleXMLParser). Skipping the XFA update
  // keeps the AcroForm part of the save working instead of throwing.
  if (!xml || !xml.documentElement) {
    warn("Unable to parse XFA datasets, XFA form data not updated.");
    return;
  }

  for (const { xfa } of newRefs) {
    // Entries with no XFA counterpart (e.g. new appearance streams) carry
    // a null `xfa`; fields without a name have an empty path.
    if (!xfa || !xfa.path) {
      continue;
    }
    const { path, value } = xfa;
    const node = xml.documentElement.searchNode(parseXFAPath(path), 0);
    if (node) {
      node.childNodes = [new SimpleDOMNode("#text", value)];
    } else {
      warn(`Node not found for path: ${path}`);
    }
  }

  const buffer = [];
  xml.documentElement.dump(buffer);
  let updatedXml = buffer.join("");

  const encrypt = xref.encrypt;
  if (encrypt) {
    // The cipher is keyed on the object's number and generation.
    const transform = encrypt.createCipherTransform(
      datasetsRef.num,
      datasetsRef.gen
    );
    updatedXml = transform.encryptString(updatedXml);
  }

  const data =
    `${datasetsRef.num} ${datasetsRef.gen} obj\n` +
    `<< /Type /EmbeddedFile /Length ${updatedXml.length}>>\nstream\n` +
    updatedXml +
    "\nendstream\nendobj\n";

  newRefs.push({ ref: datasetsRef, data });
}
177+
178+
function incrementalUpdate(originalData, xrefInfo, newRefs, xref, datasetsRef) {
179+
updateXFA(datasetsRef, newRefs, xref);
180+
127181
const newXref = new Dict(null);
128182
const refForXrefTable = xrefInfo.newRef;
129183

src/display/metadata.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*/
1515

1616
import { assert } from "../shared/util.js";
17-
import { SimpleXMLParser } from "./xml_parser.js";
17+
import { SimpleXMLParser } from "../shared/xml_parser.js";
1818

1919
class Metadata {
2020
constructor(data) {

src/shared/util.js

+69
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,73 @@ const createObjectURL = (function createObjectURLClosure() {
910910
};
911911
})();
912912

913+
/**
 * Split an AcroForm field name into its XFA path components.
 *
 * Field names address repeated XFA elements with an array-like suffix,
 * e.g. `foo.bar[nnn]` (see XFA Spec Chapter 3 - Repeated Elements).
 *
 * @param {string} path - XFA path name, with components separated by ".".
 * @returns {Array} - One `{ name, pos }` object per component; `pos` is
 *   the index given in a trailing `[nnn]`, or 0 when there is none.
 */
function parseXFAPath(path) {
  const positionPattern = /(.+)\[([0-9]+)\]$/;
  const parts = [];
  for (const segment of path.split(".")) {
    const found = positionPattern.exec(segment);
    if (found === null) {
      // No trailing index: the whole segment is the name.
      parts.push({ name: segment, pos: 0 });
      continue;
    }
    const [, name, index] = found;
    parts.push({ name, pos: parseInt(index, 10) });
  }
  return parts;
}
932+
933+
// Predefined XML entities, keyed by the code point they replace.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Encode a string so that it can be embedded in XML text.
 *
 * Printable ASCII (0x20-0x7e) is kept as is, except for the five characters
 * that have a predefined XML entity. Every other code point is written as a
 * hexadecimal character reference (`&#xNNNN;`).
 *
 * @param {string} str - The string to encode.
 * @returns {string} The encoded string; `str` itself when nothing had to be
 *   replaced.
 */
function encodeToXmlString(str) {
  const buffer = [];
  // Index of the first character not yet copied into `buffer`.
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // ascii
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      // `codePointAt` only returns a value above 0xffff when it consumed a
      // valid surrogate pair, i.e. two UTF-16 units. Lone surrogates and
      // U+FFFE/U+FFFF occupy a single unit, so skipping an extra unit for
      // them would silently drop the following character.
      if (char > 0xffff) {
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }

  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }

  return buffer.join("");
}
979+
913980
export {
914981
BaseException,
915982
FONT_IDENTITY_MATRIX,
@@ -947,6 +1014,7 @@ export {
9471014
createPromiseCapability,
9481015
createObjectURL,
9491016
escapeString,
1017+
encodeToXmlString,
9501018
getModificationDate,
9511019
getVerbosityLevel,
9521020
info,
@@ -959,6 +1027,7 @@ export {
9591027
createValidAbsoluteUrl,
9601028
IsLittleEndianCached,
9611029
IsEvalSupportedCached,
1030+
parseXFAPath,
9621031
removeNullCharacters,
9631032
setVerbosityLevel,
9641033
shadow,

0 commit comments

Comments
 (0)