Skip to content

[Annotations] Some annotations can have their values stored in the xfa:datasets #14735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,16 @@ class AnnotationFactory {
static create(xref, ref, pdfManager, idFactory, collectFields) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
]).then(([acroForm, pageIndex]) =>
]).then(([acroForm, xfaDatasets, pageIndex]) =>
pdfManager.ensure(this, "_create", [
xref,
ref,
pdfManager,
idFactory,
acroForm,
xfaDatasets,
collectFields,
pageIndex,
])
Expand All @@ -95,6 +97,7 @@ class AnnotationFactory {
pdfManager,
idFactory,
acroForm,
xfaDatasets,
collectFields,
pageIndex = -1
) {
Expand All @@ -119,6 +122,7 @@ class AnnotationFactory {
id,
pdfManager,
acroForm: acroForm instanceof Dict ? acroForm : Dict.empty,
xfaDatasets,
collectFields,
pageIndex,
};
Expand Down Expand Up @@ -1237,7 +1241,7 @@ class WidgetAnnotation extends Annotation {
);
}

const fieldValue = getInheritableProperty({
let fieldValue = getInheritableProperty({
dict,
key: "V",
getArray: true,
Expand All @@ -1251,6 +1255,15 @@ class WidgetAnnotation extends Annotation {
});
data.defaultFieldValue = this._decodeFormValue(defaultFieldValue);

if (fieldValue === undefined && params.xfaDatasets) {
// Try to figure out if we have something in the xfa dataset.
const path = this._title.str;
if (path) {
this._hasValueFromXFA = true;
data.fieldValue = fieldValue = params.xfaDatasets.getValue(path);
}
}

// When no "V" entry exists, let the fieldValue fallback to the "DV" entry
// (fixes issue13823.pdf).
if (fieldValue === undefined && data.defaultFieldValue !== null) {
Expand Down Expand Up @@ -1401,17 +1414,20 @@ class WidgetAnnotation extends Annotation {
}

async save(evaluator, task, annotationStorage) {
if (!annotationStorage) {
return null;
}
const storageEntry = annotationStorage.get(this.data.id);
const value = storageEntry && storageEntry.value;
const storageEntry = annotationStorage
? annotationStorage.get(this.data.id)
: undefined;
let value = storageEntry && storageEntry.value;
if (value === this.data.fieldValue || value === undefined) {
return null;
if (!this._hasValueFromXFA) {
return null;
}
value = value || this.data.fieldValue;
}

// Value can be an array (with choice list and multiple selections)
if (
!this._hasValueFromXFA &&
Array.isArray(value) &&
Array.isArray(this.data.fieldValue) &&
value.length === this.data.fieldValue.length &&
Expand Down Expand Up @@ -1493,14 +1509,23 @@ class WidgetAnnotation extends Annotation {

async _getAppearance(evaluator, task, annotationStorage) {
const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD);
if (!annotationStorage || isPassword) {
if (isPassword) {
return null;
}
const storageEntry = annotationStorage.get(this.data.id);
const storageEntry = annotationStorage
? annotationStorage.get(this.data.id)
: undefined;
let value = storageEntry && storageEntry.value;
if (value === undefined) {
// The annotation hasn't been rendered so use the appearance
return null;
if (!this._hasValueFromXFA || this.appearance) {
// The annotation hasn't been rendered so use the appearance.
return null;
}
// The annotation has its value in XFA datasets but not in the V field.
value = this.data.fieldValue;
if (!value) {
return "";
}
}

value = value.trim();
Expand Down
70 changes: 70 additions & 0 deletions src/core/dataset_reader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { parseXFAPath } from "./core_utils.js";
import { SimpleXMLParser } from "./xml_parser.js";

/**
 * XML parser which keeps hold of the `xfa:datasets` element and interrupts
 * the parsing as soon as that element has been closed.
 */
class DatasetXMLParser extends SimpleXMLParser {
  constructor(options) {
    super(options);
    // Filled in by `onEndElement` once `xfa:datasets` has been closed.
    this.node = null;
  }

  /**
   * @param {string} name - Name of the element which has just been closed.
   * @throws {Error} Once the `xfa:datasets` element has been stored.
   */
  onEndElement(name) {
    const closedElement = super.onEndElement(name);
    if (!closedElement || name !== "xfa:datasets") {
      return;
    }
    this.node = closedElement;

    // Whatever follows the datasets is of no interest here: throwing is the
    // way used to stop the parser.
    throw new Error("Aborting DatasetXMLParser.");
  }
}

/**
 * Reads values out of the XML node holding the XFA datasets.
 */
class DatasetReader {
  /**
   * @param {Object} data - When `data.datasets` is truthy it is parsed and
   *   its document element is used; otherwise `data.xdp` is parsed with a
   *   `DatasetXMLParser` and the node that parser collected is used.
   */
  constructor(data) {
    const parserOptions = { hasAttributes: true };

    if (data.datasets) {
      const xmlParser = new SimpleXMLParser(parserOptions);
      this.node = xmlParser.parseFromString(data.datasets).documentElement;
      return;
    }

    const datasetParser = new DatasetXMLParser(parserOptions);
    try {
      datasetParser.parseFromString(data.xdp);
    } catch (_) {
      // DatasetXMLParser throws on purpose to stop the parsing; any error
      // raised here is ignored and `node` is read as is.
    }
    this.node = datasetParser.node;
  }

  /**
   * @param {string} path - XFA path of the node to look for.
   * @returns {string|Array} An empty string when there is no root node, no
   *   path, or no matching node; one text content per child when the first
   *   child of the match is named "value"; the text content of the match
   *   otherwise.
   */
  getValue(path) {
    const root = this.node;
    if (!(root && path)) {
      return "";
    }

    const target = root.searchNode(parseXFAPath(path), 0);
    if (!target) {
      return "";
    }

    const { firstChild } = target;
    const isMultiValued = firstChild && firstChild.nodeName === "value";
    return isMultiValued
      ? target.children.map(entry => entry.textContent)
      : target.textContent;
  }
}

export { DatasetReader };
42 changes: 42 additions & 0 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js";
Expand Down Expand Up @@ -820,6 +821,47 @@ class PDFDocument {
});
}

get xfaDatasets() {
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "xfaDatasets", null);
}

const xfa = acroForm.get("XFA");
if (xfa instanceof BaseStream && !xfa.isEmpty) {
try {
const xdp = stringToUTF8String(xfa.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ xdp }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}

if (!Array.isArray(xfa) || xfa.length === 0) {
return null;
}

for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] !== "datasets") {
continue;
}
const data = this.xref.fetchIfRef(xfa[i + 1]);
if (!(data instanceof BaseStream) || data.isEmpty) {
continue;
}
try {
const datasets = stringToUTF8String(data.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ datasets }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}

return shadow(this, "xfaDatasets", null);
}

get xfaData() {
const acroForm = this.catalog.acroForm;
if (!acroForm) {
Expand Down
7 changes: 6 additions & 1 deletion src/core/xml_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ class SimpleDOMNode {
.join("");
}

get children() {
return this.childNodes || [];
}

hasChildNodes() {
return this.childNodes && this.childNodes.length > 0;
}
Expand Down Expand Up @@ -492,11 +496,12 @@ class SimpleXMLParser extends XMLParserBase {
this._currentFragment = this._stack.pop() || [];
const lastElement = this._currentFragment[this._currentFragment.length - 1];
if (!lastElement) {
return;
return null;
}
for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
lastElement.childNodes[i].parentNode = lastElement;
}
return lastElement;
}

onError(code) {
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue14685.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/8283456/1647183160545.pdf
22 changes: 22 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6332,5 +6332,27 @@
"md5": "5d1bfcc3b3130bfa7e33e43990e2213a",
"rounds": 1,
"type": "text"
},
{ "id": "issue14685",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue14685-print",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq",
"print": true,
"annotationStorage": {
"150R": {
"value": "Hello PDF.js World"
}
}
}
]