Skip to content

Commit 8c70258

Browse files
Merge pull request #14182 from calixteman/richtext
Support rich content in markup annotation
2 parents fe11711 + cf8dc75 commit 8c70258

14 files changed

+188
-39
lines changed

src/core/annotation.js

+9
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ import { ObjectLoader } from "./object_loader.js";
5555
import { OperatorList } from "./operator_list.js";
5656
import { StringStream } from "./stream.js";
5757
import { writeDict } from "./writer.js";
58+
import { XFAFactory } from "./xfa/factory.js";
5859

5960
class AnnotationFactory {
6061
/**
@@ -1105,6 +1106,10 @@ class MarkupAnnotation extends Annotation {
11051106
this.data.color = null;
11061107
}
11071108
}
1109+
1110+
if (dict.has("RC")) {
1111+
this.data.richText = XFAFactory.getRichTextAsHtml(dict.get("RC"));
1112+
}
11081113
}
11091114

11101115
/**
@@ -2552,6 +2557,10 @@ class PopupAnnotation extends Annotation {
25522557

25532558
this.setContents(parentItem.get("Contents"));
25542559
this.data.contentsObj = this._contents;
2560+
2561+
if (parentItem.has("RC")) {
2562+
this.data.richText = XFAFactory.getRichTextAsHtml(parentItem.get("RC"));
2563+
}
25552564
}
25562565
}
25572566

src/core/xfa/builder.js

+3-2
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class Empty extends XFAObject {
6666
}
6767

6868
class Builder {
69-
constructor() {
69+
constructor(rootNameSpace = null) {
7070
this._namespaceStack = [];
7171
this._nsAgnosticLevel = 0;
7272

@@ -76,7 +76,8 @@ class Builder {
7676
this._nextNsId = Math.max(
7777
...Object.values(NamespaceIds).map(({ id }) => id)
7878
);
79-
this._currentNamespace = new UnknownNamespace(++this._nextNsId);
79+
this._currentNamespace =
80+
rootNameSpace || new UnknownNamespace(++this._nextNsId);
8081
}
8182

8283
buildRoot(ids) {

src/core/xfa/factory.js

+45-1
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,20 @@
1313
* limitations under the License.
1414
*/
1515

16-
import { $globalData, $toHTML } from "./xfa_object.js";
16+
import {
17+
$appendChild,
18+
$globalData,
19+
$nodeName,
20+
$text,
21+
$toHTML,
22+
} from "./xfa_object.js";
1723
import { Binder } from "./bind.js";
1824
import { DataHandler } from "./data.js";
1925
import { FontFinder } from "./fonts.js";
2026
import { stripQuotes } from "./utils.js";
2127
import { warn } from "../../shared/util.js";
2228
import { XFAParser } from "./parser.js";
29+
import { XhtmlNamespace } from "./xhtml.js";
2330

2431
class XFAFactory {
2532
constructor(data) {
@@ -106,6 +113,43 @@ class XFAFactory {
106113
}
107114
return Object.values(data).join("");
108115
}
116+
117+
/**
 * Convert a rich-text string into an HTML description plus its plain text.
 *
 * The string is parsed with an `XFAParser` rooted in `XhtmlNamespace` and
 * created with its `richText` flag set to `true`. Callers visible in this
 * commit pass the value of an annotation dictionary's "RC" entry.
 *
 * @param {string} rc - The rich-text markup to parse.
 * @returns {Object|null} An object `{ html, str }`, where `html` is the
 *   `html` member of the `$toHTML` result and `str` is `root[$text]()`.
 *   Returns `null` when `rc` is falsy or not a string, when the `$toHTML`
 *   result is not successful, or when an exception is thrown (in which case
 *   a warning is logged).
 */
static getRichTextAsHtml(rc) {
118+
// Reject empty values and anything that is not a plain string.
if (!rc || typeof rc !== "string") {
119+
return null;
120+
}
121+
122+
try {
123+
let root = new XFAParser(XhtmlNamespace, /* richText */ true).parse(rc);
124+
// Only "body" and "xhtml" roots are used as-is; any other root node is
// wrapped in a newly created body element.
if (!["body", "xhtml"].includes(root[$nodeName])) {
125+
// No body, so create one.
126+
const newRoot = XhtmlNamespace.body({});
127+
newRoot[$appendChild](root);
128+
root = newRoot;
129+
}
130+
131+
const result = root[$toHTML]();
132+
if (!result.success) {
133+
return null;
134+
}
135+
136+
const { html } = result;
137+
const { attributes } = html;
138+
if (attributes) {
139+
if (attributes.class) {
140+
// Drop every class name starting with "xfa" from the root element.
attributes.class = attributes.class.filter(
141+
attr => !attr.startsWith("xfa")
142+
);
143+
}
144+
// The root element always gets `dir="auto"` when it has attributes.
attributes.dir = "auto";
145+
}
146+
147+
return { html, str: root[$text]() };
148+
} catch (e) {
149+
// Failures are reported as a warning; the method then falls through to
// the final `return null` instead of rethrowing.
warn(`XFA - an error occurred during parsing of rich text: ${e}`);
150+
}
151+
return null;
152+
}
109153
}
110154

111155
export { XFAFactory };

src/core/xfa/html_utils.js

+8-2
Original file line numberDiff line numberDiff line change
@@ -606,10 +606,16 @@ function setPara(node, nodeStyle, value) {
606606
}
607607

608608
function setFontFamily(xfaFont, node, fontFinder, style) {
609-
const name = stripQuotes(xfaFont.typeface);
610-
const typeface = fontFinder.find(name);
609+
if (!fontFinder) {
610+
// The font cannot be found in the pdf so use the default one.
611+
delete style.fontFamily;
612+
return;
613+
}
611614

615+
const name = stripQuotes(xfaFont.typeface);
612616
style.fontFamily = `"${name}"`;
617+
618+
const typeface = fontFinder.find(name);
613619
if (typeface) {
614620
const { fontFamily } = typeface.regular.cssFontInfo;
615621
if (fontFamily !== name) {

src/core/xfa/parser.js

+5-4
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import { Builder } from "./builder.js";
3030
import { warn } from "../../shared/util.js";
3131

3232
class XFAParser extends XMLParserBase {
33-
constructor() {
33+
constructor(rootNameSpace = null, richText = false) {
3434
super();
35-
this._builder = new Builder();
35+
this._builder = new Builder(rootNameSpace);
3636
this._stack = [];
3737
this._globalData = {
3838
usedTypefaces: new Set(),
@@ -42,6 +42,7 @@ class XFAParser extends XMLParserBase {
4242
this._errorCode = XMLParserErrorCode.NoError;
4343
this._whiteRegex = /^\s+$/;
4444
this._nbsps = /\xa0+/g;
45+
this._richText = richText;
4546
}
4647

4748
parse(data) {
@@ -60,8 +61,8 @@ class XFAParser extends XMLParserBase {
6061
// Normally by definition a &nbsp is unbreakable
6162
// but in real life Acrobat can break strings on &nbsp.
6263
text = text.replace(this._nbsps, match => match.slice(1) + " ");
63-
if (this._current[$acceptWhitespace]()) {
64-
this._current[$onText](text);
64+
if (this._richText || this._current[$acceptWhitespace]()) {
65+
this._current[$onText](text, this._richText);
6566
return;
6667
}
6768

src/core/xfa/xhtml.js

+27-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import {
2020
$content,
2121
$extra,
2222
$getChildren,
23+
$getParent,
2324
$globalData,
2425
$nodeName,
2526
$onText,
@@ -38,6 +39,7 @@ import {
3839
import { getMeasurement, HTMLResult, stripQuotes } from "./utils.js";
3940

4041
const XHTML_NS_ID = NamespaceIds.xhtml.id;
42+
const $richText = Symbol();
4143

4244
const VALID_STYLES = new Set([
4345
"color",
@@ -109,6 +111,7 @@ const StyleMapping = new Map([
109111

110112
const spacesRegExp = /\s+/g;
111113
const crlfRegExp = /[\r\n]+/g;
114+
const crlfForRichTextRegExp = /\r\n?/g;
112115

113116
function mapStyle(styleStr, node) {
114117
const style = Object.create(null);
@@ -185,6 +188,7 @@ const NoWhites = new Set(["body", "html"]);
185188
class XhtmlObject extends XmlObject {
186189
constructor(attributes, name) {
187190
super(XHTML_NS_ID, name);
191+
this[$richText] = false;
188192
this.style = attributes.style || "";
189193
}
190194

@@ -197,11 +201,16 @@ class XhtmlObject extends XmlObject {
197201
return !NoWhites.has(this[$nodeName]);
198202
}
199203

200-
[$onText](str) {
201-
str = str.replace(crlfRegExp, "");
202-
if (!this.style.includes("xfa-spacerun:yes")) {
203-
str = str.replace(spacesRegExp, " ");
204+
[$onText](str, richText = false) {
205+
if (!richText) {
206+
str = str.replace(crlfRegExp, "");
207+
if (!this.style.includes("xfa-spacerun:yes")) {
208+
str = str.replace(spacesRegExp, " ");
209+
}
210+
} else {
211+
this[$richText] = true;
204212
}
213+
205214
if (str) {
206215
this[$content] += str;
207216
}
@@ -311,14 +320,23 @@ class XhtmlObject extends XmlObject {
311320
return HTMLResult.EMPTY;
312321
}
313322

323+
let value;
324+
if (this[$richText]) {
325+
value = this[$content]
326+
? this[$content].replace(crlfForRichTextRegExp, "\n")
327+
: undefined;
328+
} else {
329+
value = this[$content] || undefined;
330+
}
331+
314332
return HTMLResult.success({
315333
name: this[$nodeName],
316334
attributes: {
317335
href: this.href,
318336
style: mapStyle(this.style, this),
319337
},
320338
children,
321-
value: this[$content] || "",
339+
value,
322340
});
323341
}
324342
}
@@ -457,6 +475,10 @@ class P extends XhtmlObject {
457475
}
458476

459477
/**
 * Return the text of this paragraph as produced by the parent class's
 * `$text`, followed by a newline unless this node is the last child of
 * its parent.
 *
 * NOTE(review): `this[$getParent]()` is dereferenced without a check, so
 * this presumably assumes the paragraph always has a parent — confirm.
 */
[$text]() {
478+
const siblings = this[$getParent]()[$getChildren]();
479+
// The last child gets no trailing newline.
if (siblings[siblings.length - 1] === this) {
480+
return super[$text]();
481+
}
460482
return super[$text]() + "\n";
461483
}
462484
}

0 commit comments

Comments
 (0)