Skip to content

Commit ab946c4

Browse files
authored
Gh-426 improve multi byte text rendering (#429)
1 parent 49bdecf commit ab946c4

File tree

241 files changed

+390
-502741
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

241 files changed

+390
-502741
lines changed

core/core-awt/src/main/java/org/icepdf/core/pobjects/HexStringObject.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,15 +234,17 @@ public StringBuilder getLiteralStringBuffer(final int fontFormat, FontFile font)
234234
int charOffset = 2;
235235
int length = getLength();
236236
int charValue;
237+
boolean notUCS2 = font.getToUnicode() != null
238+
&& font.getToUnicode().getName() != null
239+
&& !font.getToUnicode().getName().contains("UCS2");
237240
StringBuilder tmp = new StringBuilder(length);
238241
// attempt to detect multibyte encoded strings.
239242
for (int i = 0; i < length; i += charOffset) {
240243
String first = stringData.substring(i, i + 2);
241244
if (first.charAt(0) != '0') {
242245
// check range for possible 2 byte char ie mixed mode.
243246
charValue = getUnsignedInt(first);
244-
if (font.getByteEncoding() == FontFile.ByteEncoding.MIXED_BYTE &&
245-
font.canDisplay((char) charValue) && font.getSource() != null) {
247+
if (notUCS2 && font.canDisplay((char) charValue) && font.getSource() != null) {
246248
tmp.append((char) charValue);
247249
} else {
248250
charValue = getUnsignedInt(i, 4);

core/core-awt/src/main/java/org/icepdf/core/pobjects/LiteralStringObject.java

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -186,28 +186,16 @@ public StringBuilder getLiteralStringBuffer(final int fontFormat, FontFile font)
186186
int length = getLength();
187187
int charValue;
188188
StringBuilder tmp = new StringBuilder(length);
189-
if (font.getByteEncoding() == FontFile.ByteEncoding.MIXED_BYTE) {
190-
int charOffset = 1;
191-
for (int i = 0; i < length; i += charOffset) {
192-
// check range for possible 2 byte char.
189+
for (int i = 0; i < length; i += 1) {
190+
// check range for possible 2 byte char.
191+
charValue = getUnsignedInt(i, 2);
192+
if (font.canDisplay((char) charValue)) {
193+
tmp.append((char) charValue);
194+
i += 1;
195+
} else {
193196
charValue = getUnsignedInt(i, 1);
194197
if (font.canDisplay((char) charValue)) {
195198
tmp.append((char) charValue);
196-
} else {
197-
int charValue2 = getUnsignedInt(i, 2);
198-
if (font.canDisplay((char) charValue2)) {
199-
tmp.append((char) charValue2);
200-
i += 1;
201-
}
202-
}
203-
}
204-
} else {
205-
// we have default 2bytes.
206-
int charOffset = 2;
207-
for (int i = 0; i < length; i += charOffset) {
208-
int charValue2 = getUnsignedInt(i, 2);
209-
if (font.canDisplay((char) charValue2)) {
210-
tmp.append((char) charValue2);
211199
}
212200
}
213201
}

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/CMap.java

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -25,64 +25,4 @@
2525
public interface CMap {
2626

2727
Name TYPE = new Name("CMap");
28-
29-
void init();
30-
31-
/**
32-
* Maps the character id to an underlying unicode value if available.
33-
*
34-
* @param ch character code to find unicode value of.
35-
* @return unicode value of ch if available otherwise original ch is returned unaltered.
36-
*/
37-
char toSelector(char ch);
38-
39-
char toSelector(char ch, boolean isCFF);
40-
41-
char fromSelector(char ch);
42-
43-
/**
44-
* Maps the character id to an underlying to unicode table. This method should
45-
* be called when looking for a unicode value for a CID. This method differs
46-
* slightly from #toSelector in that it can return a String rather than a
47-
* single character code.
48-
*
49-
* @param ch character id to look for corresponding unicode values.
50-
* @return unicode value of specified character code.
51-
*/
52-
String toUnicode(char ch);
53-
54-
/**
55-
* Determines if the cid should be interpreted as a one or two byte character.
56-
* Some CID fonts use the one byte notation but the two byte is the most
57-
* common by far.
58-
*
59-
* @return true if the cid should be considered as having a one byte length.
60-
*/
61-
boolean isOneByte();
62-
63-
/**
64-
* Determines if the cid should be interpreted as a one or two byte character.
65-
* Some CID fonts use the one byte notation but the two byte is the most
66-
* common by far.
67-
*
68-
* @return true if the cid should be considered as having a two byte length.
69-
*/
70-
boolean isTwoByte();
71-
72-
/**
73-
* Determines if the cid should be interpreted as a one or two byte character.
74-
* Some CID fonts use the one byte notation but the two byte is the most
75-
* common by far. A mixed byte string must be parsed differently as the font
76-
* can be used to determine the number of bytes used for each character.
77-
*
78-
* @return true if the cid should be considered as having a mixed byte length.
79-
*/
80-
boolean isMixedByte();
81-
82-
/**
83-
* Utility method to check if a CMap contains any data, specifically usable toUnicode data.
84-
*
85-
* @return true if the mapping contains at least one entry.
86-
*/
87-
boolean isEmptyMapping();
8828
}

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/Font.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*/
1616
package org.icepdf.core.pobjects.fonts;
1717

18-
18+
import org.apache.fontbox.cmap.CMap;
1919
import org.icepdf.core.pobjects.Dictionary;
2020
import org.icepdf.core.pobjects.DictionaryEntries;
2121
import org.icepdf.core.pobjects.Name;
@@ -394,6 +394,15 @@ public int getCharacterCount() {
394394
return lastchar - firstchar + 1;
395395
}
396396

397+
/**
398+
* <p>Returns true if a toUnicode CMap has been set for this font; false, otherwise.</p>
399+
*
400+
* @return true if the toUnicode CMap is non-null, false otherwise.
401+
*/
402+
public boolean hasUnicodeCMap() {
403+
return toUnicodeCMap != null;
404+
}
405+
397406
/**
398407
* <p>Returns true if the writing mode is vertical; false, otherwise</p>
399408
*

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/FontFactory.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,6 @@ else if (FONT_SUBTYPE_CID_FONT_TYPE_0.equals(subtype)) {
103103
} else if (FONT_SUBTYPE_CID_FONT_TYPE_2.equals(subtype)) {
104104
font = new TypeCidType2Font(library, entries);
105105
}
106-
if (font == null) {
107-
// create OFont implementation.
108-
font = new org.icepdf.core.pobjects.fonts.ofont.Font(library, entries);
109-
}
110106
return font;
111107
}
112108

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/FontFile.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
*/
1616
package org.icepdf.core.pobjects.fonts;
1717

18+
import org.apache.fontbox.cmap.CMap;
19+
1820
import java.awt.*;
1921
import java.awt.geom.AffineTransform;
2022
import java.awt.geom.Point2D;
@@ -57,8 +59,6 @@ FontFile deriveFont(Map<Integer, Float> widths, int firstCh, float missingWidth,
5759

5860
boolean canDisplay(char ech);
5961

60-
void setIsCid();
61-
6262
CMap getToUnicode();
6363

6464
String toUnicode(String displayText);

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/FontManager.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
import java.io.File;
2424
import java.io.IOException;
2525
import java.net.URL;
26-
import java.util.List;
2726
import java.util.*;
27+
import java.util.List;
2828
import java.util.logging.Level;
2929
import java.util.logging.Logger;
3030
import java.util.prefs.Preferences;
@@ -107,27 +107,37 @@ public class FontManager {
107107
};
108108

109109
private static final String[] JAPANESE_FONT_NAMES = {
110+
// windows
110111
"Arial Unicode MS", "PMingLiU", "MingLiU",
111112
"MS PMincho", "MS Mincho", "Kochi Mincho", "Hiragino Mincho Pro",
112-
"KozMinPro Regular Acro", "HeiseiMin W3 Acro", "Adobe Ming Std Acro"
113+
"KozMinPro Regular Acro", "HeiseiMin W3 Acro", "Adobe Ming Std Acro",
114+
// linux
115+
"ipaexmincho", "Kochi Gothic", "Hiragino Kaku Gothic Pro",
116+
113117
};
114118

115119
private static final String[] CHINESE_SIMPLIFIED_FONT_NAMES = {
116120
"Arial Unicode MS", "PMingLiU", "MingLiU",
117121
"SimSun", "NSimSun", "Kochi Mincho", "STFangsong", "STSong Light Acro",
118-
"Adobe Song Std Acro", "stsong"
122+
"Adobe Song Std Acro", "stsong",
123+
// linux
124+
"ipaexmincho", "Kochi Gothic", "Hiragino Kaku Gothic Pro",
119125
};
120126

121127
private static final String[] CHINESE_TRADITIONAL_FONT_NAMES = {
122128
"Arial Unicode MS", "PMingLiU", "MingLiU",
123129
"SimSun", "NSimSun", "Kochi Mincho", "BiauKai", "MSungStd Light Acro",
124-
"Adobe Song Std Acro"
130+
"Adobe Song Std Acro",
131+
// linux
132+
"umingcn", "ipaexmincho", "Kochi Gothic", "Hiragino Kaku Gothic Pro",
125133
};
126134

127135
private static final String[] KOREAN_FONT_NAMES = {
128136
"Arial Unicode MS", "Dotum", "Gulim", "New Gulim", "GulimChe", "Batang",
129137
"BatangChe", "HYSMyeongJoStd Medium Acro", "Adobe Myungjo Std Acro",
130-
"AppleGothic", "Malgun Gothic", "UnDotum", "UnShinmun", "Baekmuk Gulim"
138+
"AppleGothic", "Malgun Gothic", "UnDotum", "UnShinmun", "Baekmuk Gulim",
139+
// linux
140+
"ipaexmincho", "Kochi Gothic", "Hiragino Kaku Gothic Pro",
131141
};
132142

133143
/**

core/core-awt/src/main/java/org/icepdf/core/pobjects/fonts/ofont/Encoding.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
import java.util.HashMap;
1919

2020
/**
21-
* @deprecated
21+
*
2222
*/
23-
@Deprecated
2423
public class Encoding implements org.icepdf.core.pobjects.fonts.Encoding {
2524
private final char[] enc;
2625

0 commit comments

Comments
 (0)