+ * Each node is an element of a circular double-linked list, which allows you + * to easily traverse between siblings. To distinguish, where the list ends, + * a special marker node is inserted of a + * {@link ParseTreeNodeType#CHILD_SENTINEL} type. These marker nodes are + * internal and are not accessible to the class users. List manipulation + * is handled by the class itself. + *
+ * + *+ * Node can have multiple children, which can be traversed via the siblings + * interface. Only the root node and composite type nodes are expected to have + * children, while primitive type node should have just text instead. Each + * node, with the exception of the root node, will have a parent set for + * traversal. + *
+ * + *+ * Parse tree starts with a root node, which has no siblings, no parent, and + * is of a {@link ParseTreeNodeType#ROOT} type. Its first order children + * should be a tokenized representation of a PDF stream. For the most part the + * tree shouldn't be very tall at this moment, as there are very few composite + * types (string literals, arrays and dictionaries), and those are rarely + * encountered in a content stream in a deeply nested way. + *
+ */ +public final class ParseTreeNode { + /** + * Parent of the node. Should be null for root. + */ + private final ParseTreeNode parent; + /** + * Type of the node. + */ + private final ParseTreeNodeType type; + /** + * Text array, backing the tree node. Expected to be non-null for primitive + * types. Part of the inlined text segment data. + */ + private final char[] textArray; + /** + * Starting offset into the text array. Expected to be a valid value for + * primitive types. Part of the inlined text segment data. + */ + private final int textOffset; + /** + * Text segment length. Expected to be a valid value for primitive types. + * Part of the inlined text segment data. + */ + private final int textCount; + /** + * Circular double-linked list of children. Maintained manually by the + * class. Should point to a sentinel node, with getNext being the first + * element of the list and getPrev being the last element of the list. + */ + private ParseTreeNode children = null; + /** + * Pointer to the previous sibling node in a circular double-linked list. + * For a root node it will be set to {@code this}. + */ + private ParseTreeNode prev = this; + /** + * Pointer to the next sibling node in a circular double-linked list. + * For a root node it will be set to {@code this}. + */ + private ParseTreeNode next = this; + + /** + * Creates a root parse tree node. + */ + public ParseTreeNode() { + this.parent = null; + this.type = ParseTreeNodeType.ROOT; + this.textArray = null; + this.textOffset = 0; + this.textCount = 0; + } + + /** + * Creates a child parse tree node of a composite type. + * + * @param type Type of the node. Should be a composite type. + * @param parent Parent of the node. Should not be null. + */ + private ParseTreeNode(ParseTreeNodeType type, ParseTreeNode parent) { + this(type, null, 0, 0, parent); + } + + /** + * Creates a child parse tree node of a specified type, which is, + * optionally, backed by text. + * + * @param type Type of the node. 
+ * @param textArray Backing text array of the node. Should not be null + * for a primitive type. + * @param textOffset Starting offset into the text array. Should be valid + * for a primitive type. + * @param textCount Text segment length. Should be valid for a primitive + * type. + * @param parent Parent of the node. Should not be null. + */ + private ParseTreeNode(ParseTreeNodeType type, char[] textArray, int textOffset, int textCount, + ParseTreeNode parent) { + Objects.requireNonNull(type); + Objects.requireNonNull(parent); + if (textArray == null && type.isPrimitive()) { + throw new IllegalArgumentException("Primitive type should have text present"); + } + this.parent = parent; + this.type = type; + this.textArray = textArray; + this.textOffset = textOffset; + this.textCount = textCount; + } + + /** + * Returns whether the node is a root node or not. + * + * @return Whether the node is a root node or not. + */ + public boolean isRoot() { + // Only checking the parent pointer, as you should not be able to + // create a non-root node without a parent + return parent == null; + } + + /** + * Returns whether the node is a leaf node. I.e. it is a leaf node, if it + * has no children. Should be false only for root and primitive nodes. + * + * @return Whether the node is a leaf node. + */ + public boolean isLeaf() { + return children == null || (children.getNext() == children); + } + + /** + * Returns whether text of this node matches the specified text. This + * operation is valid only for primitive nodes. + * + * @param text Expected text. + * + * @return Whether text of this node matches the specified text. + */ + public boolean is(char[] text) { + return Arrays.equals(text, 0, text.length, textArray, textOffset, textOffset + textCount); + } + + /** + * Returns whether this is an operator type node with the specified text. + * + * @param operator Operator text. + * + * @return Whether this is an operator type node with the specified text. 
+ */ + public boolean isOperator(char[] operator) { + if (type != ParseTreeNodeType.OPERATOR) { + return false; + } + return is(operator); + } + + /** + * Returns the parent of the node. Will return null for root. + * + * @return The parent of the node. Will return null for root. + */ + public ParseTreeNode getParent() { + return parent; + } + + /** + * Returns the type of the node. + * + * @return The type of the node. + */ + public ParseTreeNodeType getType() { + return type; + } + + /** + * Returns the backing text of a node as a char sequence. Only valid for + * primitive type nodes. + * + * @return The backing text of a node as a char sequence. + */ + public CharSequence getText() { + return CharBuffer.wrap(textArray, textOffset, textCount); + } + + /** + * Returns the backing text array. Only valid for primitive type nodes. + * + * @return The backing text array. + */ + public char[] getTextArray() { + return textArray; + } + + /** + * Returns the starting offset into the text array. Only valid for + * primitive type nodes. + * + * @return The starting offset into the text array. + */ + public int getTextOffset() { + return textOffset; + } + + /** + * Returns the text segment length. Only valid for primitive type nodes. + * + * @return The text segment length. + */ + public int getTextCount() { + return textCount; + } + + /** + * Returns the start offset for the node. If this is a primitive node, + * then it is equivalent to calling {@link #getTextOffset()}. But if it is + * a composite node, it returns the text offset of the leftmost + * primitive descendant. + * + * @return The start offset for the node. + */ + public int getStartOffset() { + if (textArray != null) { + return textOffset; + } + ParseTreeNode child = getFirstChild(); + while (child != null) { + if (child.textArray != null) { + return child.textOffset; + } + child = child.getFirstChild(); + } + return 0; + } + + /** + * Returns the end offset for the node. 
If this is a primitive node, then + * it is equivalent to summing {@link #getTextOffset()} and + * {@link #getTextCount()}. But if it is a composite node, it returns the + * end offset of the leftmost primitive descendant. + * + * @return The start offset for the node. + */ + public int getEndOffset() { + if (textArray != null) { + return textOffset + textCount; + } + ParseTreeNode child = getLastChild(); + while (child != null) { + if (child.textArray != null) { + return child.textOffset + child.textCount; + } + child = child.getLastChild(); + } + return 0; + } + + /** + * Returns the first child of a node, or null, if it is a leaf. + * + * @return The first child of a node, or null, if it is a leaf. + */ + public ParseTreeNode getFirstChild() { + if (children == null) { + return null; + } + return children.getNext(); + } + + /** + * Returns the last child of a node, or null, if it is a leaf. + * + * @return The last child of a node, or null, if it is a leaf. + */ + public ParseTreeNode getLastChild() { + if (children == null) { + return null; + } + return children.getPrev(); + } + + /** + * Creates a new tree node and adds it as the last child of the node. + * + * @param type Type of the node. + * @param textArray Backing text array of the node. Should not be null + * for a primitive type. + * @param textOffset Starting offset into the text array. Should be valid + * for a primitive type. + * @param textCount Text segment length. Should be valid for a primitive + * type. + * + * @return The newly created child node. + */ + public ParseTreeNode addChild(ParseTreeNodeType type, char[] textArray, int textOffset, int textCount) { + return addChild(new ParseTreeNode(type, textArray, textOffset, textCount, this)); + } + + /** + * Creates a new tree node of a composite type and adds it as the last + * child of the node. + * + * @param type Type of the node. Should be a composite type. + * + * @return The newly created child node. 
+ */ + public ParseTreeNode addChild(ParseTreeNodeType type) { + return addChild(new ParseTreeNode(type, this)); + } + + /** + * Creates a new tree node and adds it as the next sibling of the node. + * + * @param type Type of the node. + * @param textArray Backing text array of the node. Should not be null + * for a primitive type. + * @param textOffset Starting offset into the text array. Should be valid + * for a primitive type. + * @param textCount Text segment length. Should be valid for a primitive + * type. + * + * @return The newly created child node. + */ + public ParseTreeNode addNext(ParseTreeNodeType type, char[] textArray, int textOffset, int textCount) { + final ParseTreeNode node = new ParseTreeNode(type, textArray, textOffset, textCount, getParent()); + linkNext(node); + return node; + } + + /** + * Returns the total length of text in this parse tree. This is calculated + * by summing the text lengths of all underlying primitive nodes. + * + * @return The total length of text in this parse tree. + */ + public int length() { + int result = 0; + final Iterator+ * Marker type is a type, which does not have anything to do with PDF, but it + * is used internally as markers with a special meaning, like the root of the + * parse tree. + *
+ * + *+ * Primitive type means, that it is a leaf node and it is defined by its text. + * For example, {@code NUMERIC} is a primitive type, which has no children and + * contains text of a number. + *
+ * + *+ * Composite type means, that this node does not contain text, but is just a + * container for other primitive nodes. For example, {@code STRING_LITERAL} is + * a composite type, and its children contain string open markers, string + * data and string close markers. + *
/**
 * Type of a node within a PDF content stream parse tree.
 *
 * <p>
 * Every constant belongs to exactly one category: marker, primitive or
 * composite. The category can be queried via {@link #isMarker()},
 * {@link #isPrimitive()} and {@link #isComposite()}.
 * </p>
 */
public enum ParseTreeNodeType {
    /**
     * Marker type. Root of the parse tree.
     */
    ROOT(Category.MARKER),
    /**
     * Marker type. A sentinel for a circular linked list of children.
     */
    CHILD_SENTINEL(Category.MARKER),

    /**
     * Primitive type. Whitespace between tokens.
     */
    WHITESPACE(Category.PRIMITIVE),

    /**
     * Primitive type. End-of-line comment marker with its body. Whitespace
     * at the end is not included.
     */
    COMMENT(Category.PRIMITIVE),

    /**
     * Primitive type. Boolean {@code true} and {@code false} objects.
     */
    BOOLEAN(Category.PRIMITIVE),

    /**
     * Primitive type. Numeric PDF objects.
     */
    NUMERIC(Category.PRIMITIVE),

    /**
     * Composite type. Literal PDF strings, enclosed in parentheses.
     */
    STRING_LITERAL(Category.COMPOSITE),
    /**
     * Primitive type. Byte sequence within a literal PDF string, excluding
     * left and right parentheses.
     */
    STRING_LITERAL_DATA(Category.PRIMITIVE),
    /**
     * Primitive type. A left parenthesis.
     *
     * <p>
     * First child of a {@code STRING_LITERAL} node will be of this type. One
     * literal node can have multiple open tokens, as they are parsed
     * separately to support parentheses matching.
     * </p>
     *
     * <p>
     * Can also be found outside of a {@code STRING_LITERAL} node as an
     * unexpected token.
     * </p>
     */
    STRING_LITERAL_OPEN(Category.PRIMITIVE),
    /**
     * Primitive type. A right parenthesis.
     *
     * <p>
     * Should be the last child of a {@code STRING_LITERAL} node, if it has
     * been finished and closed properly. One literal node can have multiple
     * close tokens, as they are parsed separately to support parentheses
     * matching.
     * </p>
     *
     * <p>
     * In contrast to {@code STRING_LITERAL_OPEN}, these should only be found
     * within a {@code STRING_LITERAL} node.
     * </p>
     */
    STRING_LITERAL_CLOSE(Category.PRIMITIVE),

    /**
     * Composite type. Hexadecimal PDF strings, enclosed in {@code <>}.
     */
    STRING_HEX(Category.COMPOSITE),
    /**
     * Primitive type. Byte sequence within a hexadecimal PDF string,
     * excluding {@code <} and {@code >}.
     */
    STRING_HEX_DATA(Category.PRIMITIVE),
    /**
     * Primitive type. {@code <} symbol.
     *
     * <p>
     * First child of a {@code STRING_HEX} node will be of this type. Compared
     * to literal strings, there can only be one in each string. But they are
     * still parsed separately to support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within a {@code STRING_HEX} node.
     * </p>
     */
    STRING_HEX_OPEN(Category.PRIMITIVE),
    /**
     * Primitive type. {@code >} symbol.
     *
     * <p>
     * Should be the last child of a {@code STRING_HEX} node, if it has
     * been finished and closed properly. Compared to literal strings, there
     * can only be one in each string. But they are still parsed separately to
     * support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within a {@code STRING_HEX} node.
     * </p>
     */
    STRING_HEX_CLOSE(Category.PRIMITIVE),

    /**
     * Primitive type. Name PDF objects.
     */
    NAME(Category.PRIMITIVE),

    /**
     * Composite type. PDF arrays, enclosed in square brackets.
     */
    ARRAY(Category.COMPOSITE),
    /**
     * Primitive type. A left square bracket.
     *
     * <p>
     * First child of an {@code ARRAY} node will be of this type. Compared to
     * literal strings, there can only be one in each array. But they are
     * still parsed separately to support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within an {@code ARRAY} node.
     * </p>
     */
    ARRAY_OPEN(Category.PRIMITIVE),
    /**
     * Primitive type. A right square bracket.
     *
     * <p>
     * Should be the last child of an {@code ARRAY} node, if it has
     * been finished and closed properly. Compared to literal strings, there
     * can only be one in each array. But they are still parsed separately to
     * support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within an {@code ARRAY} node.
     * </p>
     */
    ARRAY_CLOSE(Category.PRIMITIVE),

    /**
     * Composite type. PDF dictionaries, enclosed in {@code << >>}.
     */
    DICTIONARY(Category.COMPOSITE),
    /**
     * Primitive type. A {@code <<} token.
     *
     * <p>
     * First child of a {@code DICTIONARY} node will be of this type. Compared
     * to literal strings, there can only be one in each dictionary. But they
     * are still parsed separately to support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within a {@code DICTIONARY} node.
     * </p>
     */
    DICTIONARY_OPEN(Category.PRIMITIVE),
    /**
     * Primitive type. A {@code >>} token.
     *
     * <p>
     * Should be the last child of a {@code DICTIONARY} node, if it has
     * been finished and closed properly. Compared to literal strings, there
     * can only be one in each dictionary. But they are still parsed
     * separately to support begin/end matching.
     * </p>
     *
     * <p>
     * These should only be found within a {@code DICTIONARY} node.
     * </p>
     */
    DICTIONARY_CLOSE(Category.PRIMITIVE),

    /**
     * Primitive type. {@code null} objects.
     */
    NULL(Category.PRIMITIVE),

    /**
     * Primitive type. PDF content stream operator.
     */
    OPERATOR(Category.PRIMITIVE),

    /**
     * Primitive type. A byte sequence, which should not be rendered as text.
     * An example would be a body of an inline image.
     */
    BINARY_DATA(Category.PRIMITIVE),

    /**
     * Primitive type. A byte sequence, which is unexpected and has not been
     * covered by any of the concrete types.
     */
    UNKNOWN(Category.PRIMITIVE);

    /**
     * Category this type belongs to. Assigned once per constant.
     */
    private final Category category;

    /**
     * Creates a node type, which belongs to the specified category.
     *
     * @param category Category of the node type.
     */
    ParseTreeNodeType(Category category) {
        this.category = category;
    }

    /**
     * Tells whether this type is a marker type, i.e. one of {@link #ROOT}
     * or {@link #CHILD_SENTINEL}.
     *
     * @return {@code true} for marker types, {@code false} otherwise.
     */
    public final boolean isMarker() {
        return category == Category.MARKER;
    }

    /**
     * Tells whether this type is a primitive type, i.e. a type, which is
     * neither a marker nor a composite type.
     *
     * @return {@code true} for primitive types, {@code false} otherwise.
     */
    public final boolean isPrimitive() {
        return category == Category.PRIMITIVE;
    }

    /**
     * Tells whether this type is a composite type, i.e. one of
     * {@link #STRING_LITERAL}, {@link #STRING_HEX}, {@link #ARRAY} or
     * {@link #DICTIONARY}.
     *
     * @return {@code true} for composite types, {@code false} otherwise.
     */
    public final boolean isComposite() {
        return category == Category.COMPOSITE;
    }

    /**
     * Mutually exclusive categories of node types.
     */
    private enum Category {
        MARKER,
        PRIMITIVE,
        COMPOSITE
    }
}
+ + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.model.contentstream; + +import java.util.Arrays; +import javax.swing.text.Segment; + +/** + * A parser, which parses a PDF content stream string into a parse tree. + * + *+ * This code is based on the {@link com.itextpdf.io.source.PdfTokenizer}. + * Ideally we would just use that, but it has some limitations, which make it + * unusable for our tasks. + *
+ * + *+ * Currently there are not a lot of composite types in the parse tree, so the + * resulting representation is pretty low-level. This might get improved in + * the future to simplify static analysis. + *
+ * + *+ * It is somewhat assumed, that input text is in a Latin-1 encoding (as in no + * char exceeds U+00FF), so it might produce ambiguous results for non-Latin-1 + * characters. + *
+ */ +public final class PdfContentStreamParser { + /** + * "false" string as a char array. + */ + private static final char[] FALSE = {'f', 'a', 'l', 's', 'e'}; + /** + * "true" string as a char array. + */ + private static final char[] TRUE = {'t', 'r', 'u', 'e'}; + /** + * "null" string as a char array. + */ + private static final char[] NULL = {'n', 'u', 'l', 'l'}; + /** + * A length based mapping of PDF content stream operators. + * + *+ * If {@code L} is the expected length of the operator string, then at + * index {@code L - 1} you will get an array of all the possible + * operators, which has the length of {@code L}. + *
+ * + *+ * This is done to make the linear search a bit faster. While this can be + * improved, operator matching doesn't seem to be a bottleneck, so this + * will suffice for now. + *
+ */ + private static final char[][][] LENGTH_OPERATOR_MAP = { + { + PdfOperators.w, + PdfOperators.J, + PdfOperators.j, + PdfOperators.M, + PdfOperators.d, + PdfOperators.i, + PdfOperators.q, + PdfOperators.Q, + PdfOperators.m, + PdfOperators.l, + PdfOperators.c, + PdfOperators.v, + PdfOperators.y, + PdfOperators.h, + PdfOperators.S, + PdfOperators.s, + PdfOperators.f, + PdfOperators.F, + PdfOperators.B, + PdfOperators.b, + PdfOperators.n, + PdfOperators.W, + PdfOperators.SINGLE_QUOTE, + PdfOperators.DOUBLE_QUOTE, + PdfOperators.G, + PdfOperators.g, + PdfOperators.K, + PdfOperators.k, + }, + { + PdfOperators.ri, + PdfOperators.gs, + PdfOperators.cm, + PdfOperators.re, + PdfOperators.f_STAR, + PdfOperators.B_STAR, + PdfOperators.b_STAR, + PdfOperators.W_STAR, + PdfOperators.BT, + PdfOperators.ET, + PdfOperators.Tc, + PdfOperators.Tw, + PdfOperators.Tz, + PdfOperators.TL, + PdfOperators.Tf, + PdfOperators.Tr, + PdfOperators.Ts, + PdfOperators.Td, + PdfOperators.TD, + PdfOperators.Tm, + PdfOperators.T_STAR, + PdfOperators.Tj, + PdfOperators.TJ, + PdfOperators.d0, + PdfOperators.d1, + PdfOperators.CS, + PdfOperators.cs, + PdfOperators.SC, + PdfOperators.sc, + PdfOperators.RG, + PdfOperators.rg, + PdfOperators.Sh, + PdfOperators.BI, + PdfOperators.ID, + PdfOperators.EI, + PdfOperators.Do, + PdfOperators.MP, + PdfOperators.DP, + PdfOperators.BX, + PdfOperators.EX, + }, + { + PdfOperators.SCN, + PdfOperators.scn, + PdfOperators.BMC, + PdfOperators.BDC, + PdfOperators.EMC, + }, + }; + + /** + * In progress parsing result in a parse tree form. + */ + private ParseTreeNode result; + /** + * Current composite/marker node, that is being appended to. + */ + private ParseTreeNode currentNode; + /** + * Current parentheses balance inside a string literal. This is valid only + * when current node type is {@link ParseTreeNodeType#STRING_LITERAL}. + */ + private int stringLiteralParenthesesBalance; + + /** + * Creates a new PDF content stream parser. 
+ */ + public PdfContentStreamParser() { + reset(); + } + + /** + * Parses the provided PDF content stream string into a parse tree. + * + * @param text PDF content stream string to parse. + * + * @return Resulting parse tree. + */ + public static ParseTreeNode parse(String text) { + final PdfContentStreamParser parser = new PdfContentStreamParser(); + parser.append(text); + return parser.result(); + } + + /** + * Resets the parser into its initial state. + */ + public void reset() { + result = new ParseTreeNode(); + currentNode = result; + stringLiteralParenthesesBalance = 0; + } + + /** + * Appends the string to be processed by the parser. The string is parsed + * immediately during this call. + * + * @param text String to parse. + */ + public void append(String text) { + final char[] textArray = text.toCharArray(); + append(textArray, 0, textArray.length); + } + + /** + * Appends a sequence, which repeats a single character, to be processed + * by the parser. The sequence is parsed immediately during this call. + * + *+ * This could be useful, if you want to parse only a part of the stream, + * but you know, that it starts in the middle of a string literal with a + * known parentheses balance. In such case you can start parsing with a + * {@code parser.append('(', balance)} call and append the stream part + * after. After that you would just skip the added tokens in the result. + *
+ * + * @param ch Character to repeat in the sequence. + * @param count Amount of times to repeat the character. Should not be + * negative. + */ + public void append(char ch, int count) { + final char[] text = new char[count]; + Arrays.fill(text, ch); + append(text, 0, text.length); + } + + /** + * Appends the character array to be processed by the parser. The + * characters are parsed immediately during this call. + * + * @param text Characters to parse. + */ + public void append(char[] text) { + append(text, 0, text.length); + } + + /** + * Appends the character array slice to be processed by the parser. The + * characters are parsed immediately during this call. + * + * @param text Text slice backing array. + * @param begin Text slice begin index, inclusive. + * @param end Text slice end index, exclusive. + */ + public void append(char[] text, int begin, int end) { + int index = begin; + while (index < end) { + index = appendToken(text, index, end); + } + } + + /** + * Appends the text segment to be processed by the parser. The text is + * parsed immediately during this call. + * + * @param segment Text segment to parse. + */ + public void append(Segment segment) { + append(segment.array, segment.offset, segment.offset + segment.count); + } + + /** + * Returns the parsing result. + * + * @return The parsing result. + */ + public ParseTreeNode result() { + return result; + } + + /* + * append* methods below are all made the same way. The take an input + * slice as an input and if a token is parsed, returned index will be + * incremented forwards. And they are designed in such a way, that they + * shouldn't parse things there are not supposed to. + * + * So to process a slice you would just go through the token types and try + * appending them. If index wasn't moved, then just try a different type. + */ + + /** + * Process a single token from the input text and returns the index, where + * the next token will start. 
+ * + *+ * With how the method is designed, it will add one primitive token at + * most, so to process the whole string, you need to call this in a loop + * till the return index is outside the string. + *
+ * + * @param text Text slice backing array. + * @param begin Text slice begin index, inclusive. + * @param end Text slice end index, exclusive. + * + * @return Starting index for the next token. + */ + private int appendToken(char[] text, int begin, int end) { + assert begin < end; + + // Special case: we are currently inside a string literal + if (currentNode.getType() == ParseTreeNodeType.STRING_LITERAL) { + return appendStringLiteralContinuation(text, begin, end); + } + + // Special case: we are currently inside a hex string + if (currentNode.getType() == ParseTreeNodeType.STRING_HEX) { + return appendStringHexContinuation(text, begin, end); + } + + /* + * Everything below is the normal parsing case. Append token calls + * should be ordered based on how often you would encounter them in a + * PDF content stream for performance reasons. + */ + + int index = appendWhitespace(text, begin, end); + if (index > begin) { + return index; + } + + index = appendNumeric(text, begin, end); + if (index > begin) { + return index; + } + + index = appendName(text, begin, end); + if (index > begin) { + return index; + } + + index = appendStringLiteralOpen(text, begin); + if (index > begin) { + return index; + } + + // If a hex string or a dictionary is being open + if (text[begin] == '<') { + // Opening a dictionary + if (begin + 1 < end && text[begin + 1] == '<') { + currentNode = currentNode.addChild(ParseTreeNodeType.DICTIONARY); + currentNode.addChild(ParseTreeNodeType.DICTIONARY_OPEN, text, begin, 2); + return begin + 2; + } + // Otherwise opening a hex string + currentNode = currentNode.addChild(ParseTreeNodeType.STRING_HEX); + currentNode.addChild(ParseTreeNodeType.STRING_HEX_OPEN, text, begin, 1); + return begin + 1; + } + + /* + * Hex string terminator is handled within appendStringHexContinuation. + * Here we just handle dictionary terminators and rogues tokens. 
+ */ + if (text[begin] == '>') { + // Closing a dictionary + if (begin + 1 < end && text[begin + 1] == '>') { + currentNode.addChild(ParseTreeNodeType.DICTIONARY_CLOSE, text, begin, 2); + // If this is actually a dictionary terminator, then finishing the dictionary node + if (currentNode.getType() == ParseTreeNodeType.DICTIONARY) { + currentNode = currentNode.getParent(); + } + return begin + 2; + } + // Otherwise a rogue hex string termination token + currentNode.addChild(ParseTreeNodeType.STRING_HEX_CLOSE, text, begin, 1); + return begin + 1; + } + + // If an array is being open + if (text[begin] == '[') { + currentNode = currentNode.addChild(ParseTreeNodeType.ARRAY); + currentNode.addChild(ParseTreeNodeType.ARRAY_OPEN, text, begin, 1); + return begin + 1; + } + + // If an array is being closed + if (text[begin] == ']') { + currentNode.addChild(ParseTreeNodeType.ARRAY_CLOSE, text, begin, 1); + // If this is actually an array terminator, then finishing the array node + if (currentNode.getType() == ParseTreeNodeType.ARRAY) { + currentNode = currentNode.getParent(); + } + return begin + 1; + } + + index = appendBoolean(text, begin, end); + if (index > begin) { + return index; + } + + index = appendNull(text, begin, end); + if (index > begin) { + return index; + } + + index = appendComment(text, begin, end); + if (index > begin) { + return index; + } + + // This will add something, either an operator or an UNKNOWN token + return appendPotentialOperator(text, begin, end); + } + + private int appendStringLiteralContinuation(char[] text, int begin, int end) { + assert begin < end; + assert currentNode.getType() == ParseTreeNodeType.STRING_LITERAL; + + int index = appendStringLiteralData(text, begin, end); + if (index > begin) { + return index; + } + + index = appendStringLiteralClose(text, begin); + if (index > begin) { + return index; + } + + return appendStringLiteralOpen(text, begin); + } + + private int appendStringHexContinuation(char[] text, int begin, int end) { 
+ assert begin < end; + assert currentNode.getType() == ParseTreeNodeType.STRING_HEX; + + int index = appendStringHexData(text, begin, end); + if (index > begin) { + return index; + } + + index = appendStringHexClose(text, begin); + if (index > begin) { + return index; + } + + return appendWhitespace(text, begin, end); + } + + private int appendWhitespace(char[] text, int begin, int end) { + int index = begin; + while (index < end && isWhitespace(text[index])) { + ++index; + } + if (index > begin) { + currentNode.addChild(ParseTreeNodeType.WHITESPACE, text, begin, index - begin); + } + return index; + } + + private int appendComment(char[] text, int begin, int end) { + assert begin < end; + + int index = begin; + if (text[index] != '%') { + return index; + } + + do { + ++index; + } while (index < end && text[index] != '\r' && text[index] != '\n'); + currentNode.addChild(ParseTreeNodeType.COMMENT, text, begin, index - begin); + return index; + } + + private int appendBoolean(char[] text, int begin, int end) { + if (containsAt(FALSE, text, begin, end)) { + currentNode.addChild(ParseTreeNodeType.BOOLEAN, text, begin, FALSE.length); + return begin + FALSE.length; + } + if (containsAt(TRUE, text, begin, end)) { + currentNode.addChild(ParseTreeNodeType.BOOLEAN, text, begin, TRUE.length); + return begin + TRUE.length; + } + return begin; + } + + private int appendNumeric(char[] text, int begin, int end) { + assert begin < end; + + int index = begin; + while (index < end && text[index] == '-') { + ++index; + } + while (index < end && ('0' <= text[index] && text[index] <= '9')) { + ++index; + } + if (index < end && text[index] == '.') { + do { + ++index; + } while (index < end && ('0' <= text[index] && text[index] <= '9')); + } + if (index > begin) { + currentNode.addChild(ParseTreeNodeType.NUMERIC, text, begin, index - begin); + } + return index; + } + + private int appendStringLiteralData(char[] text, int begin, int end) { + int index = begin; + while (index < end && 
text[index] != '(' && text[index] != ')') { + if (text[index] == '\\') { + index = Math.min(index + 2, end); + } else { + ++index; + } + } + if (index > begin) { + currentNode.addChild(ParseTreeNodeType.STRING_LITERAL_DATA, text, begin, index - begin); + } + return index; + } + + private int appendStringLiteralOpen(char[] text, int index) { + if (text[index] != '(') { + return index; + } + if (stringLiteralParenthesesBalance == 0) { + currentNode = currentNode.addChild(ParseTreeNodeType.STRING_LITERAL); + } + currentNode.addChild(ParseTreeNodeType.STRING_LITERAL_OPEN, text, index, 1); + ++stringLiteralParenthesesBalance; + return index + 1; + } + + private int appendStringLiteralClose(char[] text, int index) { + if (text[index] != ')') { + return index; + } + currentNode.addChild(ParseTreeNodeType.STRING_LITERAL_CLOSE, text, index, 1); + if (stringLiteralParenthesesBalance == 1) { + currentNode = currentNode.getParent(); + } + if (stringLiteralParenthesesBalance > 0) { + --stringLiteralParenthesesBalance; + } + return index + 1; + } + + private int appendStringHexData(char[] text, int begin, int end) { + int index = begin; + while (index < end && text[index] != '>' && !isWhitespace(text[index])) { + ++index; + } + if (index > begin) { + currentNode.addChild(ParseTreeNodeType.STRING_HEX_DATA, text, begin, index - begin); + } + return index; + } + + private int appendStringHexClose(char[] text, int index) { + if (text[index] == '>') { + currentNode.addChild(ParseTreeNodeType.STRING_HEX_CLOSE, text, index, 1); + currentNode = currentNode.getParent(); + return index + 1; + } + return index; + } + + private int appendName(char[] text, int begin, int end) { + assert begin < end; + + int index = begin; + if (text[index] != '/') { + return index; + } + + do { + ++index; + } while (index < end && !isDelimiterWhitespace(text[index])); + currentNode.addChild(ParseTreeNodeType.NAME, text, begin, index - begin); + return index; + } + + private int appendNull(char[] text, int 
begin, int end) { + if (containsAt(NULL, text, begin, end)) { + currentNode.addChild(ParseTreeNodeType.NULL, text, begin, NULL.length); + return begin + NULL.length; + } + return begin; + } + + private int appendPotentialOperator(char[] text, int begin, int end) { + assert begin < end; + + /* + * At this point it might only be an operator or garbage... Since we + * need to match the biggest operator, we need to find the end of the + * token before matching. + */ + + int index = begin + 1; + while (index < end && !isDelimiterWhitespace(text[index])) { + ++index; + } + + final int length = index - begin; + if (length <= LENGTH_OPERATOR_MAP.length) { + final char[][] operatorMap = LENGTH_OPERATOR_MAP[length - 1]; + for (final char[] operator : operatorMap) { + if (equals(operator, text, begin, index)) { + currentNode.addChild(ParseTreeNodeType.OPERATOR, text, begin, operator.length); + return index; + } + } + } + + currentNode.addChild(ParseTreeNodeType.UNKNOWN, text, begin, length); + return index; + } + + private static boolean isWhitespace(char ch) { + switch (ch) { + case '\0': + case '\t': + case '\n': + case '\f': + case '\r': + case ' ': + return true; + default: + return false; + } + } + + private static boolean isDelimiterWhitespace(char ch) { + switch (ch) { + case '\0': + case '\t': + case '\n': + case '\f': + case '\r': + case ' ': + case '(': + case ')': + case '<': + case '>': + case '[': + case ']': + case '/': + case '%': + return true; + default: + return false; + } + } + + private static boolean containsAt(char[] expected, char[] text, int begin, int end) { + final int toIndex = begin + expected.length; + if (toIndex > end) { + return false; + } + return Arrays.equals(expected, 0, expected.length, text, begin, toIndex); + } + + private static boolean equals(char[] expected, char[] text, int begin, int end) { + return Arrays.equals(expected, 0, expected.length, text, begin, end); + } +} diff --git 
a/src/main/java/com/itextpdf/rups/model/contentstream/PdfOperators.java b/src/main/java/com/itextpdf/rups/model/contentstream/PdfOperators.java new file mode 100644 index 00000000..1f14feac --- /dev/null +++ b/src/main/java/com/itextpdf/rups/model/contentstream/PdfOperators.java @@ -0,0 +1,176 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. 
Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.model.contentstream; + +/** + * Static class, which stores all PDF content stream operators as + * {@code char[]}. + */ +@SuppressWarnings({"java:S1845", "java:S2386"}) +public final class PdfOperators { + /* + * General graphics state + */ + public static final char[] w = new char[] {'w'}; + public static final char[] J = new char[] {'J'}; + public static final char[] j = new char[] {'j'}; + public static final char[] M = new char[] {'M'}; + public static final char[] d = new char[] {'d'}; + public static final char[] ri = new char[] {'r', 'i'}; + public static final char[] i = new char[] {'i'}; + public static final char[] gs = new char[] {'g', 's'}; + public static final char[] Q = new char[] {'Q'}; + public static final char[] q = new char[] {'q'}; + /* + * Special graphics state + */ + public static final char[] cm = new char[] {'c', 'm'}; + /* + * Path construction + */ + public static final char[] m = new char[] {'m'}; + public static final char[] l = new char[] {'l'}; + public static final char[] c = new char[] {'c'}; + public static final char[] v = new char[] {'v'}; + public static final char[] y = new char[] {'y'}; + public static final char[] h = new char[] {'h'}; + public static final char[] re = new char[] {'r', 'e'}; + /* + * Path painting + */ + public static final char[] S = new char[] {'S'}; + public static final char[] s = new char[] {'s'}; + public static final char[] F = new char[] {'F'}; + public static final char[] f = new char[] {'f'}; + public static final char[] f_STAR = 
new char[] {'f', '*'}; + public static final char[] B = new char[] {'B'}; + public static final char[] B_STAR = new char[] {'B', '*'}; + public static final char[] b = new char[] {'b'}; + public static final char[] b_STAR = new char[] {'b', '*'}; + public static final char[] n = new char[] {'n'}; + /* + * Clipping paths + */ + public static final char[] W = new char[] {'W'}; + public static final char[] W_STAR = new char[] {'W', '*'}; + /* + * Text objects + */ + public static final char[] BT = new char[] {'B', 'T'}; + public static final char[] ET = new char[] {'E', 'T'}; + /* + * Text state + */ + public static final char[] Tc = new char[] {'T', 'c'}; + public static final char[] Tw = new char[] {'T', 'w'}; + public static final char[] Tz = new char[] {'T', 'z'}; + public static final char[] TL = new char[] {'T', 'L'}; + public static final char[] Tf = new char[] {'T', 'f'}; + public static final char[] Tr = new char[] {'T', 'r'}; + public static final char[] Ts = new char[] {'T', 's'}; + /* + * Text positioning + */ + public static final char[] Td = new char[] {'T', 'd'}; + public static final char[] TD = new char[] {'T', 'D'}; + public static final char[] Tm = new char[] {'T', 'm'}; + public static final char[] T_STAR = new char[] {'T', '*'}; + /* + * Text showing + */ + public static final char[] Tj = new char[] {'T', 'j'}; + public static final char[] TJ = new char[] {'T', 'J'}; + public static final char[] SINGLE_QUOTE = new char[] {'\''}; + public static final char[] DOUBLE_QUOTE = new char[] {'"'}; + /* + * Type 3 fonts + */ + public static final char[] d0 = new char[] {'d', '0'}; + public static final char[] d1 = new char[] {'d', '1'}; + /* + * Colour + */ + public static final char[] CS = new char[] {'C', 'S'}; + public static final char[] cs = new char[] {'c', 's'}; + public static final char[] SC = new char[] {'S', 'C'}; + public static final char[] sc = new char[] {'s', 'c'}; + public static final char[] SCN = new char[] {'S', 'C', 'N'}; + public 
static final char[] scn = new char[] {'s', 'c', 'n'}; + public static final char[] G = new char[] {'G'}; + public static final char[] g = new char[] {'g'}; + public static final char[] RG = new char[] {'R', 'G'}; + public static final char[] rg = new char[] {'r', 'g'}; + public static final char[] K = new char[] {'K'}; + public static final char[] k = new char[] {'k'}; + /* + * Shading patterns + */ + public static final char[] Sh = new char[] {'S', 'h'}; + /* + * Inline images + */ + public static final char[] BI = new char[] {'B', 'I'}; + public static final char[] ID = new char[] {'I', 'D'}; + public static final char[] EI = new char[] {'E', 'I'}; + /* + * XObjects + */ + public static final char[] Do = new char[] {'D', 'o'}; + /* + * Marked-content + */ + public static final char[] MP = new char[] {'M', 'P'}; + public static final char[] DP = new char[] {'D', 'P'}; + public static final char[] BMC = new char[] {'B', 'M', 'C'}; + public static final char[] BDC = new char[] {'B', 'D', 'C'}; + public static final char[] EMC = new char[] {'E', 'M', 'C'}; + /* + * Compatibility + */ + public static final char[] BX = new char[] {'B', 'X'}; + public static final char[] EX = new char[] {'E', 'X'}; + + private PdfOperators() { + // Static class + } +} diff --git a/src/main/java/com/itextpdf/rups/view/Language.java b/src/main/java/com/itextpdf/rups/view/Language.java index b0805779..e01daaa9 100644 --- a/src/main/java/com/itextpdf/rups/view/Language.java +++ b/src/main/java/com/itextpdf/rups/view/Language.java @@ -95,6 +95,7 @@ public enum Language { ERROR_BUILDING_CONTENT_STREAM, ERROR_CANNOT_CHECK_NULL_FOR_INPUT_STREAM, ERROR_CANNOT_FIND_FILE, + ERROR_CHARACTER_ENCODING, ERROR_CLOSING_STREAM, ERROR_COMPARE_DOCUMENT_CREATION, ERROR_COMPARED_DOCUMENT_CLOSED, @@ -196,6 +197,21 @@ public enum Language { PAGE_NUMBER, PAGES, PAGES_TABLE_OBJECT, + + PARSER_NOT_CLOSED_ARRAY, + PARSER_NOT_CLOSED_DICTIONARY, + PARSER_NOT_CLOSED_STRING_HEX, + PARSER_NOT_CLOSED_STRING_LITERAL, + 
PARSER_OPERAND_TYPES_C, + PARSER_OPERAND_TYPES_H, + PARSER_OPERAND_TYPES_L, + PARSER_OPERAND_TYPES_M, + PARSER_OPERAND_TYPES_RE, + PARSER_OPERAND_TYPES_V, + PARSER_OPERAND_TYPES_Y, + PARSER_UNEXPECTED_TOKEN, + PARSER_WASTEFUL_WHITESPACE, + PDF_READING, PDF_OBJECT_TREE, PLAINTEXT, diff --git a/src/main/java/com/itextpdf/rups/view/contextmenu/InspectObjectAction.java b/src/main/java/com/itextpdf/rups/view/contextmenu/InspectObjectAction.java index 41cdd73a..b58dd194 100644 --- a/src/main/java/com/itextpdf/rups/view/contextmenu/InspectObjectAction.java +++ b/src/main/java/com/itextpdf/rups/view/contextmenu/InspectObjectAction.java @@ -45,7 +45,7 @@ This file is part of the iText (R) project. import com.itextpdf.rups.view.Language; import com.itextpdf.rups.view.icons.FrameIconUtil; import com.itextpdf.rups.view.itext.PdfTree; -import com.itextpdf.rups.view.itext.SyntaxHighlightedStreamPane; +import com.itextpdf.rups.view.itext.stream.StreamTextEditorPane; import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode; import javax.swing.AbstractAction; @@ -83,10 +83,10 @@ public void actionPerformed(ActionEvent e) { final PdfObjectTreeNode node = (PdfObjectTreeNode) ((PdfTree) invoker).getSelectionPath().getLastPathComponent(); - final SyntaxHighlightedStreamPane syntaxHighlightedStreamPane = new SyntaxHighlightedStreamPane(null); + final StreamTextEditorPane streamPane = new StreamTextEditorPane(null); - frame.add(syntaxHighlightedStreamPane); - syntaxHighlightedStreamPane.render(node); + frame.add(streamPane); + streamPane.render(node); final Language dialogCancel = Language.DIALOG_CANCEL; frame.getRootPane().getInputMap(JComponent.WHEN_IN_FOCUSED_WINDOW) diff --git a/src/main/java/com/itextpdf/rups/view/contextmenu/SaveToPdfStreamJTextPaneAction.java b/src/main/java/com/itextpdf/rups/view/contextmenu/SaveToPdfStreamJTextPaneAction.java index 431af854..2ae6b6ea 100644 --- a/src/main/java/com/itextpdf/rups/view/contextmenu/SaveToPdfStreamJTextPaneAction.java +++ 
b/src/main/java/com/itextpdf/rups/view/contextmenu/SaveToPdfStreamJTextPaneAction.java @@ -42,18 +42,18 @@ This file is part of the iText (R) project. */ package com.itextpdf.rups.view.contextmenu; -import com.itextpdf.rups.view.itext.SyntaxHighlightedStreamPane; +import com.itextpdf.rups.view.itext.stream.StreamTextEditorPane; import java.awt.event.ActionEvent; public class SaveToPdfStreamJTextPaneAction extends AbstractRupsAction { - public SaveToPdfStreamJTextPaneAction(String name, SyntaxHighlightedStreamPane invoker) { + public SaveToPdfStreamJTextPaneAction(String name, StreamTextEditorPane invoker) { super(name, invoker); } public void actionPerformed(ActionEvent event) { - final SyntaxHighlightedStreamPane pane = (SyntaxHighlightedStreamPane) invoker; + final StreamTextEditorPane pane = (StreamTextEditorPane) invoker; pane.saveToTarget(); } } diff --git a/src/main/java/com/itextpdf/rups/view/contextmenu/StreamPanelContextMenu.java b/src/main/java/com/itextpdf/rups/view/contextmenu/StreamPanelContextMenu.java index 0b72a941..6320c635 100644 --- a/src/main/java/com/itextpdf/rups/view/contextmenu/StreamPanelContextMenu.java +++ b/src/main/java/com/itextpdf/rups/view/contextmenu/StreamPanelContextMenu.java @@ -43,13 +43,13 @@ This file is part of the iText (R) project. 
package com.itextpdf.rups.view.contextmenu; import com.itextpdf.rups.view.Language; -import com.itextpdf.rups.view.itext.SyntaxHighlightedStreamPane; +import com.itextpdf.rups.view.itext.stream.StreamTextEditorPane; import javax.swing.Action; +import javax.swing.JComponent; import javax.swing.JMenuItem; import javax.swing.JPopupMenu; import javax.swing.JSeparator; -import javax.swing.JTextPane; import javax.swing.text.DefaultEditorKit; /** @@ -73,7 +73,7 @@ public final class StreamPanelContextMenu extends JPopupMenu { * @param textPane the text pane * @param controller the controller */ - public StreamPanelContextMenu(final JTextPane textPane, final SyntaxHighlightedStreamPane controller) { + public StreamPanelContextMenu(final JComponent textPane, final StreamTextEditorPane controller) { super(); final JMenuItem copyItem = getJMenuItem( diff --git a/src/main/java/com/itextpdf/rups/view/itext/SyntaxHighlightedStreamPane.java b/src/main/java/com/itextpdf/rups/view/itext/SyntaxHighlightedStreamPane.java deleted file mode 100644 index 282bb480..00000000 --- a/src/main/java/com/itextpdf/rups/view/itext/SyntaxHighlightedStreamPane.java +++ /dev/null @@ -1,371 +0,0 @@ -/* - This file is part of the iText (R) project. - Copyright (c) 1998-2025 Apryse Group NV - Authors: Apryse Software. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. 
- See the GNU Affero General Public License for more details. - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA, or download the license from the following URL: - http://itextpdf.com/terms-of-use/ - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using iText. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the iText software without - disclosing the source code of your own applications. - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping iText with a closed - source product. - - For more information, please contact iText Software Corp. 
at this - address: sales@itextpdf.com - */ -package com.itextpdf.rups.view.itext; - -import com.itextpdf.kernel.exceptions.PdfException; -import com.itextpdf.kernel.pdf.PdfDictionary; -import com.itextpdf.kernel.pdf.PdfName; -import com.itextpdf.kernel.pdf.PdfStream; -import com.itextpdf.kernel.pdf.xobject.PdfImageXObject; -import com.itextpdf.rups.controller.PdfReaderController; -import com.itextpdf.rups.model.LoggerHelper; -import com.itextpdf.rups.model.ObjectLoader; -import com.itextpdf.rups.model.IRupsEventListener; -import com.itextpdf.rups.view.Language; -import com.itextpdf.rups.view.contextmenu.ContextMenuMouseListener; -import com.itextpdf.rups.view.contextmenu.SaveImageAction; -import com.itextpdf.rups.view.contextmenu.StreamPanelContextMenu; -import com.itextpdf.rups.view.itext.contentstream.ContentStreamWriter; -import com.itextpdf.rups.view.itext.contentstream.StyledSyntaxDocument; -import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode; - -import java.awt.Toolkit; -import java.awt.event.ActionEvent; -import java.awt.event.InputEvent; -import java.awt.event.KeyEvent; -import java.awt.event.MouseEvent; -import java.awt.image.BufferedImage; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.nio.charset.StandardCharsets; -import javax.swing.AbstractAction; -import javax.swing.ImageIcon; -import javax.swing.JComponent; -import javax.swing.JScrollPane; -import javax.swing.JTextPane; -import javax.swing.KeyStroke; -import javax.swing.ToolTipManager; -import javax.swing.text.BadLocationException; -import javax.swing.text.SimpleAttributeSet; -import javax.swing.text.Style; -import javax.swing.text.StyleConstants; -import javax.swing.text.StyledDocument; -import javax.swing.tree.TreeNode; -import javax.swing.undo.CannotRedoException; -import javax.swing.undo.CannotUndoException; -import javax.swing.undo.UndoManager; - -public final class SyntaxHighlightedStreamPane extends JScrollPane 
implements IRupsEventListener { - - private static final int MAX_NUMBER_OF_EDITS = 8192; - - private static Method pdfStreamGetInputStreamMethod; - - /** - * The text pane with the content stream. - */ - private final JSyntaxPane text; - - private final StreamPanelContextMenu popupMenu; - - private PdfObjectTreeNode target; - - private final UndoManager manager; - - //Todo: Remove that field after proper application structure will be implemented. - private final PdfReaderController controller; - - private boolean editable = false; - - static { - try { - pdfStreamGetInputStreamMethod = PdfStream.class.getDeclaredMethod("getInputStream"); - pdfStreamGetInputStreamMethod.setAccessible(true); - } catch (NoSuchMethodException | SecurityException any) { - pdfStreamGetInputStreamMethod = null; - LoggerHelper.error(Language.ERROR_REFLECTION_PDF_STREAM.getString(), any, PdfReaderController.class); - } - } - - /** - * Constructs a SyntaxHighlightedStreamPane. - * - * @param controller the pdf reader controller - */ - public SyntaxHighlightedStreamPane(PdfReaderController controller) { - this.text = new JSyntaxPane(); - ToolTipManager.sharedInstance().registerComponent(text); - setViewportView(text); - this.controller = controller; - - popupMenu = new StreamPanelContextMenu(text, this); - text.setComponentPopupMenu(popupMenu); - text.addMouseListener(new ContextMenuMouseListener(popupMenu, text)); - - manager = new UndoManager(); - manager.setLimit(MAX_NUMBER_OF_EDITS); - text.getDocument().addUndoableEditListener(manager); - text.registerKeyboardAction(new UndoAction(manager), - KeyStroke.getKeyStroke(KeyEvent.VK_Z, InputEvent.CTRL_DOWN_MASK), JComponent.WHEN_FOCUSED); - text.registerKeyboardAction(new RedoAction(manager), - KeyStroke.getKeyStroke(KeyEvent.VK_Y, InputEvent.CTRL_DOWN_MASK), JComponent.WHEN_FOCUSED); - } - - /** - * Renders the content stream of a PdfObject or empties the text area. 
- * - * @param target the node of which the content stream needs to be rendered - */ - public void render(PdfObjectTreeNode target) { - manager.discardAllEdits(); - manager.setLimit(0); - this.target = target; - if (!(target.getPdfObject() instanceof PdfStream)) { - clearPane(); - return; - } - final PdfStream stream = (PdfStream) target.getPdfObject(); - text.setText(""); - //Check if stream is image - if (PdfName.Image.equals(stream.getAsName(PdfName.Subtype))) { - try { - //Convert byte array back to Image - if (!stream.get(PdfName.Width, false).isNumber() && !stream.get(PdfName.Height, false).isNumber()) { - return; - } - PdfImageXObject pimg = new PdfImageXObject(stream); - BufferedImage img = pimg.getBufferedImage(); - if (img == null) { - text.setText(Language.ERROR_LOADING_IMAGE.getString()); - } else { - //Show image in textpane - StyledDocument doc = (StyledDocument) text.getDocument(); - Style style = doc.addStyle("Image", null); - StyleConstants.setIcon(style, new ImageIcon(img)); - - try { - doc.insertString(doc.getLength(), Language.IGNORED_TEXT.getString(), style); - doc.insertString(doc.getLength(), "\n", SimpleAttributeSet.EMPTY); - text.insertComponent(SaveImageAction.createSaveImageButton(img)); - } catch (BadLocationException e) { - LoggerHelper.error(Language.ERROR_UNEXPECTED_EXCEPTION.getString(), e, getClass()); - } - } - } catch (IOException e) { - LoggerHelper.error(Language.ERROR_UNEXPECTED_EXCEPTION.getString(), e, getClass()); - } - setTextEditableRoutine(false); - } else if (stream.get(PdfName.Length1) != null) { - try { - setTextEditableRoutine(true); - byte[] bytes = stream.getBytes(false); - // This is binary content, so encoding doesn't really matter - text.setText(new String(bytes, StandardCharsets.ISO_8859_1)); - text.setCaretPosition(0); - } catch (com.itextpdf.io.exceptions.IOException e) { - text.setText(""); - setTextEditableRoutine(false); - } - } else { - renderGenericContentStream(stream); - } - text.repaint(); - 
manager.setLimit(MAX_NUMBER_OF_EDITS); - repaint(); - } - - public void saveToTarget() { - /* - * FIXME: With indirect objects with multiple references, this will - * change the tree only in one of them. - * FIXME: This doesn't change Length... - */ - manager.discardAllEdits(); - manager.setLimit(0); - if (controller != null && ((PdfDictionary) target.getPdfObject()).containsKey(PdfName.Filter)) { - controller.deleteTreeNodeDictChild(target, PdfName.Filter); - } - /* - * In the current state, stream node could contain ASN1. data, which - * is parsed and added as tree nodes. After editing, it won't be valid, - * so we must remove them. - */ - if (controller != null) { - int i = 0; - while (i < target.getChildCount()) { - final TreeNode child = target.getChildAt(i); - if (child instanceof PdfObjectTreeNode) { - ++i; - } else { - controller.deleteTreeChild(target, i); - // Will assume it being just a shift... - } - } - } - final int sizeEst = text.getText().length(); - final ByteArrayOutputStream baos = new ByteArrayOutputStream(sizeEst); - try { - new ContentStreamWriter(baos).write(text.getDocument()); - } catch (IOException e) { - LoggerHelper.error(Language.ERROR_UNEXPECTED_EXCEPTION.getString(), e, getClass()); - } - ((PdfStream) target.getPdfObject()).setData(baos.toByteArray()); - if (controller != null) { - controller.selectNode(target); - } - manager.setLimit(MAX_NUMBER_OF_EDITS); - } - - public void setEditable(boolean editable) { - this.editable = editable; - setTextEditableRoutine(editable); - } - - @Override - public void handleCloseDocument() { - clearPane(); - setEditable(false); - } - - @Override - public void handleOpenDocument(ObjectLoader loader) { - clearPane(); - setEditable(loader.getFile().isOpenedAsOwner()); - } - - private void setTextEditableRoutine(boolean editable) { - if (!this.editable) { - text.setEditable(false); - popupMenu.setSaveToStreamEnabled(false); - return; - } - - text.setEditable(editable); - if ((pdfStreamGetInputStreamMethod 
!= null) && editable && (target != null) && - (target.getPdfObject() instanceof PdfStream)) { - try { - popupMenu.setSaveToStreamEnabled(pdfStreamGetInputStreamMethod.invoke(target.getPdfObject()) == null); - return; - } catch (Exception any) { - LoggerHelper.error(Language.ERROR_CANNOT_CHECK_NULL_FOR_INPUT_STREAM.getString(), any, getClass()); - } - } - popupMenu.setSaveToStreamEnabled(false); - } - - private void clearPane() { - target = null; - manager.discardAllEdits(); - manager.setLimit(0); - text.setText(""); - setTextEditableRoutine(false); - } - - private void renderGenericContentStream(PdfStream stream) { - final StyledSyntaxDocument doc = (StyledSyntaxDocument) text.getDocument(); - setTextEditableRoutine(true); - - byte[] bb = null; - try { - bb = stream.getBytes(); - doc.processContentStream(bb); - } catch (PdfException | com.itextpdf.io.exceptions.IOException e) { - LoggerHelper.warn(Language.ERROR_PARSING_PDF_STREAM.getString(), e, getClass()); - if (bb != null) { - text.setText(new String(bb, StandardCharsets.ISO_8859_1)); - } - } - text.setCaretPosition(0); // set the caret at the start so the panel will show the first line - } - - private static final class JSyntaxPane extends JTextPane { - - JSyntaxPane() { - super(new StyledSyntaxDocument()); - } - - StyledSyntaxDocument getStyledSyntaxDocument() { - // can't just override getDocument() because the superclass - // constructor relies on it - return (StyledSyntaxDocument) super.getDocument(); - } - - @Override - public String getToolTipText(MouseEvent ev) { - final String toolTip = getStyledSyntaxDocument() - .getToolTipAt(viewToModel2D(ev.getPoint())); - return toolTip == null ? 
super.getToolTipText(ev) : toolTip; - } - - @Override - public boolean getScrollableTracksViewportWidth() { - // Disable line wrapping by ensuring the text pane is never resized smaller than its preferred width - return getParent().getSize().width > getUI().getPreferredSize(this).width; - } - } - -} - - -class UndoAction extends AbstractAction { - private final UndoManager manager; - - public UndoAction(UndoManager manager) { - this.manager = manager; - } - - public void actionPerformed(ActionEvent evt) { - try { - manager.undo(); - } catch (CannotUndoException e) { - Toolkit.getDefaultToolkit().beep(); - } - } -} - -class RedoAction extends AbstractAction { - private final UndoManager manager; - - public RedoAction(UndoManager manager) { - this.manager = manager; - } - - public void actionPerformed(ActionEvent evt) { - try { - manager.redo(); - } catch (CannotRedoException e) { - Toolkit.getDefaultToolkit().beep(); - } - } -} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/StreamImagePane.java b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamImagePane.java new file mode 100644 index 00000000..ae150f13 --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamImagePane.java @@ -0,0 +1,73 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. 
APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream; + +import java.awt.Image; +import javax.swing.Icon; +import javax.swing.ImageIcon; +import javax.swing.JLabel; +import javax.swing.JScrollPane; +import javax.swing.SwingConstants; + +/** + * Simple pane, which shows an image that can be interacted with via a context + * menu. 
+ */ +public final class StreamImagePane extends JScrollPane { + private final JLabel label; + + public StreamImagePane() { + this.label = new JLabel(); + this.label.setVerticalAlignment(SwingConstants.TOP); + this.label.setHorizontalAlignment(SwingConstants.LEFT); + setViewportView(this.label); + } + + public void setImage(Image image) { + Icon icon = null; + if (image != null) { + icon = new ImageIcon(image); + } + label.setIcon(icon); + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/StreamPane.java b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamPane.java new file mode 100644 index 00000000..2fbdacdc --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamPane.java @@ -0,0 +1,135 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. 
+ You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream; + +import com.itextpdf.rups.controller.PdfReaderController; +import com.itextpdf.rups.model.IRupsEventListener; +import com.itextpdf.rups.model.ObjectLoader; +import com.itextpdf.rups.model.PdfStreamUtil; +import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode; + +import java.awt.CardLayout; +import java.awt.image.BufferedImage; +import javax.swing.JComponent; +import javax.swing.JPanel; + +/** + * Pane for showing PDF stream content. + * + *+ * For images the pane shows the image itself, with the relevant image options. + *
+ * + * <p> + * For everything else (at the moment of writing) a syntax editor is used. + * </p>
+ */ +public final class StreamPane extends JPanel implements IRupsEventListener { + private final StreamTextEditorPane textEditorPane; + private final StreamImagePane imagePane; + private final JPanel emptyPane; + + public StreamPane(PdfReaderController controller) { + this.textEditorPane = new StreamTextEditorPane(controller); + this.textEditorPane.setVisible(false); + this.imagePane = new StreamImagePane(); + this.imagePane.setVisible(false); + this.emptyPane = new JPanel(); + this.emptyPane.setVisible(true); + + setLayout(new CardLayout()); + add(this.textEditorPane); + add(this.imagePane); + add(this.emptyPane); + } + + public void render(PdfObjectTreeNode target) { + if (target == null || !target.isStream()) { + showPane(emptyPane); + return; + } + final BufferedImage image = PdfStreamUtil.getAsImage(target.getAsStream()); + if (image != null) { + imagePane.setImage(image); + showPane(imagePane); + return; + } + textEditorPane.render(target); + showPane(textEditorPane); + } + + @Override + public void handleCloseDocument() { + showPane(emptyPane); + textEditorPane.handleCloseDocument(); + } + + @Override + public void handleOpenDocument(ObjectLoader loader) { + showPane(emptyPane); + textEditorPane.handleOpenDocument(loader); + } + + private void showPane(JComponent pane) { + assert pane != null; + + showImagePane(imagePane == pane); + showTextEditorPane(textEditorPane == pane); + emptyPane.setVisible(emptyPane == pane); + validate(); + } + + private void showImagePane(boolean flag) { + imagePane.setVisible(flag); + if (!flag) { + imagePane.setImage(null); + } + } + + private void showTextEditorPane(boolean flag) { + textEditorPane.setVisible(flag); + if (!flag) { + textEditorPane.render(null); + } + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/StreamTextEditorPane.java b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamTextEditorPane.java new file mode 100644 index 00000000..b9186fc4 --- /dev/null +++ 
b/src/main/java/com/itextpdf/rups/view/itext/stream/StreamTextEditorPane.java @@ -0,0 +1,358 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. 
+ These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream; + +import com.itextpdf.kernel.pdf.PdfDictionary; +import com.itextpdf.kernel.pdf.PdfName; +import com.itextpdf.kernel.pdf.PdfObject; +import com.itextpdf.kernel.pdf.PdfStream; +import com.itextpdf.rups.controller.PdfReaderController; +import com.itextpdf.rups.model.IRupsEventListener; +import com.itextpdf.rups.model.LoggerHelper; +import com.itextpdf.rups.model.ObjectLoader; +import com.itextpdf.rups.model.contentstream.ParseTreeNode; +import com.itextpdf.rups.model.contentstream.ParseTreeNodeType; +import com.itextpdf.rups.model.contentstream.PdfContentStreamParser; +import com.itextpdf.rups.view.Language; +import com.itextpdf.rups.view.contextmenu.StreamPanelContextMenu; +import com.itextpdf.rups.view.itext.stream.editor.PdfSyntaxTextArea; +import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode; + +import java.awt.BorderLayout; +import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; +import javax.swing.JPanel; +import javax.swing.tree.TreeNode; +import org.fife.ui.rsyntaxtextarea.ErrorStrip; +import org.fife.ui.rsyntaxtextarea.SyntaxConstants; +import org.fife.ui.rtextarea.ExpandedFoldRenderStrategy; +import org.fife.ui.rtextarea.RTextScrollPane; + +public final class StreamTextEditorPane extends JPanel implements IRupsEventListener { + /** + * Char buffer with a single LF character. + */ + private static final char[] LF_TEXT = {'\n'}; + /** + * Max text line width after which it will be forcefully split. 
+ */ + private static final int MAX_LINE_LENGTH = 2048; + + private static final Method GET_INPUT_STREAM_METHOD; + + private final RTextScrollPane textScrollPane; + private final StreamPanelContextMenu popupMenu; + + //Todo: Remove that field after proper application structure will be implemented. + private final PdfReaderController controller; + private PdfObjectTreeNode target; + private boolean editable = false; + + static { + /* + * There doesn't seem to be a good way to detect, whether you can call + * setData on a PdfStream or not in advance. It cannot be called if a + * PDF stream was created from an InputStream, so we will be testing + * that via the protected method. + */ + try { + GET_INPUT_STREAM_METHOD = PdfStream.class.getDeclaredMethod("getInputStream"); + GET_INPUT_STREAM_METHOD.setAccessible(true); + } catch (NoSuchMethodException | SecurityException e) { + throw new IllegalStateException(e); + } + } + + /** + * Constructs a SyntaxHighlightedStreamPane. + * + * @param controller the pdf reader controller + */ + public StreamTextEditorPane(PdfReaderController controller) { + super(new BorderLayout()); + this.controller = controller; + + final PdfSyntaxTextArea textArea = new PdfSyntaxTextArea(); + this.textScrollPane = new RTextScrollPane(textArea); + // This will make sure, that the arrow for folding code blocks are + // always visible + this.textScrollPane.getGutter().setExpandedFoldRenderStrategy( + ExpandedFoldRenderStrategy.ALWAYS + ); + add(this.textScrollPane); + + final ErrorStrip errorStrip = new ErrorStrip(textArea); + add(errorStrip, BorderLayout.LINE_END); + + popupMenu = new StreamPanelContextMenu(getTextArea(), this); + // TODO: Augment existing menu with our own options +// getTextArea().setComponentPopupMenu(popupMenu); +// getTextArea().addMouseListener(new ContextMenuMouseListener(popupMenu, getTextArea())); + } + + public PdfSyntaxTextArea getTextArea() { + return (PdfSyntaxTextArea) textScrollPane.getTextArea(); + } + + /** + * 
Renders the content stream of a PdfObject or empties the text area. + * + * @param target the node of which the content stream needs to be rendered + */ + public void render(PdfObjectTreeNode target) { + getTextArea().discardAllEdits(); + this.target = target; + final PdfStream stream = getTargetStream(); + if (stream == null) { + clearPane(); + return; + } + + // Assuming that this will stop parsing for a moment... + getTextArea().setVisible(false); + String textToSet; + String styleToSet; + boolean editableToSet; + /* + * TODO: Differentiate between different content. See below. + * + * Images should be rendered as images (this was before the syntax + * highlight changes). Or at least as hex binary data. + * + * Fonts, binary XMP or just random binary data should be displayed + * as hex. + * + * XML data should be edited as XML with proper encoding and saved + * as such. + * + * Only PDF content streams should be altered and parsed in a custom + * way. + */ + try { + if (isFont(stream) || isImage(stream)) { + textToSet = getText(stream, false); + styleToSet = PdfSyntaxTextArea.SYNTAX_STYLE_BINARY; + editableToSet = false; + } else { + textToSet = prepareContentStreamText(getText(stream, true)); + styleToSet = PdfSyntaxTextArea.SYNTAX_STYLE_PDF; + editableToSet = true; + } + setTextEditableRoutine(true); + } catch (RuntimeException e) { + LoggerHelper.error(Language.ERROR_UNEXPECTED_EXCEPTION.getString(), e, getClass()); + textToSet = ""; + styleToSet = SyntaxConstants.SYNTAX_STYLE_NONE; + editableToSet = false; + } + getTextArea().setSyntaxEditingStyle(styleToSet); + getTextArea().setText(textToSet); + getTextArea().setCaretPosition(0); + getTextArea().discardAllEdits(); + setTextEditableRoutine(editableToSet); + getTextArea().setVisible(true); + + repaint(); + } + + public void saveToTarget() { + /* + * FIXME: With indirect objects with multiple references, this will + * change the tree only in one of them. + * FIXME: This doesn't change Length... 
+ */ + if (controller != null && ((PdfDictionary) target.getPdfObject()).containsKey(PdfName.Filter)) { + controller.deleteTreeNodeDictChild(target, PdfName.Filter); + } + /* + * In the current state, stream node could contain ASN1. data, which + * is parsed and added as tree nodes. After editing, it won't be valid, + * so we must remove them. + */ + if (controller != null) { + int i = 0; + while (i < target.getChildCount()) { + final TreeNode child = target.getChildAt(i); + if (child instanceof PdfObjectTreeNode) { + ++i; + } else { + controller.deleteTreeChild(target, i); + // Will assume it being just a shift... + } + } + } + final byte[] streamData = getTextArea().getText().getBytes(StandardCharsets.ISO_8859_1); + getTargetStream().setData(streamData); + if (controller != null) { + controller.selectNode(target); + } + } + + public void setEditable(boolean editable) { + this.editable = editable; + setTextEditableRoutine(editable); + } + + @Override + public void handleCloseDocument() { + clearPane(); + setEditable(false); + } + + @Override + public void handleOpenDocument(ObjectLoader loader) { + clearPane(); + setEditable(loader.getFile().isOpenedAsOwner()); + } + + private void setTextEditableRoutine(boolean editable) { + // If pane is read-only or in a temp read-only state + if (!this.editable || !editable) { + getTextArea().setEditable(false); + popupMenu.setSaveToStreamEnabled(false); + return; + } + + getTextArea().setEditable(true); + final PdfStream targetStream = getTargetStream(); + if (targetStream != null) { + popupMenu.setSaveToStreamEnabled(isStreamEditable(targetStream)); + } else { + popupMenu.setSaveToStreamEnabled(false); + } + } + + private PdfStream getTargetStream() { + if (target == null) { + return null; + } + final PdfObject obj = target.getPdfObject(); + if (obj instanceof PdfStream) { + return (PdfStream) obj; + } + return null; + } + + private void clearPane() { + target = null; + getTextArea().setText(""); + 
getTextArea().discardAllEdits(); + setTextEditableRoutine(false); + } + + /** + * Modifies the PDF content stream text to make it suitable for usage in + * a code editor. + * + * <p> + * At the moment this just splits lines after operators, if lines are too + * long. If there are long lines in the code editor, it is noticeably + * laggier. + * </p>
+ * + * @param originalText PDF content stream text to modify. + * + * @return Modified PDF content stream text. + */ + private static String prepareContentStreamText(String originalText) { + boolean hasOnlyShortLines = true; + int startIndex = 0; + while (startIndex < originalText.length()) { + int lineFeedIndex = originalText.indexOf('\n', startIndex); + if (lineFeedIndex == -1) { + lineFeedIndex = originalText.length(); + } + final int length = lineFeedIndex - startIndex; + if (length > MAX_LINE_LENGTH) { + hasOnlyShortLines = false; + break; + } + startIndex = lineFeedIndex + 1; + } + if (hasOnlyShortLines) { + return originalText; + } + + /* + * TODO: Make this logic smarter. + * + * At the moment if lines are too big, we just replace all whitespace + * after an operator with LF. This is not ideal and destructive. This + * was prompted by a document, where lines were denoted with just CR + * and the text area does not treat them as end-of-line indicators. + */ + final ParseTreeNode tree = PdfContentStreamParser.parse(originalText); + ParseTreeNode child = tree.getFirstChild(); + while (child != null) { + if (child.getType() == ParseTreeNodeType.OPERATOR) { + ParseTreeNode next = child.getNext(); + while (next != null && next.getType() == ParseTreeNodeType.WHITESPACE) { + next = next.remove(); + } + child = child.addNext(ParseTreeNodeType.WHITESPACE, LF_TEXT, 0, LF_TEXT.length); + } + child = child.getNext(); + } + return tree.getFullText(); + } + + private static String getText(PdfStream stream, boolean decoded) { + return new String(stream.getBytes(decoded), StandardCharsets.ISO_8859_1); + } + + private static boolean isImage(PdfStream stream) { + return PdfName.Image.equals(stream.getAsName(PdfName.Subtype)); + } + + private static boolean isFont(PdfStream stream) { + return stream.get(PdfName.Length1) != null; + } + + private static boolean isStreamEditable(PdfStream stream) { + try { + return (GET_INPUT_STREAM_METHOD.invoke(stream) == null); + } catch 
(ReflectiveOperationException e) { + throw new IllegalStateException(e); + } + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/AbstractPainterAwareTokenMaker.java b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/AbstractPainterAwareTokenMaker.java new file mode 100644 index 00000000..7cdde3df --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/AbstractPainterAwareTokenMaker.java @@ -0,0 +1,119 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. 
+ + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import org.fife.ui.rsyntaxtextarea.TokenMakerBase; + +/** + * Base class for our custom token makers. + * + *+ * This class really wants to just implement TokenMaker, as {@code firstToken}, + * {@code currentToken}, {@code previousToken} and {@code tokenFactory} from + * {@link TokenMakerBase} are of no use here. But just implementing the + * interface would force us to copy a lot of code from + * the library, and, for some reason {@code DefaultOccurrenceMarker} is marked + * as package-private, so we would need to reimplement that as well. + *
+ * + * <p> + * So, at the moment, these fields from TokenMakerBase should be ignored. For + * token manipulation, {@code firstRupsToken} and {@code lastRupsToken} should + * be used instead. + * </p>
+ * + * <p> + * This class is expected to be used with a text area, which has a + * {@link Latin1Filter} on the underlying document. This is used as a way to + * represent a byte stream as a string. + * </p>
+ */ +public abstract class AbstractPainterAwareTokenMaker extends TokenMakerBase { + /** + * First token in the output token list. Should be used instead of + * {@code firstToken}. + */ + protected PainterAwareToken firstRupsToken = null; + /** + * Last token in the output token list. Should be used instead of + * {@code lastToken}. + */ + protected PainterAwareToken lastRupsToken = null; + + @Override + public void addNullToken() { + final PainterAwareToken token = new PainterAwareToken(); + token.setLanguageIndex(getLanguageIndex()); + addToken(token); + } + + @Override + public void addToken(char[] array, int start, int end, int tokenType, int startOffset, boolean hyperlink) { + final PainterAwareToken token = new PainterAwareToken( + array, start, end, startOffset, tokenType, getLanguageIndex() + ); + token.setHyperlink(hyperlink); + addToken(token); + } + + @Override + protected void resetTokenList() { + firstRupsToken = null; + lastRupsToken = null; + super.resetTokenList(); + } + + /** + * Appends a PdfToken to the output token list. + * + * @param token Token to append. + */ + protected void addToken(PainterAwareToken token) { + if (firstRupsToken == null) { + firstRupsToken = token; + } else { + lastRupsToken.setNextToken(token); + } + lastRupsToken = token; + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/BinaryTokenMaker.java b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/BinaryTokenMaker.java new file mode 100644 index 00000000..152a3dd8 --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/BinaryTokenMaker.java @@ -0,0 +1,62 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. 
at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import javax.swing.text.Segment; +import org.fife.ui.rsyntaxtextarea.Token; +import org.fife.ui.rsyntaxtextarea.TokenTypes; + +public final class BinaryTokenMaker extends AbstractPainterAwareTokenMaker { + @Override + public boolean getMarkOccurrencesOfTokenType(int type) { + return false; + } + + @Override + public Token getTokenList(Segment text, int initialTokenType, int startOffset) { + resetTokenList(); + addToken(text, text.offset, text.offset + text.count, TokenTypes.IDENTIFIER, startOffset); + addNullToken(); + return firstRupsToken; + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/CustomConfigurableCaret.java b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/CustomConfigurableCaret.java new file mode 100644 index 00000000..a8fe891a --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/CustomConfigurableCaret.java @@ -0,0 +1,75 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. 
+ You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import java.awt.event.FocusEvent; +import org.fife.ui.rtextarea.ConfigurableCaret; + +/** + * Our custom {@link ConfigurableCaret}, which remains visible, if the text + * area is not editable. + */ +public final class CustomConfigurableCaret extends ConfigurableCaret { + private static final int DEFAULT_BLINK_RATE = 500; + + public CustomConfigurableCaret() { + /* + * The situation is a bit odd. Usually a caret is created via the UI + * class, and then the blink rate is set manually in that class after + * creation based on some component properties. 
+ * + * But what it also means is that if you replace the caret in a text + * area afterward, it will not blink, even though it is the default + * behavior. So for simplicity we will set it here. + */ + setBlinkRate(DEFAULT_BLINK_RATE); + } + + @Override + public void focusGained(FocusEvent e) { + super.focusGained(e); + if (getComponent().isEnabled()) { + setVisible(true); + } + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/Latin1Filter.java b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/Latin1Filter.java new file mode 100644 index 00000000..555cf827 --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/Latin1Filter.java @@ -0,0 +1,180 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. 
+ You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import com.itextpdf.kernel.exceptions.PdfException; +import com.itextpdf.rups.view.Language; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.StandardCharsets; +import javax.swing.text.AttributeSet; +import javax.swing.text.BadLocationException; +import javax.swing.text.DocumentFilter; + +/** + * A document filter, which retains Latin-1 characters as-is, and for all + * others returns a "byte equivalent" UTF-8 encoding of characters. 
+ * + * <p> + * This is, pretty much, a hack to allow working with a PDF byte stream as a + * char stream with trivial conversions. At all points in time the document + * characters will have codepoints in a 0-255 range and can be freely encoded + * as Latin-1. + * </p>
+ * + * <p> + * Under this filter, if you type, for example, "į" (U+012F), you would get + * "Ä¯" instead (U+00C4 U+00AF, which is the UTF-8 encoding of the symbol, + * where each byte is padded to two bytes). + * </p>
+ */ +public final class Latin1Filter extends DocumentFilter { + /** + * Pre-allocated output buffer for the UTF-8 character encoder. + */ + private final ByteBuffer utf8CharBuffer = ByteBuffer.allocate(4); + + @Override + public void insertString(FilterBypass fb, int offset, String string, AttributeSet attr) + throws BadLocationException { + super.insertString(fb, offset, generateSubstitute(string), attr); + } + + @Override + public void replace(FilterBypass fb, int offset, int length, String text, AttributeSet attrs) + throws BadLocationException { + super.replace(fb, offset, length, generateSubstitute(text), attrs); + } + + private String generateSubstitute(String original) { + /* + * If text is encodable in Latin-1, just return the string as-is. + * This is a very common case, as the majority of PDF content streams + * contains just ASCII text, so a separate branch at the start should + * be worth it to avoid any allocations. + */ + int index = getNonLatin1Index(original); + if (index >= original.length()) { + return original; + } + /* + * Otherwise we build a substitute string, where non-Latin-1 chars + * are replaced with UTF-8 "bytes". We will assume there is only + * one inconvenient character for pre-allocation (thus +3). + */ + final CharsetEncoder utf8Encoder = StandardCharsets.UTF_8.newEncoder(); + final StringBuilder substitute = initStringBuilder(original, index); + while (index < original.length()) { + /* + * Encoding 1 non-Latin-1 code point first. 
+ */ + final char ch = original.charAt(index); + utf8CharBuffer.clear(); + int end = index + 1; + if (Character.isHighSurrogate(ch) && end < original.length()) { + ++end; + } + final CharBuffer encoderInput = CharBuffer.wrap(original, index, end); + final CoderResult result = utf8Encoder.encode(encoderInput, utf8CharBuffer, true); + if (!result.isUnderflow()) { + throwException(result); + } + for (int j = 0; j < utf8CharBuffer.position(); ++j) { + substitute.append((char) (utf8CharBuffer.get(j) & 0xFF)); + } + /* + * At the end append the possible remaining Latin-1 part. + */ + index = getNonLatin1Index(original, end); + substitute.append(original, end, index); + } + return substitute.toString(); + } + + private static void throwException(CoderResult cr) { + try { + cr.throwException(); + } catch (CharacterCodingException e) { + throw new PdfException(Language.ERROR_CHARACTER_ENCODING.getString(), e); + } + } + + private static int getNonLatin1Index(CharSequence cs) { + return getNonLatin1Index(cs, 0); + } + + private static int getNonLatin1Index(CharSequence cs, int start) { + int index = start; + while (index < cs.length() && isLatin1(cs.charAt(index))) { + ++index; + } + return index; + } + + private static boolean isLatin1(char c) { + return c <= '\u00FF'; + } + + private static StringBuilder initStringBuilder(String str, int nonLatin1Index) { + int capacity = nonLatin1Index; + final int suffixLength = str.length() - nonLatin1Index; + /* + * For small enough strings just assume the worst case scenario and + * allocate 4 "bytes" for each char in suffix. Otherwise, just do + * something more conservative like 1.25 "bytes" per char. 
+ */ + if (suffixLength <= 1024) { + capacity += (4 * suffixLength); + } else { + capacity += (5 * suffixLength / 4); + } + final StringBuilder sb = new StringBuilder(capacity); + // Immediately add the Latin-1 part + sb.append(str, 0, nonLatin1Index); + return sb; + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PainterAwareToken.java b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PainterAwareToken.java new file mode 100644 index 00000000..d723039d --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PainterAwareToken.java @@ -0,0 +1,230 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. 
+ + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import java.awt.Rectangle; +import java.lang.reflect.Method; +import javax.swing.text.Segment; +import javax.swing.text.TabExpander; +import org.fife.ui.rsyntaxtextarea.RSyntaxTextArea; +import org.fife.ui.rsyntaxtextarea.Token; +import org.fife.ui.rsyntaxtextarea.TokenImpl; +import org.fife.ui.rsyntaxtextarea.TokenPainter; + +/** + * {@link Token} implementation, which respect the painter of the text area, + * when calculating lengths and offsets. + * + *+ * Overridden code is heavily inspired by the original implementation + * in {@link TokenImpl}. + *
+ */ +public final class PainterAwareToken extends TokenImpl { + /* + * For some reason caret positioning logic in RSyntaxTextArea does not + * take the painter into the account. It calls methods within the Token + * interface, which try to calculate the text width on its own. + * + * This works fine in the default configuration, when both painter and + * TokenImpl has the same logic to calculate text width (i.e. just + * rendering text as-is). But since we want to show non-ASCII symbols + * differently, this no longer works. + * + * Ideally, we should just reuse methods in the Painter to calculate + * widths of what will be drawn. And this would work, but for some reason + * RSyntaxTextArea#getTokenPainter is declared package-private and cannot + * be accessed by a custom implementation. + * + * So we have a nasty reflection here for the time being to get access to + * that painter instead of hardcoding our own here. + */ + private static final Method GET_TOKEN_PAINTER_METHOD; + + static { + try { + GET_TOKEN_PAINTER_METHOD = RSyntaxTextArea.class.getDeclaredMethod("getTokenPainter"); + GET_TOKEN_PAINTER_METHOD.setAccessible(true); + } catch (NoSuchMethodException | SecurityException e) { + throw new IllegalStateException(e); + } + } + + public PainterAwareToken() { + } + + public PainterAwareToken(Segment line, int beg, int end, int startOffset, int type, int languageIndex) { + super(line, beg, end, startOffset, type, languageIndex); + } + + public PainterAwareToken(char[] line, int beg, int end, int startOffset, int type, int languageIndex) { + super(line, beg, end, startOffset, type, languageIndex); + } + + public PainterAwareToken(Token t2) { + super(t2); + } + + @Override + public int getListOffset(RSyntaxTextArea textArea, TabExpander e, float x0, float x) { + int offset = getOffset(); + + // If the coordinate in question is before this line's start, quit. 
+ if (x0 >= x) { + return offset; + } + + final TokenPainter painter = getTokenPainter(textArea); + Token token = this; + float startX = x0; + float avgWidthPerChar = 0; + while (token != null && token.isPaintable()) { + final float endX = painter.nextX(token, token.length(), startX, textArea, e); + // Found the token for the offset + if (x < endX) { + avgWidthPerChar = (endX - startX) / token.length(); + break; + } + startX = endX; + offset += token.length(); + token = token.getNextToken(); + } + + // If we didn't find anything, return the end position of the text. + if (token == null || !token.isPaintable()) { + return offset; + } + + // Search for the char offset now + final int hint = (int) ((x - startX) / avgWidthPerChar); + final int charCount = getCharCountBeforeX(textArea, e, painter, token, startX, x, hint); + offset += charCount; + + // Checking if closer to next char + if (charCount < token.length()) { + final float prevX = painter.nextX(token, charCount, startX, textArea, e); + final float nextX = painter.nextX(token, charCount + 1, startX, textArea, e); + if ((x - prevX) > (nextX - x)) { + ++offset; + } + } + + return offset; + } + + @Override + public int getOffsetBeforeX(RSyntaxTextArea textArea, TabExpander e, float startX, float endBeforeX) { + final int textLength = length(); + // Same as in TokenImpl, 1 length token always fit to avoid inf loop + if (textLength <= 1) { + return getOffset(); + } + final TokenPainter painter = getTokenPainter(textArea); + final int charCount = getCharCountBeforeX(textArea, e, painter, this, startX, endBeforeX, 2); + return getOffset() + charCount - 1; + } + + @Override + public float getWidthUpTo(int numChars, RSyntaxTextArea textArea, TabExpander e, float x0) { + final TokenPainter painter = getTokenPainter(textArea); + return painter.nextX(this, numChars, x0, textArea, e) - x0; + } + + @Override + public Rectangle listOffsetToView(RSyntaxTextArea textArea, TabExpander e, int pos, int x0, Rectangle rect) { + final 
TokenPainter painter = getTokenPainter(textArea); + Token token = this; + float startX = x0; + while (token != null && token.isPaintable()) { + if (token.containsPosition(pos)) { + final int charOffset = pos - token.getOffset(); + final float endX = painter.nextX(token, charOffset + 1, startX, textArea, e); + if (charOffset > 0) { + startX = painter.nextX(token, charOffset, startX, textArea, e); + } + rect.x = (int) startX; + rect.width = (int) (endX - startX); + return rect; + } + startX = painter.nextX(token, token.length(), startX, textArea, e); + token = token.getNextToken(); + } + + // If we didn't find anything, we're at the end of the line. Return + // a width of 1 (so selection highlights don't extend way past line's + // text). A ConfigurableCaret will know to paint itself with a larger + // width. + rect.x = (int) startX; + rect.width = 1; + return rect; + } + + private static int getCharCountBeforeX(RSyntaxTextArea textArea, TabExpander e, TokenPainter painter, + Token token, float startX, float endBeforeX, int hint) { + final float width = endBeforeX - startX; + int left = 0; + int right = token.length(); + int current = Math.max(1, Math.min(hint, token.length())); + while (left < right) { + final float x = painter.nextX(token, current, startX, textArea, e); + final float avgWidthPerChar = (x - startX) / current; + final int expectedCharCount = (int) (width / avgWidthPerChar); + if (x <= endBeforeX) { + left = current; + current = Math.min(expectedCharCount + 1, right); + } else { + right = current - 1; + current = Math.max(expectedCharCount, left); + } + } + return left; + } + + private static TokenPainter getTokenPainter(RSyntaxTextArea host) { + try { + return (TokenPainter) GET_TOKEN_PAINTER_METHOD.invoke(host); + } catch (ReflectiveOperationException e) { + throw new IllegalStateException(e); + } + } +} diff --git a/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PdfFoldParser.java 
b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PdfFoldParser.java new file mode 100644 index 00000000..7cc36379 --- /dev/null +++ b/src/main/java/com/itextpdf/rups/view/itext/stream/editor/PdfFoldParser.java @@ -0,0 +1,224 @@ +/* + This file is part of the iText (R) project. + Copyright (c) 1998-2025 Apryse Group NV + Authors: Apryse Software. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License version 3 + as published by the Free Software Foundation with the addition of the + following permission added to Section 15 as permitted in Section 7(a): + FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY + APRYSE GROUP. APRYSE GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT + OF THIRD PARTY RIGHTS + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program; if not, see http://www.gnu.org/licenses or write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA, 02110-1301 USA, or download the license from the following URL: + http://itextpdf.com/terms-of-use/ + + The interactive user interfaces in modified source and object code versions + of this program must display Appropriate Legal Notices, as required under + Section 5 of the GNU Affero General Public License. + + In accordance with Section 7(b) of the GNU Affero General Public License, + a covered work must retain the producer line in every PDF that is created + or manipulated using iText. + + You can be released from the requirements of the license by purchasing + a commercial license. 
Buying such a license is mandatory as soon as you + develop commercial activities involving the iText software without + disclosing the source code of your own applications. + These activities include: offering paid services to customers as an ASP, + serving PDFs on the fly in a web application, shipping iText with a closed + source product. + + For more information, please contact iText Software Corp. at this + address: sales@itextpdf.com + */ +package com.itextpdf.rups.view.itext.stream.editor; + +import com.itextpdf.rups.model.LoggerHelper; +import com.itextpdf.rups.model.contentstream.ParseTreeNode; +import com.itextpdf.rups.model.contentstream.ParseTreeNodeType; +import com.itextpdf.rups.model.contentstream.PdfContentStreamParser; +import com.itextpdf.rups.model.contentstream.PdfOperators; +import com.itextpdf.rups.view.Language; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; +import javax.swing.text.BadLocationException; +import org.fife.ui.rsyntaxtextarea.RSyntaxTextArea; +import org.fife.ui.rsyntaxtextarea.folding.Fold; +import org.fife.ui.rsyntaxtextarea.folding.FoldParser; +import org.fife.ui.rsyntaxtextarea.folding.FoldType; + +/** + * Fold parser for handling PDF content streams. + */ +public final class PdfFoldParser implements FoldParser { + /** + * Default size to use for the marker stack. + */ + private static final int DEFAULT_MARKER_STACK_SIZE = 8; + /** + * Marker for a marked content sequence fold. + */ + private static final Object MARKED_CONTENT = new Object(); + /** + * Marked for a text object block fold. + */ + private static final Object TEXT_OBJECT = new Object(); + + /** + * Pre-allocated content stream parser. + */ + private final PdfContentStreamParser parser = new PdfContentStreamParser(); + + @Override + public List