Skip to content

Commit a597eb6

Browse files
committed
TIKA-2873 -- workaround for newly re-discovered bug in POI's ChunkedCipherInputStream - bug 63431
1 parent dd17e10 commit a597eb6

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java

+5-4
Original file line number | Diff line number | Diff line change
@@ -224,10 +224,11 @@ protected void parse(
224224
// Decrypt the OLE2 stream, and delegate the resulting OOXML
225225
// file to the regular OOXML parser for normal handling
226226
OOXMLParser parser = new OOXMLParser();
227-
228-
parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
229-
new BodyContentHandler(xhtml)),
230-
metadata, context);
227+
try (TikaInputStream tis = TikaInputStream.get(d.getDataStream(root))) {
228+
parser.parse(tis, new EmbeddedContentHandler(
229+
new BodyContentHandler(xhtml)),
230+
metadata, context);
231+
}
231232
} catch (GeneralSecurityException ex) {
232233
throw new EncryptedDocumentException(ex);
233234
}

tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java

+6
Original file line number | Diff line number | Diff line change
@@ -1215,12 +1215,18 @@ public void testPPTXThumbnail() throws Exception {
12151215
@Test
12161216
public void testEncrypted() throws Exception {
12171217
Map<String, String> tests = new HashMap<String, String>();
1218+
//the first three contain javax.crypto.CipherInputStream
12181219
tests.put("testWORD_protected_passtika.docx",
12191220
"This is an encrypted Word 2007 File");
12201221
tests.put("testPPT_protected_passtika.pptx",
12211222
"This is an encrypted PowerPoint 2007 slide.");
12221223
tests.put("testEXCEL_protected_passtika.xlsx",
12231224
"This is an Encrypted Excel spreadsheet.");
1225+
//TIKA-2873 this one contains a ChunkedCipherInputStream
1226+
//that is buggy at the POI level; the TikaInputStream wrapper in OfficeParser can be removed
1227+
//once https://bz.apache.org/bugzilla/show_bug.cgi?id=63431 is fixed.
1228+
tests.put("testEXCEL_protected_passtika_2.xlsx",
1229+
"This is an Encrypted Excel spreadsheet with a ChunkedCipherInputStream.");
12241230

12251231
Parser parser = new AutoDetectParser();
12261232
Metadata m = new Metadata();

0 commit comments

Comments
 (0)