Skip to content

Commit 85cf9f1

Browse files
committed
WIP: Fix of XmpUtilReader
1 parent 4de8825 commit 85cf9f1

File tree

1 file changed

+15
-44
lines changed

1 file changed

+15
-44
lines changed

src/main/java/org/jabref/logic/xmp/XmpUtilReader.java

+15-44
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
package org.jabref.logic.xmp;
22

3-
import java.io.ByteArrayInputStream;
43
import java.io.IOException;
4+
import java.io.InputStream;
55
import java.nio.file.Path;
6-
import java.util.ArrayList;
6+
import java.util.LinkedHashSet;
77
import java.util.List;
8-
import java.util.Optional;
8+
import java.util.SequencedCollection;
99

1010
import org.jabref.model.entry.BibEntry;
1111
import org.jabref.model.entry.LinkedFile;
@@ -63,33 +63,26 @@ public List<BibEntry> readXmp(Path path, XmpPreferences xmpPreferences) throws I
6363
}
6464

6565
public List<BibEntry> readXmp(Path path, PDDocument document, XmpPreferences xmpPreferences) {
66-
List<BibEntry> result = new ArrayList<>();
66+
SequencedCollection<BibEntry> result = new LinkedHashSet<>();
6767

68-
List<XMPMetadata> xmpMetaList = getXmpMetadata(document);
68+
// We add PDDocumentInformation in all cases
69+
PDDocumentInformation documentInformation = document.getDocumentInformation();
70+
new DocumentInformationExtractor(documentInformation).extractBibtexEntry().ifPresent(result::add);
6971

72+
List<XMPMetadata> xmpMetaList = getXmpMetadata(document);
7073
if (!xmpMetaList.isEmpty()) {
7174
// Only support Dublin Core since JabRef 4.2
7275
for (XMPMetadata xmpMeta : xmpMetaList) {
7376
DublinCoreSchema dcSchema = DublinCoreSchemaCustom.copyDublinCoreSchema(xmpMeta.getDublinCoreSchema());
7477
if (dcSchema != null) {
7578
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
76-
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();
77-
entry.ifPresent(result::add);
79+
dcExtractor.extractBibtexEntry().ifPresent(result::add);
7880
}
7981
}
8082
}
8183

82-
if (result.isEmpty()) {
83-
// If we did not find any XMP metadata, search for non XMP metadata
84-
PDDocumentInformation documentInformation = document.getDocumentInformation();
85-
DocumentInformationExtractor diExtractor = new DocumentInformationExtractor(documentInformation);
86-
Optional<BibEntry> entry = diExtractor.extractBibtexEntry();
87-
entry.ifPresent(result::add);
88-
}
89-
9084
result.forEach(entry -> entry.addFile(new LinkedFile("", path.toAbsolutePath(), "PDF")));
91-
92-
return result;
85+
return result.stream().toList();
9386
}
9487

9588
/**
@@ -108,34 +101,12 @@ private List<XMPMetadata> getXmpMetadata(PDDocument document) {
108101
return List.of();
109102
}
110103

111-
List<XMPMetadata> metaList = new ArrayList<>();
112-
113-
String xmp = metaRaw.getCOSObject().toTextString();
114-
115-
int startDescriptionSection = xmp.indexOf(START_TAG);
116-
int endDescriptionSection = xmp.lastIndexOf(END_TAG) + END_TAG.length();
117-
118-
if ((startDescriptionSection < 0) || (startDescriptionSection > endDescriptionSection) || (endDescriptionSection == (END_TAG.length() - 1))) {
119-
return metaList;
120-
}
121-
122-
// XML header for the xmpDomParser
123-
String start = xmp.substring(0, startDescriptionSection);
124-
// descriptionArray - mid part of the textual metadata
125-
String[] descriptionsArray = xmp.substring(startDescriptionSection, endDescriptionSection).split(END_TAG);
126-
// XML footer for the xmpDomParser
127-
String end = xmp.substring(endDescriptionSection);
128-
129-
for (String s : descriptionsArray) {
130-
// END_TAG is appended, because of the split operation above
131-
String xmpMetaString = start + s + END_TAG + end;
132-
try {
133-
metaList.add(XmpUtilShared.parseXmpMetadata(new ByteArrayInputStream(xmpMetaString.getBytes())));
134-
} catch (IOException ex) {
135-
LOGGER.debug("Problem parsing XMP schema. Continuing with other schemas.", ex);
136-
}
104+
try (InputStream is = metaRaw.exportXMPMetadata()) {
105+
return List.of(XmpUtilShared.parseXmpMetadata(is));
106+
} catch (IOException e) {
107+
LOGGER.debug("Problem parsing XMP metadata.", e);
108+
return List.of();
137109
}
138-
return metaList;
139110
}
140111

141112
/**

0 commit comments

Comments
 (0)