1
1
package org .jabref .logic .xmp ;
2
2
3
- import java .io .ByteArrayInputStream ;
4
3
import java .io .IOException ;
4
+ import java .io .InputStream ;
5
5
import java .nio .file .Path ;
6
- import java .util .ArrayList ;
6
+ import java .util .LinkedHashSet ;
7
7
import java .util .List ;
8
- import java .util .Optional ;
8
+ import java .util .SequencedCollection ;
9
9
10
10
import org .jabref .model .entry .BibEntry ;
11
11
import org .jabref .model .entry .LinkedFile ;
@@ -63,33 +63,26 @@ public List<BibEntry> readXmp(Path path, XmpPreferences xmpPreferences) throws I
63
63
}
64
64
65
65
public List <BibEntry > readXmp (Path path , PDDocument document , XmpPreferences xmpPreferences ) {
66
- List <BibEntry > result = new ArrayList <>();
66
+ SequencedCollection <BibEntry > result = new LinkedHashSet <>();
67
67
68
- List <XMPMetadata > xmpMetaList = getXmpMetadata (document );
68
+ // We add the entry extracted from PDDocumentInformation in all cases
69
+ PDDocumentInformation documentInformation = document .getDocumentInformation ();
70
+ new DocumentInformationExtractor (documentInformation ).extractBibtexEntry ().ifPresent (result ::add );
69
71
72
+ List <XMPMetadata > xmpMetaList = getXmpMetadata (document );
70
73
if (!xmpMetaList .isEmpty ()) {
71
74
// Only support Dublin Core since JabRef 4.2
72
75
for (XMPMetadata xmpMeta : xmpMetaList ) {
73
76
DublinCoreSchema dcSchema = DublinCoreSchemaCustom .copyDublinCoreSchema (xmpMeta .getDublinCoreSchema ());
74
77
if (dcSchema != null ) {
75
78
DublinCoreExtractor dcExtractor = new DublinCoreExtractor (dcSchema , xmpPreferences , new BibEntry ());
76
- Optional <BibEntry > entry = dcExtractor .extractBibtexEntry ();
77
- entry .ifPresent (result ::add );
79
+ dcExtractor .extractBibtexEntry ().ifPresent (result ::add );
78
80
}
79
81
}
80
82
}
81
83
82
- if (result .isEmpty ()) {
83
- // If we did not find any XMP metadata, search for non XMP metadata
84
- PDDocumentInformation documentInformation = document .getDocumentInformation ();
85
- DocumentInformationExtractor diExtractor = new DocumentInformationExtractor (documentInformation );
86
- Optional <BibEntry > entry = diExtractor .extractBibtexEntry ();
87
- entry .ifPresent (result ::add );
88
- }
89
-
90
84
result .forEach (entry -> entry .addFile (new LinkedFile ("" , path .toAbsolutePath (), "PDF" )));
91
-
92
- return result ;
85
+ return result .stream ().toList ();
93
86
}
94
87
95
88
/**
@@ -108,34 +101,12 @@ private List<XMPMetadata> getXmpMetadata(PDDocument document) {
108
101
return List .of ();
109
102
}
110
103
111
- List <XMPMetadata > metaList = new ArrayList <>();
112
-
113
- String xmp = metaRaw .getCOSObject ().toTextString ();
114
-
115
- int startDescriptionSection = xmp .indexOf (START_TAG );
116
- int endDescriptionSection = xmp .lastIndexOf (END_TAG ) + END_TAG .length ();
117
-
118
- if ((startDescriptionSection < 0 ) || (startDescriptionSection > endDescriptionSection ) || (endDescriptionSection == (END_TAG .length () - 1 ))) {
119
- return metaList ;
120
- }
121
-
122
- // XML header for the xmpDomParser
123
- String start = xmp .substring (0 , startDescriptionSection );
124
- // descriptionArray - mid part of the textual metadata
125
- String [] descriptionsArray = xmp .substring (startDescriptionSection , endDescriptionSection ).split (END_TAG );
126
- // XML footer for the xmpDomParser
127
- String end = xmp .substring (endDescriptionSection );
128
-
129
- for (String s : descriptionsArray ) {
130
- // END_TAG is appended, because of the split operation above
131
- String xmpMetaString = start + s + END_TAG + end ;
132
- try {
133
- metaList .add (XmpUtilShared .parseXmpMetadata (new ByteArrayInputStream (xmpMetaString .getBytes ())));
134
- } catch (IOException ex ) {
135
- LOGGER .debug ("Problem parsing XMP schema. Continuing with other schemas." , ex );
136
- }
104
+ try (InputStream is = metaRaw .exportXMPMetadata ()) {
105
+ return List .of (XmpUtilShared .parseXmpMetadata (is ));
106
+ } catch (IOException e ) {
107
+ LOGGER .debug ("Problem parsing XMP metadata." , e );
108
+ return List .of ();
137
109
}
138
- return metaList ;
139
110
}
140
111
141
112
/**
0 commit comments