
Commit c39222d

koppor and subhramit authored
Fix XMP import (#12833)
* Fix level for PicaXmlParser
* Use withers
* Use "withField"
* Add debub statement
* Modernize test
* Add 2024_SPLC_Becker.pdf
* Reuse object
* Use well-known array list
* Refactor xmpUtilReader to avoid double-loading of the PDF
* Fail faster
* Simplify test code
* Reorder for better readability
* Fix casing
* Fix comments in XmpUtilReaderTest
* Revert "Reorder for better readability"
  This reverts commit 7f398ee.
* Make code more readable
* Add support for UnknownField{name='rights'}
* WIP: Fix of XmpUtilReader
* Add skeletton for test
* Introduce constant
* Map more fields
* Fix links
* Add links to related classes
* Add initial Markdown doc
* Remove unnecessary method
* Revert "WIP: Fix of XmpUtilReader"
  This reverts commit 85cf9f1.
* Try to cache DOM parser
* Merge also merges type
* Remove "doi:" prefix at identifier
* Return one entry only
* Try to have org.jabref.logic.importer.fileformat.pdf.PdfMergeMetadataImporterTest#importRelativizesFilePath working
* Fix tabs
* Update CHANGELOG.md
  Co-authored-by: Subhramit Basu <[email protected]>
* Fix path relativation
* Use withers
* Use "List.of"
* Add comment on testing
* Re-use merge logig of BibEntry
* Add support of merging file field
* Streamline code in PdfMergeMetadataImporter
* Adapt test to real data
* Refine JavaDoc
* Fix issue in field writing
* Introduce BibEntryCompare
* Ease test
* Add month handling during loading of XMP data
* Add refined JavaDoc
* Fix checkstyle
* Remvoe "// TODO"

---------

Co-authored-by: Subhramit Basu <[email protected]>
1 parent 4605b5f commit c39222d

24 files changed: +518 −239 lines

CHANGELOG.md

+2

```diff
@@ -25,6 +25,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 - We added a feature for enabling drag-and-drop of files into groups [#12540](https://github.com/JabRef/jabref/issues/12540)
 - We added support for reordering keywords via drag and drop, automatic alphabetical ordering, and improved pasting and editing functionalities in the keyword editor. [#10984](https://github.com/JabRef/jabref/issues/10984)
 - We added a new functionality where author names having multiple spaces in-between will be considered as separate user block as it does for " and ". [#12701](https://github.com/JabRef/jabref/issues/12701)
+- We enhanced support for parsing XMP metadata from PDF files. [#12829](https://github.com/JabRef/jabref/issues/12829)
 - We added a "Preview" header in the JStyles tab in the "Select style" dialog, to make it consistent with the CSL styles tab. [#12838](https://github.com/JabRef/jabref/pull/12838)
 
 ### Changed
@@ -86,6 +87,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 - We fixed an issue where duplicate items cannot be removed correctly when merging groups or keywords. [#12585](https://github.com/JabRef/jabref/issues/12585)
 - We fixed an issue where JabRef displayed an incorrect deletion notification when canceling entry deletion [#12645](https://github.com/JabRef/jabref/issues/12645)
 - We fixed an issue where JabRef displayed an incorrect deletion notification when canceling entry deletion. [#12645](https://github.com/JabRef/jabref/issues/12645)
+- We fixed an issue where JabRef wrote wrong field names into the PDF. [#12833](https://github.com/JabRef/jabref/pulls/12833)
 - We fixed an issue where an exception would occur when running abbreviate journals for multiple entries. [#12634](https://github.com/JabRef/jabref/issues/12634)
 - We fixed an issue where JabRef displayed dropdown triangle in wrong place in "Search for unlinked local files" dialog [#12713](https://github.com/JabRef/jabref/issues/12713)
 - We fixed an issue where JabRef would not open if an invalid external journal abbreviation path was encountered. [#12776](https://github.com/JabRef/jabref/issues/12776)
```

docs/code-howtos/testing.md

+16

````diff
@@ -150,6 +150,22 @@ Fetcher tests can be run locally by executing the Gradle task `fetcherTest`. Thi
 
 Alternatively, if one is using IntelliJ, this can also be done by double-clicking the `fetcherTest` task under the `other` group in the Gradle Tool window (`JabRef > Tasks > other > fetcherTest`).
 
+## "No matching tests found"
+
+If the output is "No matching tests found", the wrong test category was used.
+
+Check the "Run/Debug Configurations".
+
+Example:
+
+```gradle
+:databaseTest --tests "org.jabref.logic.importer.fileformat.pdf.PdfMergeMetadataImporterTest.pdfMetadataExtractedFrom2024SPLCBecker"
+```
+
+This tells Gradle that `PdfMergeMetadataImporterTest` should be executed as a database test.
+However, it is marked as `@FetcherTest`.
+Thus, change `:databaseTest` to `:fetcherTest` to get the test running.
+
 ## Advanced testing and further reading
 
 On top of basic unit testing, there are more ways to test a software:
````
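To make the fix from the added section concrete: assuming the test class really is annotated with `@FetcherTest`, the corrected invocation keeps the same `--tests` filter but runs it under the `fetcherTest` task (a sketch of the adjusted configuration, not output from this commit):

```gradle
:fetcherTest --tests "org.jabref.logic.importer.fileformat.pdf.PdfMergeMetadataImporterTest.pdfMetadataExtractedFrom2024SPLCBecker"
```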

docs/code-howtos/xmp-parsing.md

+45 (new file)

---
parent: Code Howtos
---

# XMP Parsing

Example XMP metadata from a PDF file (`src/test/resources/org/jabref/logic/importer/fileformat/pdf/2024_SPLC_Becker.pdf`):

```xml
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
      <dc:format>application/pdf</dc:format>
      <dc:identifier>doi:10.1145/3646548.3672587</dc:identifier>
    </rdf:Description>
    <rdf:Description rdf:about="" xmlns:prism="http://prismstandard.org/namespaces/basic/2.1/">
      <prism:doi>10.1145/3646548.3672587</prism:doi>
      <prism:url>https://doi.org/10.1145/3646548.3672587</prism:url>
    </rdf:Description>
    <rdf:Description rdf:about="" xmlns:crossmark="http://crossref.org/crossmark/1.0/">
      <crossmark:MajorVersionDate>2024-09-02</crossmark:MajorVersionDate>
      <crossmark:CrossmarkDomainExclusive>true</crossmark:CrossmarkDomainExclusive>
      <crossmark:CrossMarkDomains>
        <rdf:Seq>
          <rdf:li>dl.acm.org</rdf:li>
        </rdf:Seq>
      </crossmark:CrossMarkDomains>
      <crossmark:DOI>10.1145/3646548.3672587</crossmark:DOI>
    </rdf:Description>
    <rdf:Description rdf:about="" xmlns:pdfx="http://ns.adobe.com/pdfx/1.3/">
      <pdfx:CrossMarkDomains>
        <rdf:Seq>
          <rdf:li>dl.acm.org</rdf:li>
        </rdf:Seq>
      </pdfx:CrossMarkDomains>
      <pdfx:CrossmarkDomainExclusive>true</pdfx:CrossmarkDomainExclusive>
      <pdfx:doi>10.1145/3646548.3672587</pdfx:doi>
      <pdfx:CrossmarkMajorVersionDate>2024-09-02</pdfx:CrossmarkMajorVersionDate>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>
```

`org.apache.xmpbox.xml.DomXmpParser` cannot ignore unknown namespaces. Therefore, we need to extract the known elements.
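The extraction step described above can be sketched with plain JDK DOM classes. This is a minimal illustration, not JabRef's actual implementation: the class name `KnownNamespaceFilter`, the helper `keepKnownDescriptions`, and the namespace set are invented for the example; the real logic lives around `XmpUtilReader`. The idea is to drop every `rdf:Description` that does not declare a namespace the downstream parser understands, before handing the DOM to `DomXmpParser`-style code.

```java
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;

public class KnownNamespaceFilter {

    private static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    // Hypothetical set of namespaces the downstream XMP parser understands
    private static final Set<String> KNOWN_NAMESPACES = Set.of(
            "http://purl.org/dc/elements/1.1/");

    // Removes every rdf:Description that declares no known namespace
    public static Document keepKnownDescriptions(String xmp) {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            Document document = factory.newDocumentBuilder()
                    .parse(new ByteArrayInputStream(xmp.getBytes(StandardCharsets.UTF_8)));

            // Copy to a list first: removing while iterating a live NodeList skips elements
            NodeList descriptions = document.getElementsByTagNameNS(RDF_NS, "Description");
            List<Element> toCheck = new ArrayList<>();
            for (int i = 0; i < descriptions.getLength(); i++) {
                toCheck.add((Element) descriptions.item(i));
            }
            for (Element description : toCheck) {
                if (!declaresKnownNamespace(description)) {
                    description.getParentNode().removeChild(description);
                }
            }
            return document;
        } catch (Exception e) {
            throw new IllegalStateException("Could not parse XMP", e);
        }
    }

    private static boolean declaresKnownNamespace(Element element) {
        NamedNodeMap attributes = element.getAttributes();
        for (int i = 0; i < attributes.getLength(); i++) {
            // xmlns:... attributes live in the XMLNS namespace; their value is the declared URI
            if ("http://www.w3.org/2000/xmlns/".equals(attributes.item(i).getNamespaceURI())
                    && KNOWN_NAMESPACES.contains(attributes.item(i).getNodeValue())) {
                return true;
            }
        }
        return false;
    }
}
```

Applied to the example metadata above, such a filter would keep the `dc:` description and drop the `prism`, `crossmark`, and `pdfx` ones (given the illustrative namespace set used here).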
src/main/java/org/jabref/logic/bibtex/comparator/BibEntryCompare.java

+66 (new file)

```java
package org.jabref.logic.bibtex.comparator;

import java.util.Collection;
import java.util.Collections;
import java.util.SequencedSet;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;

public class BibEntryCompare {
    public enum Result { SUBSET, EQUAL, SUPERSET, DISJUNCT, DISJUNCT_OR_EQUAL_FIELDS, DIFFERENT }

    /**
     * @return the relation of {@code first} to {@code second}, e.g., if first is a subset of second, then Result.SUBSET is returned.
     */
    public static Result compareEntries(BibEntry first, BibEntry second) {
        if (first.equals(second)) {
            return Result.EQUAL;
        }

        SequencedSet<Field> fieldsFirst = first.getFields();
        SequencedSet<Field> secondFields = second.getFields();

        if (fieldsFirst.containsAll(secondFields)) {
            if (isSubSet(second, first)) {
                return Result.SUPERSET;
            }
            return Result.DIFFERENT;
        }

        if (secondFields.containsAll(fieldsFirst)) {
            if (isSubSet(first, second)) {
                return Result.SUBSET;
            }
            return Result.DIFFERENT;
        }

        if (Collections.disjoint(fieldsFirst, secondFields)) {
            return Result.DISJUNCT;
        }

        fieldsFirst.retainAll(secondFields);
        if (isSubSet(first, second, fieldsFirst)) {
            return Result.DISJUNCT_OR_EQUAL_FIELDS;
        }

        return Result.DIFFERENT;
    }

    private static boolean isSubSet(BibEntry candidateSubSet, BibEntry candidateSuperSet) {
        return isSubSet(candidateSubSet, candidateSuperSet, candidateSubSet.getFields());
    }

    private static boolean isSubSet(BibEntry candidateSubSet, BibEntry candidateSuperSet, Collection<Field> fields) {
        for (Field field : fields) {
            String subValue = candidateSubSet.getField(field).get();
            boolean isEqualValue = candidateSuperSet.getField(field)
                                                    .filter(superValue -> superValue.equals(subValue))
                                                    .isPresent();
            if (!isEqualValue) {
                return false;
            }
        }
        return true;
    }
}
```
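The comparison semantics of `BibEntryCompare` can be illustrated with a self-contained model that uses plain string maps instead of JabRef's `BibEntry`. The class name `FieldMapCompare` is invented for this sketch; it follows the same decision ladder (equal, superset, subset, disjoint fields, shared fields that agree, otherwise different):

```java
import java.util.Collections;
import java.util.Map;

public class FieldMapCompare {
    public enum Result { SUBSET, EQUAL, SUPERSET, DISJUNCT, DISJUNCT_OR_EQUAL_FIELDS, DIFFERENT }

    // Same decision ladder as BibEntryCompare, on Map<String, String> "entries"
    public static Result compare(Map<String, String> first, Map<String, String> second) {
        if (first.equals(second)) {
            return Result.EQUAL;
        }
        if (first.entrySet().containsAll(second.entrySet())) {
            return Result.SUPERSET; // all of second's field/value pairs appear in first
        }
        if (second.entrySet().containsAll(first.entrySet())) {
            return Result.SUBSET;
        }
        if (Collections.disjoint(first.keySet(), second.keySet())) {
            return Result.DISJUNCT; // no shared field at all
        }
        // Entries overlap: check whether all shared fields carry equal values
        boolean sharedAgree = first.entrySet().stream()
                .filter(entry -> second.containsKey(entry.getKey()))
                .allMatch(entry -> entry.getValue().equals(second.get(entry.getKey())));
        return sharedAgree ? Result.DISJUNCT_OR_EQUAL_FIELDS : Result.DIFFERENT;
    }
}
```

For example, `{author=Knuth, year=1974}` compared with `{author=Knuth}` yields `SUPERSET`, while comparing it with `{author=Knuth, title=TAOCP}` yields `DISJUNCT_OR_EQUAL_FIELDS`, because the only shared field agrees.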

src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java

+58 −64

```diff
@@ -3,15 +3,18 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Stream;
 
 import org.jabref.logic.FilePreferences;
+import org.jabref.logic.cleanup.RelativePathsCleanup;
 import org.jabref.logic.importer.EntryBasedFetcher;
 import org.jabref.logic.importer.FetcherException;
+import org.jabref.logic.importer.IdBasedFetcher;
 import org.jabref.logic.importer.ImportFormatPreferences;
 import org.jabref.logic.importer.ParseException;
 import org.jabref.logic.importer.ParserResult;
@@ -24,14 +27,12 @@
 import org.jabref.logic.importer.fileformat.pdf.PdfImporter;
 import org.jabref.logic.importer.fileformat.pdf.PdfVerbatimBibtexImporter;
 import org.jabref.logic.importer.fileformat.pdf.PdfXmpImporter;
-import org.jabref.logic.importer.util.FileFieldParser;
 import org.jabref.logic.l10n.Localization;
 import org.jabref.logic.util.StandardFileType;
 import org.jabref.logic.util.io.FileUtil;
 import org.jabref.model.database.BibDatabaseContext;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.LinkedFile;
-import org.jabref.model.entry.field.Field;
 import org.jabref.model.entry.field.StandardField;
 
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -48,12 +49,13 @@ public class PdfMergeMetadataImporter extends PdfImporter {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(PdfMergeMetadataImporter.class);
 
-    private final ImportFormatPreferences importFormatPreferences;
     private final List<PdfImporter> metadataImporters;
 
-    public PdfMergeMetadataImporter(ImportFormatPreferences importFormatPreferences) {
-        this.importFormatPreferences = importFormatPreferences;
+    private final DoiFetcher doiFetcher;
+    private final ArXivFetcher arXivFetcher;
+    private final IsbnFetcher isbnFetcher;
 
+    public PdfMergeMetadataImporter(ImportFormatPreferences importFormatPreferences) {
         // TODO: Evaluate priorities of these {@link PdfBibExtractor}s.
         this.metadataImporters = new ArrayList<>(List.of(
                 new PdfVerbatimBibtexImporter(importFormatPreferences),
@@ -65,6 +67,12 @@ public PdfMergeMetadataImporter(ImportFormatPreferences importFormatPreferences)
         if (importFormatPreferences.grobidPreferences().isGrobidEnabled()) {
             this.metadataImporters.add(2, new PdfGrobidImporter(importFormatPreferences));
         }
+        doiFetcher = new DoiFetcher(importFormatPreferences);
+        arXivFetcher = new ArXivFetcher(importFormatPreferences);
+
+        isbnFetcher = new IsbnFetcher(importFormatPreferences);
+        // .addRetryFetcher(new EbookDeIsbnFetcher(importFormatPreferences))
+        // .addRetryFetcher(new DoiToBibtexConverterComIsbnFetcher(importFormatPreferences))
     }
 
     /**
@@ -99,6 +107,7 @@ private List<BibEntry> extractCandidatesFromPdf(Path filePath, PDDocument docume
         for (PdfImporter metadataImporter : metadataImporters) {
             try {
                 List<BibEntry> extractedEntries = metadataImporter.importDatabase(filePath, document);
+                LOGGER.debug("Importer {} extracted {}", metadataImporter.getName(), extractedEntries);
                 candidates.addAll(extractedEntries);
             } catch (Exception e) {
                 LOGGER.error("Got an exception while importing PDF file", e);
@@ -111,43 +120,17 @@ private List<BibEntry> fetchIdsOfCandidates(List<BibEntry> candidates) {
     private List<BibEntry> fetchIdsOfCandidates(List<BibEntry> candidates) {
         List<BibEntry> fetchedCandidates = new ArrayList<>();
 
+        // Collects Ids already looked for - to avoid multiple calls for one id
+        final Set<String> fetchedIds = new HashSet<>();
+
         for (BibEntry candidate : candidates) {
-            Optional<String> doi = candidate.getField(StandardField.DOI);
-            if (doi.isPresent()) {
-                try {
-                    new DoiFetcher(importFormatPreferences)
-                            .performSearchById(doi.get())
-                            .ifPresent(fetchedCandidates::add);
-                } catch (FetcherException e) {
-                    LOGGER.error("Fetching failed for DOI \"{}\".", doi, e);
-                }
-            }
+            fetchData(candidate, StandardField.DOI, doiFetcher, fetchedIds, fetchedCandidates);
 
-            Optional<String> eprint = candidate.getField(StandardField.EPRINT);
-            if (eprint.isPresent()) {
-                // This code assumes that `eprint` field refers to an arXiv preprint, which is not correct.
-                // One should also check if `archivePrefix` is equal to `arXiv`, and handle other cases too.
-                try {
-                    new ArXivFetcher(importFormatPreferences)
-                            .performSearchById(eprint.get())
-                            .ifPresent(fetchedCandidates::add);
-                } catch (FetcherException e) {
-                    LOGGER.error("Fetching failed for arXiv ID \"{}\".", eprint.get(), e);
-                }
-            }
+            // This code assumes that `eprint` field refers to an arXiv preprint, which is not correct.
+            // One should also check if `archivePrefix` is equal to `arXiv`, and handle other cases too.
+            fetchData(candidate, StandardField.EPRINT, arXivFetcher, fetchedIds, fetchedCandidates);
 
-            Optional<String> isbn = candidate.getField(StandardField.ISBN);
-            if (isbn.isPresent()) {
-                try {
-                    new IsbnFetcher(importFormatPreferences)
-                            // .addRetryFetcher(new EbookDeIsbnFetcher(importFormatPreferences))
-                            // .addRetryFetcher(new DoiToBibtexConverterComIsbnFetcher(importFormatPreferences))
-                            .performSearchById(isbn.get())
-                            .ifPresent(fetchedCandidates::add);
-                } catch (FetcherException e) {
-                    LOGGER.error("Fetching failed for ISBN \"{}\".", isbn.get(), e);
-                }
-            }
+            fetchData(candidate, StandardField.ISBN, isbnFetcher, fetchedIds, fetchedCandidates);
 
             // TODO: Handle URLs too.
             // However, it may have problems if URL refers to the same identifier in DOI, ISBN, or arXiv.
@@ -156,41 +139,52 @@ private List<BibEntry> fetchIdsOfCandidates(List<BibEntry> candidates) {
         return fetchedCandidates;
     }
 
-    private static BibEntry mergeCandidates(Stream<BibEntry> candidates) {
-        BibEntry entry = new BibEntry();
-
-        // Functional style is used here instead of imperative like in `extractCandidatesFromPdf` or `fetchIdsOfCandidates`,
-        // because they have checked exceptions.
+    /**
+     * @param candidate The BibEntry to look for the field
+     * @param field The field to look for
+     * @param fetcher The fetcher to use
+     * @param fetchedIds The already fetched ids (will be updated)
+     * @param fetchedCandidates New candidates (will be updated)
+     */
+    private void fetchData(BibEntry candidate, StandardField field, IdBasedFetcher fetcher, Set<String> fetchedIds, List<BibEntry> fetchedCandidates) {
+        candidate.getField(field)
+                 .filter(id -> !fetchedIds.contains(id))
+                 .ifPresent(id -> {
+                     fetchedIds.add(id);
+                     try {
+                         fetcher.performSearchById(id)
+                                .ifPresent(fetchedCandidates::add);
+                     } catch (FetcherException e) {
+                         LOGGER.error("Fetching failed for id \"{}\".", id, e);
+                     }
+                 });
+    }
 
-        candidates.forEach(candidate -> {
-            if (BibEntry.DEFAULT_TYPE.equals(entry.getType())) {
-                entry.setType(candidate.getType());
-            }
+    private static BibEntry mergeCandidates(Stream<BibEntry> candidates) {
+        final BibEntry entry = new BibEntry();
+        candidates.forEach(entry::mergeWith);
 
-            Set<Field> presentFields = entry.getFields();
-
-            candidate
-                    .getFieldMap()
-                    .entrySet()
-                    .stream()
-                    // Don't merge FILE fields that point to a stored file as we set that to filePath anyway.
-                    // Nevertheless, retain online links.
-                    .filter(fieldEntry ->
-                            !(StandardField.FILE == fieldEntry.getKey()
-                                    && FileFieldParser.parse(fieldEntry.getValue()).stream().noneMatch(LinkedFile::isOnlineLink)))
-                    // Only overwrite non-present fields
-                    .filter(fieldEntry -> !presentFields.contains(fieldEntry.getKey()))
-                    .forEach(fieldEntry -> entry.setField(fieldEntry.getKey(), fieldEntry.getValue()));
-        });
+        // Retain online links only
+        List<LinkedFile> onlineLinks = entry.getFiles().stream().filter(LinkedFile::isOnlineLink).toList();
+        entry.clearField(StandardField.FILE);
+        entry.addFiles(onlineLinks);
 
         return entry;
     }
 
+    /**
+     * Imports the BibTeX data from the given PDF file and relativizes the paths of each linked file based on the context and the file preferences.
+     */
     public ParserResult importDatabase(Path filePath, BibDatabaseContext context, FilePreferences filePreferences) throws IOException {
         Objects.requireNonNull(context);
         Objects.requireNonNull(filePreferences);
 
-        return importDatabase(filePath);
+        ParserResult parserResult = importDatabase(filePath);
+
+        RelativePathsCleanup relativePathsCleanup = new RelativePathsCleanup(context, filePreferences);
+        parserResult.getDatabase().getEntries().forEach(entry -> relativePathsCleanup.cleanup(entry));
+
+        return parserResult;
     }
 
     @Override
```
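The deduplication pattern introduced by `fetchData` above can be shown standalone. This sketch models a fetcher as a plain function and is not JabRef code; the names `DedupFetch` and `fetchOnce` are invented for illustration:

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;

public class DedupFetch {
    // Tracks ids already looked up, so each id triggers at most one fetch
    private final Set<String> fetchedIds = new HashSet<>();
    private final List<String> results = new ArrayList<>();

    // "fetcher" stands in for IdBasedFetcher#performSearchById
    public void fetchOnce(Optional<String> id, Function<String, Optional<String>> fetcher) {
        id.filter(value -> !fetchedIds.contains(value))
          .ifPresent(value -> {
              fetchedIds.add(value);
              fetcher.apply(value).ifPresent(results::add);
          });
    }

    public List<String> results() {
        return results;
    }
}
```

Calling `fetchOnce` twice with the same id performs only one lookup, which is exactly what the `fetchedIds` set in the importer achieves when several extracted candidates carry the same DOI.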

src/main/java/org/jabref/logic/importer/fileformat/PicaXmlParser.java

+1 −1

```diff
@@ -98,7 +98,7 @@ private BibEntry parseEntry(Element e) {
         List<Element> datafields = getChildren("datafield", e);
         for (Element datafield : datafields) {
             String tag = datafield.getAttribute("tag");
-            LOGGER.debug("tag: {}", tag);
+            LOGGER.trace("tag: {}", tag);
 
             // genre/type of the entry https://swbtools.bsz-bw.de/cgi-bin/k10plushelp.pl?cmd=kat&val=0500&katalog=Standard
             if ("002@".equals(tag)) {
```

src/main/java/org/jabref/logic/importer/fileformat/pdf/PdfXmpImporter.java

+3 −1

```diff
@@ -17,13 +17,15 @@
 public class PdfXmpImporter extends PdfImporter {
 
     private final XmpPreferences xmpPreferences;
+    private final XmpUtilReader xmpUtilReader;
 
     public PdfXmpImporter(XmpPreferences xmpPreferences) {
         this.xmpPreferences = xmpPreferences;
+        xmpUtilReader = new XmpUtilReader();
     }
 
     public List<BibEntry> importDatabase(Path filePath, PDDocument document) throws IOException {
-        return new XmpUtilReader().readXmp(filePath, xmpPreferences);
+        return xmpUtilReader.readXmp(filePath, document, xmpPreferences);
     }
 
     @Override
```
