Add APS Fetcher (refactored) (JabRef#6143)

Siedlerchr · augustjanse · web-flow · commit 99183e1680fc · 2020-03-18T20:28:36.000+01:00
* Add APS fetcher

* Fix case sensitivity bug

* Refactor ApsFetcher

* Add note about APS fetcher

* Refactor findFulltext()

* Refactor getId()

* Parameterize ApsFetcherTest

* Add link to APS changelog entry

* Refactor APS Fetcher

* make separate tests

Co-authored-by: August Janse <augustj@kth.se>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -40,6 +40,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 - Filenames of external files can no longer contain curly braces. [#5926](https://github.com/JabRef/jabref/pull/5926)
 - We made the filters more easily accessible in the integrity check dialog. [#5955](https://github.com/JabRef/jabref/pull/5955)
 - We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
+- We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
+- We added an [American Physical Society](https://journals.aps.org/) fetcher. [#818](https://github.com/JabRef/jabref/issues/818)
 
 ### Fixed
 
diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java
@@ -8,6 +8,7 @@
 import java.util.TreeSet;
 
 import org.jabref.logic.importer.fetcher.ACS;
+import org.jabref.logic.importer.fetcher.ApsFetcher;
 import org.jabref.logic.importer.fetcher.ArXiv;
 import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
 import org.jabref.logic.importer.fetcher.CiteSeer;
@@ -159,6 +160,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
         fetchers.add(new ACS());
         fetchers.add(new ArXiv(importFormatPreferences));
         fetchers.add(new IEEE(importFormatPreferences));
+        fetchers.add(new ApsFetcher());
         // Meta search
         fetchers.add(new GoogleScholar(importFormatPreferences));
         fetchers.add(new OpenAccessDoi());
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ApsFetcher.java
@@ -0,0 +1,93 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Objects;
+import java.util.Optional;
+
+import org.jabref.logic.importer.FulltextFetcher;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.identifier.DOI;
+
+import kong.unirest.Unirest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the <a
+ * href="https://harvest.aps.org/docs/harvest-api">API</a>, although it isn't currently used.
+ * <p>
+ * The lookup resolves the entry's DOI through doi.org, takes the part of the resolved URL that follows
+ * {@code abstract/} as the APS id, and probes the matching PDF URL with a HEAD request.
+ */
+public class ApsFetcher implements FulltextFetcher {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class);
+
+    // The actual API needs either an API key or a header. This is a workaround.
+    private static final String DOI_URL = "https://www.doi.org/";
+    // NOTE(review): the path is fixed to the "prl" journal, so every id is requested below it,
+    // whichever journal the DOI resolved to -- confirm this is intended.
+    private static final String PDF_URL = "https://journals.aps.org/prl/pdf/";
+
+    /**
+     * Looks for the full text PDF of the given entry at APS.
+     *
+     * @param entry the entry to look up; must not be {@code null}
+     * @return the PDF URL if the entry has a parsable DOI, an APS id could be derived from it and
+     *         the HEAD request for the PDF answered with status 200; otherwise an empty Optional
+     * @throws IOException declared by the overridden method; failures while resolving the DOI are
+     *                     logged in {@link #getId(String)} and reported as an empty result instead
+     */
+    @Override
+    public Optional<URL> findFullText(BibEntry entry) throws IOException {
+        Objects.requireNonNull(entry);
+
+        Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
+        if (!doi.isPresent()) {
+            return Optional.empty();
+        }
+
+        Optional<String> id = getId(doi.get().getDOI());
+        if (!id.isPresent()) {
+            return Optional.empty();
+        }
+
+        String pdfRequestUrl = PDF_URL + id.get();
+        // A HEAD request is enough: only the status code is inspected, the body is never read.
+        int code = Unirest.head(pdfRequestUrl).asJson().getStatus();
+        if (code != 200) {
+            return Optional.empty();
+        }
+
+        LOGGER.info("Fulltext PDF found @ APS.");
+        try {
+            return Optional.of(new URL(pdfRequestUrl));
+        } catch (MalformedURLException e) {
+            // Keep the cause in the log so the offending URL can be diagnosed.
+            LOGGER.warn("APS returned malformed URL, cannot find PDF.", e);
+        }
+        return Optional.empty();
+    }
+
+    @Override
+    public TrustLevel getTrustLevel() {
+        return TrustLevel.PUBLISHER;
+    }
+
+    /**
+     * Convert a DOI into an appropriate APS id.
+     *
+     * @param doi A case insensitive DOI
+     * @return The part of the resolved URL that follows {@code abstract/}, i.e. the DOI cased as APS
+     *         likes it; empty if the connection failed or the resolved URL does not contain exactly
+     *         one {@code abstract/} separator with text after it
+     */
+    private Optional<String> getId(String doi) {
+        // DOI is not case sensitive, but the id for the PDF URL is,
+        // so we follow DOI.org redirects to get the proper id.
+        // https://stackoverflow.com/a/5270162/1729441
+
+        String doiRequest = DOI_URL + doi;
+
+        try {
+            URLConnection con = new URL(doiRequest).openConnection();
+            con.connect();
+            // The stream is opened only so that getURL() reports the redirect target; its content is
+            // never read. try-with-resources makes sure it is closed again instead of being leaked.
+            try (InputStream ignored = con.getInputStream()) {
+                String[] urlParts = con.getURL().toString().split("abstract/");
+                if (urlParts.length == 2) {
+                    return Optional.of(urlParts[1]);
+                }
+            }
+        } catch (IOException e) {
+            LOGGER.warn("Error connecting to APS", e);
+        }
+        return Optional.empty();
+    }
+}
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ApsFetcherTest.java
@@ -0,0 +1,48 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.net.URL;
+import java.util.Optional;
+
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.testutils.category.FetcherTest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+@FetcherTest
+class ApsFetcherTest {
+
+    private ApsFetcher fetcher;
+
+    @BeforeEach
+    void setUp() {
+        fetcher = new ApsFetcher();
+    }
+
+    /**
+     * Builds an entry whose only field is the given DOI.
+     */
+    private static BibEntry entryWithDoi(String doi) {
+        return new BibEntry().withField(StandardField.DOI, doi);
+    }
+
+    // A DOI that is already cased the way APS expects resolves to the PDF URL.
+    @Test
+    void findFullTextFromDoi() throws Exception {
+        BibEntry entry = entryWithDoi("10.1103/PhysRevLett.116.061102");
+        Optional<URL> expected = Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102"));
+
+        Optional<URL> actual = fetcher.findFullText(entry);
+
+        assertEquals(expected, actual);
+    }
+
+    // A lowercase DOI must still yield the properly cased PDF URL.
+    @Test
+    void findFullTextFromLowercaseDoi() throws Exception {
+        BibEntry entry = entryWithDoi("10.1103/physrevlett.124.029002");
+        Optional<URL> expected = Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002"));
+
+        Optional<URL> actual = fetcher.findFullText(entry);
+
+        assertEquals(expected, actual);
+    }
+
+    // No URL is expected when the PDF is not accessible without authorization.
+    @Test
+    void notFindFullTextForUnauthorized() throws Exception {
+        BibEntry entry = entryWithDoi("10.1103/PhysRevLett.89.127401");
+
+        Optional<URL> actual = fetcher.findFullText(entry);
+
+        assertEquals(Optional.empty(), actual);
+    }
+
+    // No URL is expected for a DOI outside the APS prefix used by the other tests.
+    @Test
+    void notFindFullTextForUnknownEntry() throws Exception {
+        BibEntry entry = entryWithDoi("10.1016/j.aasri.2014.0559.002");
+
+        Optional<URL> actual = fetcher.findFullText(entry);
+
+        assertEquals(Optional.empty(), actual);
+    }
+}