Skip to content

Commit 99183e1

Browse files
Add APS Fetcher (refactored) (JabRef#6143)
* Add APS fetcher * Fix case sensitivity bug * Refactor ApsFetcher * Add note about APS fetcher * Refactor findFulltext() * Refactor getId() * Parameterize ApsFetcherTest * Add link to APS changelog entry * Refactor APS Fetcher * make separate tests Co-authored-by: August Janse <[email protected]>
1 parent edec608 commit 99183e1

File tree

4 files changed

+145
-0
lines changed

4 files changed

+145
-0
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
4040
- Filenames of external files can no longer contain curly braces. [#5926](https://github.com/JabRef/jabref/pull/5926)
4141
- We made the filters more easily accessible in the integrity check dialog. [#5955](https://github.com/JabRef/jabref/pull/5955)
4242
- We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
43+
- We reimplemented and improved the dialog "Customize entry types". [#4719](https://github.com/JabRef/jabref/issues/4719)
44+
- We added an [American Physical Society](https://journals.aps.org/) fetcher. [#818](https://github.com/JabRef/jabref/issues/818)
4345

4446
### Fixed
4547

src/main/java/org/jabref/logic/importer/WebFetchers.java

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.util.TreeSet;
99

1010
import org.jabref.logic.importer.fetcher.ACS;
11+
import org.jabref.logic.importer.fetcher.ApsFetcher;
1112
import org.jabref.logic.importer.fetcher.ArXiv;
1213
import org.jabref.logic.importer.fetcher.AstrophysicsDataSystem;
1314
import org.jabref.logic.importer.fetcher.CiteSeer;
@@ -159,6 +160,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
159160
fetchers.add(new ACS());
160161
fetchers.add(new ArXiv(importFormatPreferences));
161162
fetchers.add(new IEEE(importFormatPreferences));
163+
fetchers.add(new ApsFetcher());
162164
// Meta search
163165
fetchers.add(new GoogleScholar(importFormatPreferences));
164166
fetchers.add(new OpenAccessDoi());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.io.IOException;
4+
import java.net.MalformedURLException;
5+
import java.net.URL;
6+
import java.net.URLConnection;
7+
import java.util.Objects;
8+
import java.util.Optional;
9+
10+
import org.jabref.logic.importer.FulltextFetcher;
11+
import org.jabref.model.entry.BibEntry;
12+
import org.jabref.model.entry.field.StandardField;
13+
import org.jabref.model.entry.identifier.DOI;
14+
15+
import kong.unirest.Unirest;
16+
import org.slf4j.Logger;
17+
import org.slf4j.LoggerFactory;
18+
19+
/**
20+
* FulltextFetcher implementation that attempts to find a PDF URL at APS. Also see the <a
21+
* href="https://harvest.aps.org/docs/harvest-api">API</a>, although it isn't currently used.
22+
*/
23+
public class ApsFetcher implements FulltextFetcher {
24+
25+
private static final Logger LOGGER = LoggerFactory.getLogger(ApsFetcher.class);
26+
27+
// The actual API needs either an API key or a header. This is a workaround.
28+
private static final String DOI_URL = "https://www.doi.org/";
29+
private static final String PDF_URL = "https://journals.aps.org/prl/pdf/";
30+
31+
@Override
32+
public Optional<URL> findFullText(BibEntry entry) throws IOException {
33+
Objects.requireNonNull(entry);
34+
35+
Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
36+
37+
if (!doi.isPresent()) {
38+
return Optional.empty();
39+
}
40+
41+
Optional<String> id = getId(doi.get().getDOI());
42+
43+
if (id.isPresent()) {
44+
45+
String pdfRequestUrl = PDF_URL + id.get();
46+
int code = Unirest.head(pdfRequestUrl).asJson().getStatus();
47+
48+
if (code == 200) {
49+
LOGGER.info("Fulltext PDF found @ APS.");
50+
try {
51+
return Optional.of(new URL(pdfRequestUrl));
52+
} catch (MalformedURLException e) {
53+
LOGGER.warn("APS returned malformed URL, cannot find PDF.");
54+
}
55+
}
56+
}
57+
return Optional.empty();
58+
}
59+
60+
@Override
61+
public TrustLevel getTrustLevel() {
62+
return TrustLevel.PUBLISHER;
63+
}
64+
65+
/**
66+
* Convert a DOI into an appropriate APS id.
67+
*
68+
* @param doi A case insensitive DOI
69+
* @return A DOI cased as APS likes it
70+
*/
71+
private Optional<String> getId(String doi) {
72+
// DOI is not case sensitive, but the id for the PDF URL is,
73+
// so we follow DOI.org redirects to get the proper id.
74+
// https://stackoverflow.com/a/5270162/1729441
75+
76+
String doiRequest = DOI_URL + doi;
77+
78+
URLConnection con;
79+
try {
80+
con = new URL(doiRequest).openConnection();
81+
con.connect();
82+
con.getInputStream();
83+
String[] urlParts = con.getURL().toString().split("abstract/");
84+
if (urlParts.length == 2) {
85+
return Optional.of(urlParts[1]);
86+
}
87+
88+
} catch (IOException e) {
89+
LOGGER.warn("Error connecting to APS", e);
90+
}
91+
return Optional.empty();
92+
}
93+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.net.URL;
4+
import java.util.Optional;
5+
6+
import org.jabref.model.entry.BibEntry;
7+
import org.jabref.model.entry.field.StandardField;
8+
import org.jabref.testutils.category.FetcherTest;
9+
10+
import org.junit.jupiter.api.BeforeEach;
11+
import org.junit.jupiter.api.Test;
12+
13+
import static org.junit.jupiter.api.Assertions.assertEquals;
14+
15+
@FetcherTest
16+
class ApsFetcherTest {
17+
18+
private ApsFetcher finder;
19+
20+
@BeforeEach
21+
void setUp() {
22+
finder = new ApsFetcher();
23+
}
24+
25+
@Test
26+
void findFullTextFromDoi() throws Exception {
27+
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.116.061102");
28+
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.116.061102")), finder.findFullText(entry));
29+
}
30+
31+
@Test
32+
void findFullTextFromLowercaseDoi() throws Exception {
33+
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/physrevlett.124.029002");
34+
assertEquals(Optional.of(new URL("https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.124.029002")), finder.findFullText(entry));
35+
}
36+
37+
@Test
38+
void notFindFullTextForUnauthorized() throws Exception {
39+
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1103/PhysRevLett.89.127401");
40+
assertEquals(Optional.empty(), finder.findFullText(entry));
41+
}
42+
43+
@Test
44+
void notFindFullTextForUnknownEntry() throws Exception {
45+
BibEntry entry = new BibEntry().withField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002");
46+
assertEquals(Optional.empty(), finder.findFullText(entry));
47+
}
48+
}

0 commit comments

Comments
 (0)