Skip to content

Commit 5cef645

Browse files
committed
Merge branch 'develop' into 8740-file-recognition-based-on-filename #8740
2 parents 734f467 + 86f69bd commit 5cef645

File tree

6 files changed

+90
-8
lines changed

6 files changed

+90
-8
lines changed

src/main/java/edu/harvard/iq/dataverse/Dataverse.java

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
66
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
77
import edu.harvard.iq.dataverse.util.BundleUtil;
8+
import edu.harvard.iq.dataverse.util.SystemConfig;
9+
810
import java.util.ArrayList;
911
import java.util.HashSet;
1012
import java.util.Iterator;
@@ -765,4 +767,8 @@ public boolean isAncestorOf( DvObject other ) {
765767
}
766768
return false;
767769
}
770+
771+
public String getLocalURL() {
772+
return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias();
773+
}
768774
}

src/main/java/edu/harvard/iq/dataverse/GlobalId.java

+23
Original file line numberDiff line numberDiff line change
@@ -254,4 +254,27 @@ public static boolean verifyImportCharacters(String pidParam) {
254254

255255
return m.matches();
256256
}
257+
258+
/**
259+
* Convenience method to get the internal form of a PID string when it may be in
260+
* the https:// or http:// form ToDo -refactor class to allow creating a
261+
* GlobalID from any form (which assures it has valid syntax) and then have methods to get
262+
* the form you want.
263+
*
264+
* @param pidUrlString - a string assumed to be a valid PID in some form
265+
* @return the internal form as a String
266+
*/
267+
public static String getInternalFormOfPID(String pidUrlString) {
268+
String pidString = pidUrlString;
269+
if(pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) {
270+
pidString = pidUrlString.replace(GlobalId.DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":"));
271+
} else if(pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) {
272+
pidString = pidUrlString.replace(GlobalId.HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":"));
273+
} else if(pidUrlString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL)) {
274+
pidString = pidUrlString.replace(GlobalId.HTTP_DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":"));
275+
} else if(pidUrlString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) {
276+
pidString = pidUrlString.replace(GlobalId.HTTP_HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":"));
277+
}
278+
return pidString;
279+
}
257280
}

src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272

7373
import edu.harvard.iq.dataverse.DataFile;
7474
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
75+
import edu.harvard.iq.dataverse.GlobalId;
7576
import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
7677

7778
public class BagGenerator {
@@ -204,7 +205,9 @@ public boolean generateBag(OutputStream outputStream) throws Exception {
204205
// The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object
205206
aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString());
206207

207-
bagID = aggregation.get("@id").getAsString() + "v."
208+
String pidUrlString = aggregation.get("@id").getAsString();
209+
String pidString=GlobalId.getInternalFormOfPID(pidUrlString);
210+
bagID = pidString + "v."
208211
+ aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString();
209212

210213
logger.info("Generating Bag: " + bagID);

src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java

+16-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
99
import edu.harvard.iq.dataverse.DatasetFieldType;
1010
import edu.harvard.iq.dataverse.DatasetVersion;
11+
import edu.harvard.iq.dataverse.Dataverse;
1112
import edu.harvard.iq.dataverse.DvObjectContainer;
1213
import edu.harvard.iq.dataverse.FileMetadata;
1314
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
@@ -86,7 +87,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
8687
localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl());
8788

8889
Dataset dataset = version.getDataset();
89-
String id = dataset.getGlobalId().asString();
90+
String id = dataset.getGlobalId().toURL().toExternalForm();
9091
JsonArrayBuilder fileArray = Json.createArrayBuilder();
9192
// The map describes an aggregation
9293
JsonObjectBuilder aggBuilder = Json.createObjectBuilder();
@@ -214,7 +215,9 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
214215
}
215216

216217
aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(),
217-
BrandingUtil.getRootDataverseCollectionName());
218+
BrandingUtil.getInstallationBrandName());
219+
220+
aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner()));
218221
String mdl = dataset.getMetadataLanguage();
219222
if(!mdl.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) {
220223
aggBuilder.add(JsonLDTerm.schemaOrg("inLanguage").getLabel(), mdl);
@@ -320,6 +323,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
320323
}
321324
}
322325

326+
private JsonObjectBuilder getDataverseDescription(Dataverse dv) {
327+
//Schema.org is already in local context, no updates needed as long as we only use chemaOrg and "@id" here
328+
JsonObjectBuilder dvjob = Json.createObjectBuilder().add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName()).add("@id", dv.getLocalURL());
329+
addIfNotNull(dvjob, JsonLDTerm.schemaOrg("description"), dv.getDescription());
330+
Dataverse owner = dv.getOwner();
331+
if(owner!=null) {
332+
dvjob.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(owner));
333+
}
334+
return dvjob;
335+
}
336+
323337
/*
324338
* Simple methods to only add an entry to JSON if the value of the term is
325339
* non-null. Methods created for string, JsonValue, boolean, and long

src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public class BagItFileHandlerPostProcessor {
1515

1616
private static final Logger logger = Logger.getLogger(BagItFileHandlerPostProcessor.class.getCanonicalName());
1717

18-
public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store", "._.DS_Store");
18+
public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store");
1919

2020
public List<DataFile> process(List<DataFile> items) {
2121
if(items == null) {
@@ -26,7 +26,11 @@ public List<DataFile> process(List<DataFile> items) {
2626

2727
for(DataFile item: items) {
2828
String fileName = item.getCurrentName();
29-
if(FILES_TO_IGNORE.contains(fileName)) {
29+
if(fileName == null || fileName.isEmpty()) {
30+
continue;
31+
}
32+
33+
if(FILES_TO_IGNORE.stream().anyMatch(prefix -> fileName.startsWith(prefix))) {
3034
logger.fine(String.format("action=BagItFileHandlerPostProcessor result=ignore-entry file=%s", fileName));
3135
continue;
3236
}

src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java

+35-3
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,43 @@ public void should_return_null_when_datafiles_are_null() throws Exception {
2929
@Test
3030
public void should_ignore_mac_control_files() throws Exception {
3131
String bagEntry = UUID.randomUUID().toString();
32-
String macFile01 = "__";
33-
String macFile02 = "._";
3432
String macFile03 = ".DS_Store";
3533
String macFile04 = "._.DS_Store";
36-
List<DataFile> dataFiles = createDataFiles(bagEntry, macFile01, macFile02, macFile03, macFile04);
34+
List<DataFile> dataFiles = createDataFiles(bagEntry, macFile03, macFile04);
35+
36+
List<DataFile> result = target.process(dataFiles);
37+
MatcherAssert.assertThat(result.size(), Matchers.is(1));
38+
MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
39+
}
40+
41+
@Test
42+
public void should_ignore_empty_files() throws Exception {
43+
String bagEntry = UUID.randomUUID().toString();
44+
String fileToIgnore = "";
45+
List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
46+
47+
List<DataFile> result = target.process(dataFiles);
48+
MatcherAssert.assertThat(result.size(), Matchers.is(1));
49+
MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
50+
}
51+
52+
@Test
53+
public void should_ignore_files_that_start_with_dot_underscore() throws Exception {
54+
String bagEntry = UUID.randomUUID().toString();
55+
String fileToIgnore = "._FileNameToIgnore";
56+
List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
57+
58+
List<DataFile> result = target.process(dataFiles);
59+
MatcherAssert.assertThat(result.size(), Matchers.is(1));
60+
MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
61+
}
62+
63+
@Test
64+
public void should_ignore_files_that_start_with_double_underscore() throws Exception {
65+
String bagEntry = UUID.randomUUID().toString();
66+
String fileToIgnore = "__FileNameToIgnore";
67+
String validFile = "validName";
68+
List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
3769

3870
List<DataFile> result = target.process(dataFiles);
3971
MatcherAssert.assertThat(result.size(), Matchers.is(1));

0 commit comments

Comments
 (0)