Skip to content

Commit 2b83f22

Browse files
committed
make dataset type searchable and facetable #10517
1 parent 25b2ea5 commit 2b83f22

File tree

8 files changed

+74
-6
lines changed

8 files changed

+74
-6
lines changed

doc/sphinx-guides/source/_static/api/dataset-create-software-ddi.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
<collDate cycle="P1" event="end" date="1006-01-01">1006-01-01</collDate>
8181
<collDate cycle="P2" event="start" date="1006-02-01">1006-02-01</collDate>
8282
<collDate cycle="P2" event="end" date="1006-02-02">1006-02-02</collDate>
83-
<dataKind>software</dataKind>
83+
<dataKind>workflow</dataKind>
8484
<nation>Afghanistan</nation>
8585
<geogCover>GeographicCoverageCity1</geogCover>
8686
<geogCover>GeographicCoverageStateProvince1</geogCover>

doc/sphinx-guides/source/user/dataset-types.rst

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ Intro
1111

1212
Datasets can have a dataset type such as "dataset", "software", or "workflow".
1313

14+
When browsing or searching, these types appear under a facet called "Dataset Type". The facet is only shown when dataset types are enabled (see below).
15+
1416
Enabling Dataset Types
1517
======================
1618

src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
99
import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest;
1010
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
11+
import edu.harvard.iq.dataverse.dataset.DatasetType;
1112
import edu.harvard.iq.dataverse.datavariable.DataVariable;
1213
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
1314
import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
@@ -1000,6 +1001,13 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
10001001
solrInputDocument.addField(SearchFields.METADATA_SOURCE, rdvName); //rootDataverseName);
10011002
}
10021003

1004+
if (FeatureFlags.DATASET_TYPES.enabled()) {
1005+
DatasetType datasetType = dataset.getDatasetType();
1006+
if (datasetType != null) {
1007+
solrInputDocument.addField(SearchFields.DATASET_TYPE, datasetType.getBaseType().toString());
1008+
}
1009+
}
1010+
10031011
DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
10041012
String parentDatasetTitle = "TBD";
10051013
if (datasetVersion != null) {

src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java

+4
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,10 @@ more targeted results for just datasets. The format is YYYY (i.e.
264264
public static final String DATASET_PUBLICATION_DATE = "dsPublicationDate";
265265
public static final String DATASET_PERSISTENT_ID = "dsPersistentId";
266266
public static final String DATASET_VERSION_ID = "datasetVersionId";
267+
/**
268+
* The base type of a dataset (e.g. "dataset", "software", or "workflow"), indexed so that datasets can be searched and faceted by type. See the DatasetType object.
269+
*/
270+
public static final String DATASET_TYPE = "datasetType_s";
267271

268272
public static final String VARIABLE_NAME = "variableName";
269273
public static final String VARIABLE_LABEL = "variableLabel";

src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java

+7
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ public SolrQueryResponse search(
224224
// Facets to Retrieve
225225
// -----------------------------------
226226
solrQuery.addFacetField(SearchFields.METADATA_TYPES);
227+
if (FeatureFlags.DATASET_TYPES.enabled()) {
228+
solrQuery.addFacetField(SearchFields.DATASET_TYPE);
229+
}
227230
solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
228231
solrQuery.addFacetField(SearchFields.METADATA_SOURCE);
229232
solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR);
@@ -484,6 +487,7 @@ public SolrQueryResponse search(
484487
String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER);
485488
String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION);
486489
String citationPlainHtml = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION_HTML);
490+
String datasetType = (String) solrDocument.getFieldValue(SearchFields.DATASET_TYPE);
487491
String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL);
488492
String name = (String) solrDocument.getFieldValue(SearchFields.NAME);
489493
String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT);
@@ -641,6 +645,9 @@ public SolrQueryResponse search(
641645
if (authors != null) {
642646
solrSearchResult.setDatasetAuthors(authors);
643647
}
648+
if (datasetType != null) {
649+
solrSearchResult.setDatasetType(datasetType);
650+
}
644651
} else if (type.equals("files")) {
645652
String parentGlobalId = null;
646653
Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER);

src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java

+10
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
2727

2828
public class SolrSearchResult {
29+
// TODO: remove all tabs from this file
2930
private static final Logger logger = Logger.getLogger(SolrSearchResult.class.getCanonicalName());
3031

3132
private String id;
@@ -72,6 +73,7 @@ public class SolrSearchResult {
7273
private String dataverseAffiliation;
7374
private String citation;
7475
private String citationHtml;
76+
private String datasetType;
7577
/**
7678
* Files and datasets might have a UNF. Dataverses don't.
7779
*/
@@ -948,6 +950,14 @@ public void setCitationHtml(String citationHtml) {
948950
this.citationHtml = citationHtml;
949951
}
950952

953+
public String getDatasetType() {
954+
return datasetType;
955+
}
956+
957+
public void setDatasetType(String datasetType) {
958+
this.datasetType = datasetType;
959+
}
960+
951961
public String getFiletype() {
952962
return filetype;
953963
}

src/main/java/propertyFiles/staticSearchFields.properties

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ staticSearchFields.dvObjectType=Type
99
staticSearchFields.fileTag=File Tag
1010
staticSearchFields.fileAccess=Access
1111
staticSearchFields.publicationStatus=Publication Status
12-
staticSearchFields.subject_ss=Subject
12+
staticSearchFields.subject_ss=Subject
13+
staticSearchFields.datasetType_s=Dataset Type

src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java

+40-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package edu.harvard.iq.dataverse.api;
22

3+
import edu.harvard.iq.dataverse.search.SearchFields;
34
import io.restassured.RestAssured;
45
import io.restassured.path.json.JsonPath;
56
import io.restassured.response.Response;
67
import static jakarta.ws.rs.core.Response.Status.CREATED;
78
import static jakarta.ws.rs.core.Response.Status.OK;
9+
import org.hamcrest.CoreMatchers;
10+
import static org.hamcrest.CoreMatchers.equalTo;
811
import static org.junit.jupiter.api.Assertions.assertEquals;
912
import org.junit.jupiter.api.BeforeAll;
1013
import org.junit.jupiter.api.Test;
@@ -45,10 +48,31 @@ public void testCreateSoftwareDatasetNative() {
4548
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
4649
System.out.println("datasetType: " + datasetType);
4750
assertEquals("software", datasetType);
51+
52+
Response searchDraft = UtilIT.searchAndShowFacets("id:dataset_" + datasetId + "_draft", apiToken);
53+
searchDraft.prettyPrint();
54+
searchDraft.then().assertThat()
55+
.body("data.total_count", CoreMatchers.is(1))
56+
.body("data.count_in_response", CoreMatchers.is(1))
57+
.body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
58+
.body("data.facets[0].datasetType_s.labels[0].software", CoreMatchers.is(1))
59+
.statusCode(OK.getStatusCode());
60+
61+
UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode());
62+
UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode());
63+
64+
// Response searchAsGuest = UtilIT.search(SearchFields.DATASET_TYPE + ":software", null);
65+
// searchAsGuest.prettyPrint();
66+
// searchAsGuest.then().assertThat()
67+
// .body("data.total_count", CoreMatchers.is(1))
68+
// .body("data.count_in_response", CoreMatchers.is(1))
69+
// .body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
70+
// .body("data.facets[0].datasetType_s.labels[0].software", CoreMatchers.is(1))
71+
// .statusCode(OK.getStatusCode());
4872
}
4973

5074
@Test
51-
public void testCreateSoftwareDatasetSemantic() {
75+
public void testCreateWorkflowDatasetSemantic() {
5276
Response createUser = UtilIT.createRandomUser();
5377
createUser.then().assertThat().statusCode(OK.getStatusCode());
5478
String username = UtilIT.getUsernameFromResponse(createUser);
@@ -76,6 +100,7 @@ public void testCreateSoftwareDatasetSemantic() {
76100
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
77101
System.out.println("datasetType: " + datasetType);
78102
assertEquals("software", datasetType);
103+
79104
}
80105

81106
@Test
@@ -113,7 +138,7 @@ public void testImportJson() {
113138
}
114139

115140
@Test
116-
public void testImportDDI() {
141+
public void testImportDdiWorkflow() {
117142
Response createUser = UtilIT.createRandomUser();
118143
createUser.then().assertThat().statusCode(OK.getStatusCode());
119144
String username = UtilIT.getUsernameFromResponse(createUser);
@@ -126,11 +151,13 @@ public void testImportDDI() {
126151
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
127152
Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse);
128153

154+
UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode());
155+
129156
String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software-ddi.xml");
130157

131158
String randomString = UtilIT.getRandomString(6);
132159

133-
Response importJson = UtilIT.importDatasetDDIViaNativeApi(apiToken, dataverseAlias, jsonIn, "doi:10.5072/FK2/" + randomString, "no");
160+
Response importJson = UtilIT.importDatasetDDIViaNativeApi(apiToken, dataverseAlias, jsonIn, "doi:10.5072/FK2/" + randomString, "yes");
134161
importJson.prettyPrint();
135162
importJson.then().assertThat().statusCode(CREATED.getStatusCode());
136163

@@ -142,7 +169,16 @@ public void testImportDDI() {
142169
getDatasetJson.then().assertThat().statusCode(OK.getStatusCode());
143170
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
144171
System.out.println("datasetType: " + datasetType);
145-
assertEquals("software", datasetType);
172+
assertEquals("workflow", datasetType);
173+
174+
Response search = UtilIT.searchAndShowFacets("id:dataset_" + datasetId, apiToken);
175+
search.prettyPrint();
176+
search.then().assertThat()
177+
.body("data.total_count", CoreMatchers.is(1))
178+
.body("data.count_in_response", CoreMatchers.is(1))
179+
.body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
180+
.body("data.facets[0].datasetType_s.labels[0].workflow", CoreMatchers.is(1))
181+
.statusCode(OK.getStatusCode());
146182

147183
}
148184

0 commit comments

Comments
 (0)