Skip to content

Commit cc68c7d

Browse files
committed
allow dataset type to be specified in DDI import #10517
1 parent 47c5b30 commit cc68c7d

File tree

5 files changed

+275
-3
lines changed

5 files changed

+275
-3
lines changed
+5-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
### Initial Support for Dataset Types (Dataset, Software, Workflow)
22

3-
Datasets now have types. By default the dataset type will be "dataset" but if you turn on support for additional types, datasets can have a type of "software" or "workflow" as well. For more details see doc/sphinx-guides/source/user/dataset-types.rst and #10517. Please note that this feature is highly experimental.
3+
Datasets now have types. By default the dataset type will be "dataset" but if you turn on support for additional types, datasets can have a type of "software" or "workflow" as well. For more details see <https://dataverse-guide--10694.org.readthedocs.build/en/10694/user/dataset-types.html> and #10517. Please note that this feature is highly experimental.
4+
5+
next:
6+
7+
- create with DDI
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<codeBook xmlns="ddi:codebook:2_5" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:codebook:2_5 https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd" version="2.5">
3+
<docDscr>
4+
<citation>
5+
<titlStmt>
6+
<titl>Replication Data for: Title</titl>
7+
</titlStmt>
8+
<distStmt>
9+
<distrbtr source="archive">Root</distrbtr>
10+
<distDate>2020-02-19</distDate>
11+
</distStmt>
12+
<verStmt source="archive">
13+
<version date="2020-02-19" type="RELEASED">1</version>
14+
</verStmt>
15+
<biblCit>LastAuthor1, FirstAuthor1; LastAuthor2, FirstAuthor2, 2020, "Replication Data for: Title", Root, V1</biblCit>
16+
</citation>
17+
</docDscr>
18+
<stdyDscr>
19+
<citation>
20+
<titlStmt>
21+
<titl>Replication Data for: Title</titl>
22+
<subTitl>Subtitle</subTitl>
23+
<altTitl>Alternative Title</altTitl>
24+
<IDNo agency="OtherIDAgency1">OtherIDIdentifier1</IDNo>
25+
<IDNo agency="OtherIDAgency2">OtherIDIdentifier2</IDNo>
26+
</titlStmt>
27+
<rspStmt>
28+
<AuthEnty affiliation="AuthorAffiliation1">LastAuthor1, FirstAuthor1</AuthEnty>
29+
<AuthEnty affiliation="AuthorAffiliation2">LastAuthor2, FirstAuthor2</AuthEnty>
30+
<othId role="Data Collector">LastContributor1, FirstContributor1</othId>
31+
<othId role="Data Curator">LastContributor2, FirstContributor2</othId>
32+
</rspStmt>
33+
<prodStmt>
34+
<producer affiliation="ProducerAffiliation1" abbr="ProducerAbbreviation1" role="http://ProducerLogoURL1.org" URI="http://ProducerURL1.org">LastProducer1, FirstProducer1</producer>
35+
<producer affiliation="ProducerAffiliation2" abbr="ProducerAbbreviation2" role="http://ProducerLogoURL2.org" URI="http://ProducerURL2.org">LastProducer2, FirstProducer2</producer>
36+
<prodDate>1003-01-01</prodDate>
37+
<prodPlac>ProductionPlace One</prodPlac>
38+
<prodPlac>ProductionPlace Two</prodPlac>
39+
<software version="SoftwareVersion1">SoftwareName1</software>
40+
<software version="SoftwareVersion2">SoftwareName2</software>
41+
<grantNo agency="GrantInformationGrantAgency1">GrantInformationGrantNumber1</grantNo>
42+
<grantNo agency="GrantInformationGrantAgency2">GrantInformationGrantNumber2</grantNo>
43+
</prodStmt>
44+
<distStmt>
45+
<distrbtr source="archive">Root</distrbtr>
46+
<distrbtr affiliation="DistributorAffiliation1" abbr="DistributorAbbreviation1" URI="http://DistributorURL1.org" role="http://DistributorLogoURL1.org">LastDistributor1, FirstDistributor1</distrbtr>
47+
<distrbtr affiliation="DistributorAffiliation2" abbr="DistributorAbbreviation2" URI="http://DistributorURL2.org" role="http://DistributorLogoURL2.org">LastDistributor2, FirstDistributor2</distrbtr>
48+
<contact affiliation="ContactAffiliation1" email="[email protected]">LastContact1, FirstContact1</contact>
49+
<contact affiliation="ContactAffiliation2" email="[email protected]">LastContact2, FirstContact2</contact>
50+
<distDate>1004-01-01</distDate>
51+
<depositr>LastDepositor, FirstDepositor</depositr>
52+
<depDate>1002-01-01</depDate>
53+
</distStmt>
54+
<serStmt>
55+
<serName>SeriesName One</serName>
56+
<serInfo>SeriesInformation One</serInfo>
57+
</serStmt>
58+
<serStmt>
59+
<serName>SeriesName Two</serName>
60+
<serInfo>SeriesInformation Two</serInfo>
61+
</serStmt>
62+
</citation>
63+
<stdyInfo>
64+
<subject>
65+
<keyword xml:lang="en">Agricultural Sciences</keyword>
66+
<keyword xml:lang="en">Business and Management</keyword>
67+
<keyword xml:lang="en">Engineering</keyword>
68+
<keyword xml:lang="en">Law</keyword>
69+
<keyword vocab="KeywordVocabulary1" vocabURI="http://KeywordVocabularyURL1.org">KeywordTerm1</keyword>
70+
<keyword vocab="KeywordVocabulary2" vocabURI="http://KeywordVocabularyURL2.org">KeywordTerm2</keyword>
71+
</subject>
72+
<abstract date="1000-01-01">DescriptionText 1</abstract>
73+
<abstract date="1000-02-02">DescriptionText2</abstract>
74+
<sumDscr>
75+
<timePrd cycle="P1" event="start" date="1005-01-01">1005-01-01</timePrd>
76+
<timePrd cycle="P1" event="end" date="1005-01-02">1005-01-02</timePrd>
77+
<timePrd cycle="P2" event="start" date="1005-02-01">1005-02-01</timePrd>
78+
<timePrd cycle="P2" event="end" date="1005-02-02">1005-02-02</timePrd>
79+
<collDate cycle="P1" event="start" date="1006-01-01">1006-01-01</collDate>
80+
<collDate cycle="P1" event="end" date="1006-01-01">1006-01-01</collDate>
81+
<collDate cycle="P2" event="start" date="1006-02-01">1006-02-01</collDate>
82+
<collDate cycle="P2" event="end" date="1006-02-02">1006-02-02</collDate>
83+
<dataKind>software</dataKind>
84+
<nation>Afghanistan</nation>
85+
<geogCover>GeographicCoverageCity1</geogCover>
86+
<geogCover>GeographicCoverageStateProvince1</geogCover>
87+
<geogCover>GeographicCoverageOther1</geogCover>
88+
<nation>Albania</nation>
89+
<geogCover>GeographicCoverageCity2</geogCover>
90+
<geogCover>GeographicCoverageStateProvince2</geogCover>
91+
<geogCover>GeographicCoverageOther2</geogCover>
92+
<geoBndBox>
93+
<westBL>10</westBL>
94+
<eastBL>20</eastBL>
95+
<northBL>40</northBL>
96+
<southBL>30</southBL>
97+
</geoBndBox>
98+
<geoBndBox>
99+
<southBL>70</southBL>
100+
<northBL>80</northBL>
101+
<eastBL>60</eastBL>
102+
<westBL>50</westBL>
103+
</geoBndBox>
104+
<geogUnit>GeographicUnit1</geogUnit>
105+
<geogUnit>GeographicUnit2</geogUnit>
106+
<anlyUnit>UnitOfAnalysis1</anlyUnit>
107+
<anlyUnit>UnitOfAnalysis2</anlyUnit>
108+
<universe>Universe1</universe>
109+
<universe>Universe2</universe>
110+
</sumDscr>
111+
<notes>Notes1</notes>
112+
</stdyInfo>
113+
<method>
114+
<dataColl>
115+
<timeMeth>TimeMethod</timeMeth>
116+
<dataCollector>LastDataCollector1, FirstDataCollector1</dataCollector>
117+
<collectorTraining>CollectorTraining</collectorTraining>
118+
<frequenc>Frequency</frequenc>
119+
<sampProc>SamplingProcedure</sampProc>
120+
<targetSampleSize>
121+
<sampleSizeFormula>TargetSampleSizeFormula</sampleSizeFormula>
122+
<sampleSize>100</sampleSize>
123+
</targetSampleSize>
124+
<deviat>MajorDeviationsForSampleDesign</deviat>
125+
<sources>
126+
<dataSrc>DataSources1</dataSrc>
127+
<dataSrc>DataSources2</dataSrc>
128+
<srcOrig>OriginOfSources</srcOrig>
129+
<srcChar>CharacteristicOfSourcesNoted</srcChar>
130+
<srcDocu>DocumentationAndAccessToSources</srcDocu>
131+
</sources>
132+
<collMode>CollectionMode</collMode>
133+
<resInstru>TypeOfResearchInstrument</resInstru>
134+
<collSitu>CharacteristicsOfDataCollectionSituation</collSitu>
135+
<actMin>ActionsToMinimizeLosses</actMin>
136+
<conOps>ControlOperations</conOps>
137+
<weight>Weighting</weight>
138+
<cleanOps>CleaningOperations</cleanOps>
139+
</dataColl>
140+
<anlyInfo>
141+
<respRate>ResponseRate</respRate>
142+
<EstSmpErr>EstimatesOfSamplingError</EstSmpErr>
143+
<dataAppr>OtherFormsOfDataAppraisal</dataAppr>
144+
</anlyInfo>
145+
<notes type="NotesType" subject="NotesSubject">NotesText</notes>
146+
</method>
147+
<dataAccs>
148+
<notes type="DVN:TOA" level="dv">Terms of Access</notes>
149+
<notes type="DVN:TOU" level="dv">Terms of Use</notes>
150+
<setAvail>
151+
<accsPlac>Data Access Place</accsPlac>
152+
<origArch>Original Archive</origArch>
153+
<avlStatus>Availability Status</avlStatus>
154+
<collSize>Size of Collection</collSize>
155+
<complete>Study Completion</complete>
156+
</setAvail>
157+
<useStmt>
158+
<confDec>Confidentiality Declaration</confDec>
159+
<specPerm>Special Permissions</specPerm>
160+
<restrctn>Restrictions</restrctn>
161+
<contact>Contact for Access</contact>
162+
<citReq>Citation Requirements</citReq>
163+
<deposReq>Depositor Requirements</deposReq>
164+
<conditions>Conditions </conditions>
165+
<disclaimer>Disclaimer</disclaimer>
166+
</useStmt>
167+
</dataAccs>
168+
<othrStdyMat>
169+
<relMat>RelatedMaterial1</relMat>
170+
<relMat>RelatedMaterial2</relMat>
171+
<relStdy>RelatedDatasets1</relStdy>
172+
<relStdy>RelatedDatasets2</relStdy>
173+
<relPubl>
174+
<citation>
175+
<titlStmt>
176+
<IDNo agency="ark">RelatedPublicationIDNumber1</IDNo>
177+
</titlStmt>
178+
<biblCit>RelatedPublicationCitation1</biblCit>
179+
</citation>
180+
<ExtLink URI="http://RelatedPublicationURL1.org"/>
181+
</relPubl>
182+
<relPubl>
183+
<citation>
184+
<titlStmt>
185+
<IDNo agency="arXiv">RelatedPublicationIDNumber2</IDNo>
186+
</titlStmt>
187+
<biblCit>RelatedPublicationCitation2</biblCit>
188+
</citation>
189+
<ExtLink URI="http://RelatedPublicationURL2.org"/>
190+
</relPubl>
191+
<othRefs>OtherReferences1</othRefs>
192+
<othRefs>OtherReferences2</othRefs>
193+
</othrStdyMat>
194+
<notes>StudyLevelErrorNotes</notes>
195+
</stdyDscr>
196+
</codeBook>

doc/sphinx-guides/source/user/dataset-types.rst

+7
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,10 @@ Semantic API
2828
---------------------------------
2929

3030
An example JSON-LD file is available at :download:`dataset-create-software.jsonld <../_static/api/dataset-create-software.jsonld>`
31+
32+
DDI Import
33+
----------
34+
35+
An example DDI file is available at :download:`dataset-create-software-ddi.xml <../_static/api/dataset-create-software-ddi.xml>`
36+
37+
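Note that the dataset type is only set from DDI when ``dataKind`` contains one of the valid types (for example, ``software``). If several ``dataKind`` values are valid types, the first one wins. If none is a valid type, no type is taken from the DDI and the dataset keeps the default type.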

src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java

+32-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean;
1919
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
2020
import edu.harvard.iq.dataverse.authorization.users.User;
21+
import edu.harvard.iq.dataverse.dataset.DatasetType;
2122
import edu.harvard.iq.dataverse.dataverse.DataverseUtil;
2223
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
2324
import edu.harvard.iq.dataverse.engine.command.impl.*;
@@ -429,8 +430,13 @@ public Response importDatasetDdi(@Context ContainerRequestContext crc, String xm
429430
Dataverse owner = findDataverseOrDie(parentIdtf);
430431
Dataset ds = null;
431432
try {
432-
ds = jsonParser().parseDataset(importService.ddiToJson(xml));
433+
JsonObject jsonObject = importService.ddiToJson(xml);
434+
ds = jsonParser().parseDataset(jsonObject);
433435
DataverseUtil.checkMetadataLangauge(ds, owner, settingsService.getBaseMetadataLanguageMap(null, true));
436+
DatasetType datasetType = getDatasetTypeFromJson(jsonObject);
437+
if (datasetType != null) {
438+
ds.setDatasetType(datasetType);
439+
}
434440
} catch (JsonParseException jpe) {
435441
return badRequest("Error parsing data as Json: "+jpe.getMessage());
436442
} catch (ImportException e) {
@@ -491,7 +497,31 @@ public Response importDatasetDdi(@Context ContainerRequestContext crc, String xm
491497
return ex.getResponse();
492498
}
493499
}
494-
500+
501+
public DatasetType getDatasetTypeFromJson(JsonObject jsonObject) {
502+
JsonArray citationFields = jsonObject.getJsonObject("datasetVersion")
503+
.getJsonObject("metadataBlocks")
504+
.getJsonObject("citation")
505+
.getJsonArray("fields");
506+
for (JsonValue citationField : citationFields) {
507+
JsonObject field = (JsonObject) citationField;
508+
String name = field.getString("typeName");
509+
if (name.equals(DatasetFieldConstant.kindOfData)) {
510+
JsonArray values = field.getJsonArray("value");
511+
for (JsonString value : values.getValuesAs(JsonString.class)) {
512+
try {
513+
// return the first DatasetType you find
514+
DatasetType.Type type = DatasetType.Type.fromString(value.getString());
515+
return new DatasetType(type);
516+
} catch (IllegalArgumentException ex) {
517+
// No worries, it's just some other kind of data.
518+
}
519+
}
520+
}
521+
}
522+
return null;
523+
}
524+
495525
@POST
496526
@AuthRequired
497527
@Path("{identifier}/datasets/:startmigration")

src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java

+35
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,39 @@ public void testImportJson() {
111111
assertEquals("software", datasetType);
112112

113113
}
114+
115+
@Test
116+
public void testImportDDI() {
117+
Response createUser = UtilIT.createRandomUser();
118+
createUser.then().assertThat().statusCode(OK.getStatusCode());
119+
String username = UtilIT.getUsernameFromResponse(createUser);
120+
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
121+
122+
UtilIT.setSuperuserStatus(username, true).then().assertThat().statusCode(OK.getStatusCode());
123+
124+
Response createDataverse = UtilIT.createRandomDataverse(apiToken);
125+
createDataverse.then().assertThat().statusCode(CREATED.getStatusCode());
126+
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
127+
Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse);
128+
129+
String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software-ddi.xml");
130+
131+
String randomString = UtilIT.getRandomString(6);
132+
133+
Response importJson = UtilIT.importDatasetDDIViaNativeApi(apiToken, dataverseAlias, jsonIn, "doi:10.5072/FK2/" + randomString, "no");
134+
importJson.prettyPrint();
135+
importJson.then().assertThat().statusCode(CREATED.getStatusCode());
136+
137+
Integer datasetId = JsonPath.from(importJson.getBody().asString()).getInt("data.id");
138+
String datasetPid = JsonPath.from(importJson.getBody().asString()).getString("data.persistentId");
139+
140+
Response getDatasetJson = UtilIT.nativeGet(datasetId, apiToken);
141+
getDatasetJson.prettyPrint();
142+
getDatasetJson.then().assertThat().statusCode(OK.getStatusCode());
143+
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
144+
System.out.println("datasetType: " + datasetType);
145+
assertEquals("software", datasetType);
146+
147+
}
148+
114149
}

0 commit comments

Comments
 (0)