Skip to content

Commit cf174b2

Browse files
qqmyersdjbrooke
andauthored
IQSS/7068 Reserve File Pids (#7334)
* file pid reservation * add file pid reservation step to publish (analogous to dataset pid register if needed) Conflicts: src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java src/main/java/propertyFiles/Bundle.properties * comment change * check if file PIDs used once, use constants - per comments * adding release note * release notes, API doc update * reflecting datasets and files for the PID endpoint * removing release note about pre-reg for file PIDs as this is not supported * file pid pre-reservation Conflicts: src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java src/main/java/propertyFiles/Bundle.properties * avoid problem when GlobalIDServiceBean implicitly merges @kcondon sees a DB error persisting a file with null createtime during the GlobalIDServiceBean.getBean call which uses a set namedQuery to find the :DoiProvider. Create times for files are set above, but not merged prior to calling registerFilePidsIfNeeded. Assuming the namedQuery is forcing the file (without a merge) to persist which triggers the error. In #7337, the code is reworked so there is a merge prior to registerFilePidsIfNeeded. This commit adds one temporarily so this PR works indepdently of the other. * update theDataset * noting that PID reservation can cause old timeouts to be too short * more specifics * release note update * cleanup reformatting * further cleanup * set createTime earlier --------- Co-authored-by: Danny Brooke <[email protected]>
1 parent 8205f13 commit cf174b2

File tree

10 files changed

+110
-62
lines changed

10 files changed

+110
-62
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
## Release Highlights
2+
3+
### Pre-Publish File DOI Reservation with DataCite
4+
5+
Dataverse installations using DataCite (or other persistent identifier (PID) providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files, so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration).
6+
7+
## Major Use Cases
8+
9+
- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time. (Issue #7068, PR #7334)

doc/sphinx-guides/source/api/native-api.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -5171,7 +5171,7 @@ The fully expanded example above (without environment variables) looks like this
51715171
Reserve a PID
51725172
~~~~~~~~~~~~~
51735173
5174-
Reserved a PID for a dataset. A superuser API token is required.
5174+
Reserve a PID for a dataset if not yet registered, and, if FilePIDs are enabled, reserve any file PIDs that are not yet registered. A superuser API token is required.
51755175
51765176
.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below.
51775177

src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java

-8
Original file line numberDiff line numberDiff line change
@@ -1248,14 +1248,6 @@ public List<Long> selectFilesWithMissingOriginalSizes() {
12481248
}
12491249

12501250

1251-
/**
1252-
* Check that a identifier entered by the user is unique (not currently used
1253-
* for any other study in this Dataverse Network). Also check for duplicate
1254-
* in the remote PID service if needed
1255-
* @param datafileId
1256-
* @param storageLocation
1257-
* @return {@code true} iff the global identifier is unique.
1258-
*/
12591251
public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException {
12601252
// Verify that the DataFile no longer exists:
12611253
if (find(dataFileId) != null) {

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java

+76-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package edu.harvard.iq.dataverse.engine.command.impl;
22

3+
import edu.harvard.iq.dataverse.DataFile;
34
import edu.harvard.iq.dataverse.Dataset;
45
import edu.harvard.iq.dataverse.DatasetField;
56
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
@@ -18,9 +19,11 @@
1819
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
1920
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
2021
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
22+
import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider;
2123
import edu.harvard.iq.dataverse.util.BundleUtil;
2224

2325
import java.sql.Timestamp;
26+
import java.util.Arrays;
2427
import java.util.Date;
2528
import java.util.Set;
2629
import java.util.logging.Level;
@@ -169,13 +172,12 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx
169172
} while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
170173
}
171174
if(!retry) {
172-
logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed.");
173-
throw new IllegalCommandException(BundleUtil.getStringFromBundle("publishDatasetCommand.pidNotReserved"), this);
175+
logger.warning("Reserving PID for: " + getDataset().getId() + " failed.");
176+
throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidNotReserved", Arrays.asList(theDataset.getIdentifier())), this);
174177
}
175178
if(attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) {
176179
//Didn't work - we exited the loop with too many tries
177-
throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; "
178-
+ "gave up after " + attempts + " attempts. Current (last requested) identifier: " + theDataset.getIdentifier(), this);
180+
throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded", Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())), this);
179181
}
180182
}
181183
// Invariant: Dataset identifier does not exist in the remote registry
@@ -188,6 +190,9 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx
188190
}
189191

190192
} catch (Throwable e) {
193+
if (e instanceof CommandException) {
194+
throw (CommandException) e;
195+
}
191196
throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this);
192197
}
193198
} else {
@@ -217,6 +222,73 @@ protected Timestamp getTimestamp() {
217222
return timestamp;
218223
}
219224

225+
protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException {
226+
// Register file PIDs if needed
227+
PidProvider pidGenerator = ctxt.dvObjects().getEffectivePidGenerator(getDataset());
228+
boolean shouldRegister = !pidGenerator.registerWhenPublished() &&
229+
ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) &&
230+
pidGenerator.canCreatePidsLike(getDataset().getGlobalId());
231+
if (shouldRegister) {
232+
for (DataFile dataFile : theDataset.getFiles()) {
233+
logger.fine(dataFile.getId() + " is registered?: " + dataFile.isIdentifierRegistered());
234+
if (!dataFile.isIdentifierRegistered()) {
235+
// pre-register a persistent id
236+
registerFileExternalIdentifier(dataFile, pidGenerator, ctxt, true);
237+
}
238+
}
239+
}
240+
}
241+
242+
private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException {
243+
244+
if (!dataFile.isIdentifierRegistered()) {
245+
246+
if (pidProvider instanceof FakeDOIProvider) {
247+
retry = false; // No reason to allow a retry with the FakeProvider (even if it allows
248+
// pre-registration someday), so set false for efficiency
249+
}
250+
try {
251+
if (pidProvider.alreadyRegistered(dataFile)) {
252+
int attempts = 0;
253+
if (retry) {
254+
do {
255+
pidProvider.generatePid(dataFile);
256+
logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).",
257+
new Object[] { dataFile.getId(), dataFile.getIdentifier() });
258+
attempts++;
259+
} while (pidProvider.alreadyRegistered(dataFile) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
260+
}
261+
if (!retry) {
262+
logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + ", during publication failed.");
263+
throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this);
264+
}
265+
if (attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) {
266+
// Didn't work - we existed the loop with too many tries
267+
throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; "
268+
+ "gave up after " + attempts + " attempts. Current (last requested) identifier: " + dataFile.getIdentifier(), this);
269+
}
270+
}
271+
// Invariant: DataFile identifier does not exist in the remote registry
272+
try {
273+
pidProvider.createIdentifier(dataFile);
274+
dataFile.setGlobalIdCreateTime(getTimestamp());
275+
dataFile.setIdentifierRegistered(true);
276+
} catch (Throwable ex) {
277+
logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex);
278+
}
279+
280+
} catch (Throwable e) {
281+
if (e instanceof CommandException) {
282+
throw (CommandException) e;
283+
}
284+
throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this);
285+
}
286+
} else {
287+
throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this);
288+
}
289+
290+
}
291+
220292
protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException {
221293
Set<MetadataBlock> changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion);
222294
for (MetadataBlock mdb : changedMDBs) {

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java

+11-11
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
102102
try {
103103
// This can potentially throw a CommandException, so let's make
104104
// sure we exit cleanly:
105-
106-
registerExternalIdentifier(theDataset, ctxt, false);
105+
registerExternalIdentifier(theDataset, ctxt, false);
106+
registerFilePidsIfNeeded(theDataset, ctxt, false);
107107
} catch (CommandException comEx) {
108-
logger.warning("Failed to reserve the identifier "+theDataset.getGlobalId().asString()+"; notifying the user(s), unlocking the dataset");
109-
// Send failure notification to the user:
108+
logger.warning("Failed to reserve the identifier " + theDataset.getGlobalId().asString() + "; notifying the user(s), unlocking the dataset");
109+
// Send failure notification to the user:
110110
notifyUsersDatasetPublishStatus(ctxt, theDataset, UserNotification.Type.PUBLISHFAILED_PIDREG);
111-
// Remove the dataset lock:
111+
// Remove the dataset lock:
112112
ctxt.datasets().removeDatasetLocks(theDataset, DatasetLock.Reason.finalizePublication);
113113
// re-throw the exception:
114114
throw comEx;
@@ -395,8 +395,7 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t
395395
// we can't get "dependent" DOIs assigned to files in a dataset
396396
// with the registered id that is a handle; or even a DOI, but in
397397
// an authority that's different from what's currently configured.
398-
// Additionaly in 4.9.3 we have added a system variable to disable
399-
// registering file PIDs on the installation level.
398+
// File PIDs may be enabled/disabled per collection.
400399
boolean registerGlobalIdsForFiles = ctxt.systemConfig().isFilePIDsEnabledForCollection(
401400
getDataset().getOwner())
402401
&& pidProvider.canCreatePidsLike(dataset.getGlobalId());
@@ -422,8 +421,8 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t
422421
// pidProvider.
423422
dataset.setIdentifierRegistered(true);
424423
} catch (Throwable e) {
425-
logger.warning("Failed to register the identifier " + dataset.getGlobalId().asString()
426-
+ ", or to register a file in the dataset; notifying the user(s), unlocking the dataset");
424+
logger.warning("Failed to publicize the identifier " + dataset.getGlobalId().asString()
425+
+ ", or to publicize a file in the dataset; notifying the user(s), unlocking the dataset");
427426

428427
// Send failure notification to the user:
429428
notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG);
@@ -440,8 +439,9 @@ private void updateFiles(Timestamp updateTime, CommandContext ctxt) throws Comma
440439
if (dataFile.getPublicationDate() == null) {
441440
// this is a new, previously unpublished file, so publish by setting date
442441
dataFile.setPublicationDate(updateTime);
443-
444-
// check if any prexisting roleassignments have file download and send notifications
442+
443+
// check if any pre-existing role assignments have file download and send
444+
// notifications
445445
notifyUsersFileDownload(ctxt, dataFile);
446446
}
447447

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java

-7
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,13 @@
44
import edu.harvard.iq.dataverse.DatasetLock;
55
import edu.harvard.iq.dataverse.authorization.Permission;
66
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
7-
import edu.harvard.iq.dataverse.engine.command.Command;
87
import edu.harvard.iq.dataverse.engine.command.CommandContext;
98
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
109
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
1110
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
1211
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
13-
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
14-
import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
15-
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
16-
import edu.harvard.iq.dataverse.util.BundleUtil;
1712
import edu.harvard.iq.dataverse.workflow.Workflow;
1813
import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType;
19-
import java.util.Date;
20-
import java.util.List;
2114
import java.util.Optional;
2215
import java.util.logging.Logger;
2316
import static java.util.stream.Collectors.joining;

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java

+6-24
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,21 @@
33
import edu.harvard.iq.dataverse.Dataset;
44
import edu.harvard.iq.dataverse.authorization.Permission;
55
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
6-
import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
76
import edu.harvard.iq.dataverse.engine.command.CommandContext;
87
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
98
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
109
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
11-
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
1210
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
13-
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
14-
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
15-
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
1611
import edu.harvard.iq.dataverse.util.BundleUtil;
17-
import java.util.Arrays;
1812
import java.util.Collections;
19-
import java.util.Date;
2013
import java.util.logging.Logger;
2114

2215
/**
2316
* No required permissions because we check for superuser status.
17+
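* Reserves the dataset PID and then any needed file PIDs using the helpers inherited from AbstractDatasetCommand.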
2418
*/
2519
@RequiredPermissions({})
26-
public class ReservePidCommand extends AbstractVoidCommand {
20+
public class ReservePidCommand extends AbstractDatasetCommand<Dataset> {
2721

2822
private static final Logger logger = Logger.getLogger(ReservePidCommand.class.getCanonicalName());
2923

@@ -35,27 +29,15 @@ public ReservePidCommand(DataverseRequest request, Dataset dataset) {
3529
}
3630

3731
@Override
38-
protected void executeImpl(CommandContext ctxt) throws CommandException {
32+
public Dataset execute(CommandContext ctxt) throws CommandException {
3933

4034
if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) {
4135
throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"),
4236
this, Collections.singleton(Permission.EditDataset), dataset);
4337
}
44-
45-
PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset);
46-
47-
try {
48-
String returnString = pidProvider.createIdentifier(dataset);
49-
logger.fine(returnString);
50-
// No errors caught, so mark PID as reserved.
51-
dataset.setGlobalIdCreateTime(new Date());
52-
// We don't setIdentifierRegistered(true) yet.
53-
ctxt.datasets().merge(dataset);
54-
} catch (Throwable ex) {
55-
String message = BundleUtil.getStringFromBundle("pids.commands.reservePid.failure", Arrays.asList(dataset.getId().toString(), ex.getLocalizedMessage()));
56-
logger.info(message);
57-
throw new IllegalCommandException(message, this);
58-
}
38+
registerExternalIdentifier(getDataset(), ctxt, true);
39+
registerFilePidsIfNeeded(getDataset(), ctxt, true);
40+
return dataset;
5941
}
6042

6143
}

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
154154
throw e;
155155
}
156156
}
157-
157+
//Set creator and create date for files if needed
158158
for (DataFile dataFile : theDataset.getFiles()) {
159159
if (dataFile.getCreateDate() == null) {
160160
dataFile.setCreateDate(getTimestamp());
@@ -259,6 +259,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
259259
for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) {
260260
logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft version");
261261
}
262+
registerFilePidsIfNeeded(theDataset, ctxt, true);
262263

263264
if (recalculateUNF) {
264265
ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion());

src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) {
986986
Dataverse thisCollection = collection;
987987

988988
// If neither enabled nor disabled specifically for this collection,
989-
// the parent collection setting is inhereted (recursively):
989+
// the parent collection setting is inherited (recursively):
990990
while (thisCollection.getFilePIDsEnabled() == null) {
991991
if (thisCollection.getOwner() == null) {
992992
// We've reached the root collection, and file PIDs registration
@@ -1002,8 +1002,6 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) {
10021002
// takes precedent:
10031003
return thisCollection.getFilePIDsEnabled();
10041004
}
1005-
1006-
10071005

10081006
public String getMDCLogPath() {
10091007
String mDCLogPath = settingsService.getValueForKey(SettingsServiceBean.Key.MDCLogPath, null);

src/main/java/propertyFiles/Bundle.properties

+4-3
Original file line numberDiff line numberDiff line change
@@ -3008,12 +3008,13 @@ pids.api.reservePid.success=PID reserved for {0}
30083008
pids.api.deletePid.success=PID deleted for {0}
30093009
pids.deletePid.failureExpected=Unable to delete PID {0}. Status code: {1}.
30103010
pids.deletePid.failureOther=Problem deleting PID {0}: {1}
3011-
pids.commands.reservePid.failure=Problem reserving PID for dataset id {0}: {1}.
30123011
pids.datacite.errors.noResponseCode=Problem getting HTTP status code from {0}. Is it in DNS? Is doi.dataciterestapiurlstring configured properly?
30133012
pids.datacite.errors.DoiOnly=Only doi: is supported.
30143013

3015-
#PublishDatasetCommand
3016-
publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persistent identifier has not been reserved.
3014+
#AbstractDatasetCommand
3015+
abstractDatasetCommand.pidNotReserved=Unable to reserve a persistent identifier for the dataset: {0}.
3016+
abstractDatasetCommand.filePidNotReserved=Unable to reserve a persistent identifier for one or more files in the dataset: {0}.
3017+
abstractDatasetCommand.pidReservationRetryExceeded=This dataset may not be registered because its identifier is already in use by another dataset: gave up after {0} attempts. Current (last requested) identifier: {1}
30173018

30183019
# APIs
30193020
api.errors.invalidApiToken=Invalid API token.

0 commit comments

Comments
 (0)