Skip to content

Commit 94a8942

Browse files
committed
Merge branch 'develop' into 8184-preview-url-changes
2 parents e85c422 + d039a10 commit 94a8942

File tree

14 files changed

+277
-250
lines changed

14 files changed

+277
-250
lines changed

.github/PULL_REQUEST_TEMPLATE.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
**Which issue(s) this PR closes**:
44

5-
Closes #
5+
- Closes #
66

77
**Special notes for your reviewer**:
88

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
### Reindexing after a role assignment is less memory intensive
2+
3+
Adding/removing a user from a role on a collection, particularly the root collection, could lead to a significant increase in memory use resulting in Dataverse itself failing with an out-of-memory condition. Such changes now consume much less memory.
4+
5+
If you have experienced out-of-memory failures in Dataverse in the past that could have been caused by this problem, you may wish to run a [reindex in place](https://guides.dataverse.org/en/latest/admin/solr-search-index.html#reindex-in-place) to update any out-of-date information.
6+
7+
For more information, see #10697 and #10698.

src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java

+156-157
Large diffs are not rendered by default.

src/main/java/edu/harvard/iq/dataverse/WidgetWrapper.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
@Named
1919
public class WidgetWrapper implements java.io.Serializable {
2020

21-
private final static String WIDGET_PARAMETER = "widget";
22-
private final static char WIDGET_SEPARATOR = '@';
21+
private static final String WIDGET_PARAMETER = "widget";
22+
private static final char WIDGET_SEPARATOR = '@';
2323

2424
private Boolean widgetView;
2525
private String widgetHome;

src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
*/
3131
public abstract class AbstractOAuth2AuthenticationProvider implements AuthenticationProvider {
3232

33-
final static Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName());
33+
static final Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName());
3434

3535
protected static class ParsedUserResponse {
3636
public final AuthenticatedUserDisplayInfo displayInfo;

src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
*/
5050
public class OrcidOAuth2AP extends AbstractOAuth2AuthenticationProvider {
5151

52-
final static Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName());
52+
static final Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName());
5353

5454
public static final String PROVIDER_ID_PRODUCTION = "orcid";
5555
public static final String PROVIDER_ID_SANDBOX = "orcid-sandbox";

src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ public class DataverseRequest {
2626
private final String invocationId;
2727
private final HttpServletRequest httpServletRequest;
2828

29-
private final static String undefined = "0.0.0.0";
29+
private static final String undefined = "0.0.0.0";
3030

31-
private final static String MDKEY_PREFIX="mdkey.";
31+
private static final String MDKEY_PREFIX="mdkey.";
3232

3333
private static final Logger logger = Logger.getLogger(DataverseRequest.class.getName());
3434

src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java

+84-67
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
public class SolrIndexServiceBean {
3535

3636
private static final Logger logger = Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName());
37-
37+
3838
@EJB
3939
DvObjectServiceBean dvObjectService;
4040
@EJB
@@ -149,7 +149,7 @@ private List<DvObjectSolrDoc> constructDatasetSolrDocs(Dataset dataset) {
149149
return solrDocs;
150150
}
151151

152-
// private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile) {
152+
// private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile) {
153153
private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile, Map<Long, List<String>> permStringByDatasetVersion) {
154154
List<DvObjectSolrDoc> datafileSolrDocs = new ArrayList<>();
155155
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner());
@@ -166,14 +166,14 @@ private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile, Map<L
166166
cachedPerms = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
167167
}
168168
if (cachedPerms != null) {
169-
logger.fine("reusing cached perms for file " + dataFile.getId());
169+
logger.finest("reusing cached perms for file " + dataFile.getId());
170170
perms = cachedPerms;
171171
} else if (datasetVersionFileIsAttachedTo.isReleased()) {
172-
logger.fine("no cached perms, file is public/discoverable/searchable for file " + dataFile.getId());
172+
logger.finest("no cached perms, file is public/discoverable/searchable for file " + dataFile.getId());
173173
perms.add(IndexServiceBean.getPublicGroupString());
174174
} else {
175175
// go to the well (slow)
176-
logger.fine("no cached perms, file is not public, finding perms for file " + dataFile.getId());
176+
logger.finest("no cached perms, file is not public, finding perms for file " + dataFile.getId());
177177
perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo);
178178
}
179179
} else {
@@ -204,13 +204,14 @@ private List<DvObjectSolrDoc> constructDatafileSolrDocsFromDataset(Dataset datas
204204
} else {
205205
perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo);
206206
}
207+
207208
for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) {
208209
Long fileId = fileMetadata.getDataFile().getId();
209210
String solrIdStart = IndexServiceBean.solrDocIdentifierFile + fileId;
210211
String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState());
211212
String solrId = solrIdStart + solrIdEnd;
212213
DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms);
213-
logger.fine("adding fileid " + fileId);
214+
logger.finest("adding fileid " + fileId);
214215
datafileSolrDocs.add(dataFileSolrDoc);
215216
}
216217
}
@@ -361,20 +362,19 @@ private void persistToSolr(Collection<SolrInputDocument> docs) throws SolrServer
361362

362363
public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) {
363364
DvObject definitionPoint = dvObjectService.findDvObject(definitionPointId);
364-
if ( definitionPoint == null ) {
365+
if (definitionPoint == null) {
365366
logger.log(Level.WARNING, "Cannot find a DvOpbject with id of {0}", definitionPointId);
366367
return null;
367368
} else {
368369
return indexPermissionsOnSelfAndChildren(definitionPoint);
369370
}
370371
}
371-
372+
372373
/**
373374
* We use the database to determine direct children since there is no
374375
* inheritance
375376
*/
376377
public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) {
377-
List<DvObject> dvObjectsToReindexPermissionsFor = new ArrayList<>();
378378
List<DataFile> filesToReindexAsBatch = new ArrayList<>();
379379
/**
380380
* @todo Re-indexing the definition point itself seems to be necessary
@@ -383,27 +383,47 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
383383

384384
// We don't create a Solr "primary/content" doc for the root dataverse
385385
// so don't create a Solr "permission" doc either.
386+
int i = 0;
387+
int numObjects = 0;
386388
if (definitionPoint.isInstanceofDataverse()) {
387389
Dataverse selfDataverse = (Dataverse) definitionPoint;
388390
if (!selfDataverse.equals(dataverseService.findRootDataverse())) {
389-
dvObjectsToReindexPermissionsFor.add(definitionPoint);
391+
indexPermissionsForOneDvObject(definitionPoint);
392+
numObjects++;
390393
}
391394
List<Dataset> directChildDatasetsOfDvDefPoint = datasetService.findByOwnerId(selfDataverse.getId());
392395
for (Dataset dataset : directChildDatasetsOfDvDefPoint) {
393-
dvObjectsToReindexPermissionsFor.add(dataset);
396+
indexPermissionsForOneDvObject(dataset);
397+
numObjects++;
394398
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
395399
filesToReindexAsBatch.add(datafile);
400+
i++;
401+
if (i % 100 == 0) {
402+
reindexFilesInBatches(filesToReindexAsBatch);
403+
filesToReindexAsBatch.clear();
404+
}
405+
if (i % 1000 == 0) {
406+
logger.fine("Progress: " +i + " files permissions reindexed");
407+
}
396408
}
409+
logger.fine("Progress : dataset " + dataset.getId() + " permissions reindexed");
397410
}
398411
} else if (definitionPoint.isInstanceofDataset()) {
399-
dvObjectsToReindexPermissionsFor.add(definitionPoint);
412+
indexPermissionsForOneDvObject(definitionPoint);
413+
numObjects++;
400414
// index files
401415
Dataset dataset = (Dataset) definitionPoint;
402416
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
403417
filesToReindexAsBatch.add(datafile);
418+
i++;
419+
if (i % 100 == 0) {
420+
reindexFilesInBatches(filesToReindexAsBatch);
421+
filesToReindexAsBatch.clear();
422+
}
404423
}
405424
} else {
406-
dvObjectsToReindexPermissionsFor.add(definitionPoint);
425+
indexPermissionsForOneDvObject(definitionPoint);
426+
numObjects++;
407427
}
408428

409429
/**
@@ -412,64 +432,64 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
412432
* @todo Should update timestamps, probably, even thought these are
413433
* files, see https://github.com/IQSS/dataverse/issues/2421
414434
*/
415-
String response = reindexFilesInBatches(filesToReindexAsBatch);
416-
417-
for (DvObject dvObject : dvObjectsToReindexPermissionsFor) {
418-
/**
419-
* @todo do something with this response
420-
*/
421-
IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObject);
422-
}
423-
435+
reindexFilesInBatches(filesToReindexAsBatch);
436+
logger.fine("Reindexed permissions for " + i + " files and " + numObjects + " datasets/collections");
424437
return new IndexResponse("Number of dvObject permissions indexed for " + definitionPoint
425-
+ ": " + dvObjectsToReindexPermissionsFor.size()
426-
);
438+
+ ": " + numObjects);
427439
}
428440

429441
private String reindexFilesInBatches(List<DataFile> filesToReindexPermissionsFor) {
430442
List<SolrInputDocument> docs = new ArrayList<>();
431443
Map<Long, List<Long>> byParentId = new HashMap<>();
432444
Map<Long, List<String>> permStringByDatasetVersion = new HashMap<>();
433-
for (DataFile file : filesToReindexPermissionsFor) {
434-
Dataset dataset = (Dataset) file.getOwner();
435-
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
436-
for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
437-
boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
438-
if (cardShouldExist) {
439-
List<String> cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
440-
if (cachedPermission == null) {
441-
logger.fine("no cached permission! Looking it up...");
442-
List<DvObjectSolrDoc> fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
443-
for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) {
444-
Long datasetVersionId = fileSolrDoc.getDatasetVersionId();
445-
if (datasetVersionId != null) {
446-
permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions());
445+
int i = 0;
446+
try {
447+
for (DataFile file : filesToReindexPermissionsFor) {
448+
Dataset dataset = (Dataset) file.getOwner();
449+
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
450+
for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
451+
boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
452+
if (cardShouldExist) {
453+
List<String> cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
454+
if (cachedPermission == null) {
455+
logger.finest("no cached permission! Looking it up...");
456+
List<DvObjectSolrDoc> fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
457+
for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) {
458+
Long datasetVersionId = fileSolrDoc.getDatasetVersionId();
459+
if (datasetVersionId != null) {
460+
permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions());
461+
SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
462+
docs.add(solrDoc);
463+
i++;
464+
}
465+
}
466+
} else {
467+
logger.finest("cached permission is " + cachedPermission);
468+
List<DvObjectSolrDoc> fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
469+
for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) {
447470
SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
448471
docs.add(solrDoc);
472+
i++;
449473
}
450474
}
451-
} else {
452-
logger.fine("cached permission is " + cachedPermission);
453-
List<DvObjectSolrDoc> fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
454-
for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) {
455-
SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc);
456-
docs.add(solrDoc);
475+
if (i % 20 == 0) {
476+
persistToSolr(docs);
477+
docs = new ArrayList<>();
457478
}
458479
}
459480
}
481+
Long parent = file.getOwner().getId();
482+
List<Long> existingList = byParentId.get(parent);
483+
if (existingList == null) {
484+
List<Long> empty = new ArrayList<>();
485+
byParentId.put(parent, empty);
486+
} else {
487+
List<Long> updatedList = existingList;
488+
updatedList.add(file.getId());
489+
byParentId.put(parent, updatedList);
490+
}
460491
}
461-
Long parent = file.getOwner().getId();
462-
List<Long> existingList = byParentId.get(parent);
463-
if (existingList == null) {
464-
List<Long> empty = new ArrayList<>();
465-
byParentId.put(parent, empty);
466-
} else {
467-
List<Long> updatedList = existingList;
468-
updatedList.add(file.getId());
469-
byParentId.put(parent, updatedList);
470-
}
471-
}
472-
try {
492+
473493
persistToSolr(docs);
474494
return " " + filesToReindexPermissionsFor.size() + " files indexed across " + docs.size() + " Solr documents ";
475495
} catch (SolrServerException | IOException ex) {
@@ -517,29 +537,26 @@ public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServer
517537
}
518538

519539
/**
520-
*
521-
*
522540
* @return A list of dvobject ids that should have their permissions
523-
* re-indexed because Solr was down when a permission was added. The permission
524-
* should be added to Solr. The id of the permission contains the type of
525-
* DvObject and the primary key of the dvObject.
526-
* DvObjects of type DataFile are currently skipped because their index
527-
* time isn't stored in the database, since they are indexed along
528-
* with their parent dataset (this may change).
541+
* re-indexed because Solr was down when a permission was added. The
542+
* permission should be added to Solr. The id of the permission contains the
543+
* type of DvObject and the primary key of the dvObject. DvObjects of type
544+
* DataFile are currently skipped because their index time isn't stored in
545+
* the database, since they are indexed along with their parent dataset
546+
* (this may change).
529547
*/
530548
public List<Long> findPermissionsInDatabaseButStaleInOrMissingFromSolr() {
531549
List<Long> indexingRequired = new ArrayList<>();
532550
long rootDvId = dataverseService.findRootDataverse().getId();
533551
List<Long> missingDataversePermissionIds = dataverseService.findIdStalePermission();
534552
List<Long> missingDatasetPermissionIds = datasetService.findIdStalePermission();
535-
for (Long id : missingDataversePermissionIds) {
553+
for (Long id : missingDataversePermissionIds) {
536554
if (!id.equals(rootDvId)) {
537-
indexingRequired.add(id);
555+
indexingRequired.add(id);
538556
}
539557
}
540558
indexingRequired.addAll(missingDatasetPermissionIds);
541559
return indexingRequired;
542560
}
543561

544-
545562
}

src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,12 @@ public class ShapefileHandler{
6868
private static final Logger logger = Logger.getLogger(ShapefileHandler.class.getCanonicalName());
6969

7070
// Reference for these extensions: http://en.wikipedia.org/wiki/Shapefile
71-
public final static String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile";
72-
public final static String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive";
73-
public final static List<String> SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj");
74-
public final static String SHP_XML_EXTENSION = "shp.xml";
75-
public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
76-
public final static List<String> SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);
71+
public static final String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile";
72+
public static final String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive";
73+
public static final List<String> SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj");
74+
public static final String SHP_XML_EXTENSION = "shp.xml";
75+
public static final String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__";
76+
public static final List<String> SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION);
7777

7878
public boolean DEBUG = false;
7979

0 commit comments

Comments
 (0)