34
34
public class SolrIndexServiceBean {
35
35
36
36
private static final Logger logger = Logger .getLogger (SolrIndexServiceBean .class .getCanonicalName ());
37
-
37
+
38
38
@ EJB
39
39
DvObjectServiceBean dvObjectService ;
40
40
@ EJB
@@ -149,7 +149,7 @@ private List<DvObjectSolrDoc> constructDatasetSolrDocs(Dataset dataset) {
149
149
return solrDocs ;
150
150
}
151
151
152
- // private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile) {
152
+ // private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile) {
153
153
private List <DvObjectSolrDoc > constructDatafileSolrDocs (DataFile dataFile , Map <Long , List <String >> permStringByDatasetVersion ) {
154
154
List <DvObjectSolrDoc > datafileSolrDocs = new ArrayList <>();
155
155
Map <DatasetVersion .VersionState , Boolean > desiredCards = searchPermissionsService .getDesiredCards (dataFile .getOwner ());
@@ -166,14 +166,14 @@ private List<DvObjectSolrDoc> constructDatafileSolrDocs(DataFile dataFile, Map<L
166
166
cachedPerms = permStringByDatasetVersion .get (datasetVersionFileIsAttachedTo .getId ());
167
167
}
168
168
if (cachedPerms != null ) {
169
- logger .fine ("reusing cached perms for file " + dataFile .getId ());
169
+ logger .finest ("reusing cached perms for file " + dataFile .getId ());
170
170
perms = cachedPerms ;
171
171
} else if (datasetVersionFileIsAttachedTo .isReleased ()) {
172
- logger .fine ("no cached perms, file is public/discoverable/searchable for file " + dataFile .getId ());
172
+ logger .finest ("no cached perms, file is public/discoverable/searchable for file " + dataFile .getId ());
173
173
perms .add (IndexServiceBean .getPublicGroupString ());
174
174
} else {
175
175
// go to the well (slow)
176
- logger .fine ("no cached perms, file is not public, finding perms for file " + dataFile .getId ());
176
+ logger .finest ("no cached perms, file is not public, finding perms for file " + dataFile .getId ());
177
177
perms = searchPermissionsService .findDatasetVersionPerms (datasetVersionFileIsAttachedTo );
178
178
}
179
179
} else {
@@ -204,13 +204,14 @@ private List<DvObjectSolrDoc> constructDatafileSolrDocsFromDataset(Dataset datas
204
204
} else {
205
205
perms = searchPermissionsService .findDatasetVersionPerms (datasetVersionFileIsAttachedTo );
206
206
}
207
+
207
208
for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo .getFileMetadatas ()) {
208
209
Long fileId = fileMetadata .getDataFile ().getId ();
209
210
String solrIdStart = IndexServiceBean .solrDocIdentifierFile + fileId ;
210
211
String solrIdEnd = getDatasetOrDataFileSolrEnding (datasetVersionFileIsAttachedTo .getVersionState ());
211
212
String solrId = solrIdStart + solrIdEnd ;
212
213
DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc (fileId .toString (), solrId , datasetVersionFileIsAttachedTo .getId (), fileMetadata .getLabel (), perms );
213
- logger .fine ("adding fileid " + fileId );
214
+ logger .finest ("adding fileid " + fileId );
214
215
datafileSolrDocs .add (dataFileSolrDoc );
215
216
}
216
217
}
@@ -361,20 +362,19 @@ private void persistToSolr(Collection<SolrInputDocument> docs) throws SolrServer
361
362
362
363
public IndexResponse indexPermissionsOnSelfAndChildren (long definitionPointId ) {
363
364
DvObject definitionPoint = dvObjectService .findDvObject (definitionPointId );
364
- if ( definitionPoint == null ) {
365
+ if (definitionPoint == null ) {
365
366
logger .log (Level .WARNING , "Cannot find a DvOpbject with id of {0}" , definitionPointId );
366
367
return null ;
367
368
} else {
368
369
return indexPermissionsOnSelfAndChildren (definitionPoint );
369
370
}
370
371
}
371
-
372
+
372
373
/**
373
374
* We use the database to determine direct children since there is no
374
375
* inheritance
375
376
*/
376
377
public IndexResponse indexPermissionsOnSelfAndChildren (DvObject definitionPoint ) {
377
- List <DvObject > dvObjectsToReindexPermissionsFor = new ArrayList <>();
378
378
List <DataFile > filesToReindexAsBatch = new ArrayList <>();
379
379
/**
380
380
* @todo Re-indexing the definition point itself seems to be necessary
@@ -383,27 +383,47 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
383
383
384
384
// We don't create a Solr "primary/content" doc for the root dataverse
385
385
// so don't create a Solr "permission" doc either.
386
+ int i = 0 ;
387
+ int numObjects = 0 ;
386
388
if (definitionPoint .isInstanceofDataverse ()) {
387
389
Dataverse selfDataverse = (Dataverse ) definitionPoint ;
388
390
if (!selfDataverse .equals (dataverseService .findRootDataverse ())) {
389
- dvObjectsToReindexPermissionsFor .add (definitionPoint );
391
+ indexPermissionsForOneDvObject (definitionPoint );
392
+ numObjects ++;
390
393
}
391
394
List <Dataset > directChildDatasetsOfDvDefPoint = datasetService .findByOwnerId (selfDataverse .getId ());
392
395
for (Dataset dataset : directChildDatasetsOfDvDefPoint ) {
393
- dvObjectsToReindexPermissionsFor .add (dataset );
396
+ indexPermissionsForOneDvObject (dataset );
397
+ numObjects ++;
394
398
for (DataFile datafile : filesToReIndexPermissionsFor (dataset )) {
395
399
filesToReindexAsBatch .add (datafile );
400
+ i ++;
401
+ if (i % 100 == 0 ) {
402
+ reindexFilesInBatches (filesToReindexAsBatch );
403
+ filesToReindexAsBatch .clear ();
404
+ }
405
+ if (i % 1000 == 0 ) {
406
+ logger .fine ("Progress: " +i + " files permissions reindexed" );
407
+ }
396
408
}
409
+ logger .fine ("Progress : dataset " + dataset .getId () + " permissions reindexed" );
397
410
}
398
411
} else if (definitionPoint .isInstanceofDataset ()) {
399
- dvObjectsToReindexPermissionsFor .add (definitionPoint );
412
+ indexPermissionsForOneDvObject (definitionPoint );
413
+ numObjects ++;
400
414
// index files
401
415
Dataset dataset = (Dataset ) definitionPoint ;
402
416
for (DataFile datafile : filesToReIndexPermissionsFor (dataset )) {
403
417
filesToReindexAsBatch .add (datafile );
418
+ i ++;
419
+ if (i % 100 == 0 ) {
420
+ reindexFilesInBatches (filesToReindexAsBatch );
421
+ filesToReindexAsBatch .clear ();
422
+ }
404
423
}
405
424
} else {
406
- dvObjectsToReindexPermissionsFor .add (definitionPoint );
425
+ indexPermissionsForOneDvObject (definitionPoint );
426
+ numObjects ++;
407
427
}
408
428
409
429
/**
@@ -412,64 +432,64 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
412
432
* @todo Should update timestamps, probably, even though these are
413
433
* files, see https://github.com/IQSS/dataverse/issues/2421
414
434
*/
415
- String response = reindexFilesInBatches (filesToReindexAsBatch );
416
-
417
- for (DvObject dvObject : dvObjectsToReindexPermissionsFor ) {
418
- /**
419
- * @todo do something with this response
420
- */
421
- IndexResponse indexResponse = indexPermissionsForOneDvObject (dvObject );
422
- }
423
-
435
+ reindexFilesInBatches (filesToReindexAsBatch );
436
+ logger .fine ("Reindexed permissions for " + i + " files and " + numObjects + " datasets/collections" );
424
437
return new IndexResponse ("Number of dvObject permissions indexed for " + definitionPoint
425
- + ": " + dvObjectsToReindexPermissionsFor .size ()
426
- );
438
+ + ": " + numObjects );
427
439
}
428
440
429
441
private String reindexFilesInBatches (List <DataFile > filesToReindexPermissionsFor ) {
430
442
List <SolrInputDocument > docs = new ArrayList <>();
431
443
Map <Long , List <Long >> byParentId = new HashMap <>();
432
444
Map <Long , List <String >> permStringByDatasetVersion = new HashMap <>();
433
- for (DataFile file : filesToReindexPermissionsFor ) {
434
- Dataset dataset = (Dataset ) file .getOwner ();
435
- Map <DatasetVersion .VersionState , Boolean > desiredCards = searchPermissionsService .getDesiredCards (dataset );
436
- for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor (dataset )) {
437
- boolean cardShouldExist = desiredCards .get (datasetVersionFileIsAttachedTo .getVersionState ());
438
- if (cardShouldExist ) {
439
- List <String > cachedPermission = permStringByDatasetVersion .get (datasetVersionFileIsAttachedTo .getId ());
440
- if (cachedPermission == null ) {
441
- logger .fine ("no cached permission! Looking it up..." );
442
- List <DvObjectSolrDoc > fileSolrDocs = constructDatafileSolrDocs ((DataFile ) file , permStringByDatasetVersion );
443
- for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs ) {
444
- Long datasetVersionId = fileSolrDoc .getDatasetVersionId ();
445
- if (datasetVersionId != null ) {
446
- permStringByDatasetVersion .put (datasetVersionId , fileSolrDoc .getPermissions ());
445
+ int i = 0 ;
446
+ try {
447
+ for (DataFile file : filesToReindexPermissionsFor ) {
448
+ Dataset dataset = (Dataset ) file .getOwner ();
449
+ Map <DatasetVersion .VersionState , Boolean > desiredCards = searchPermissionsService .getDesiredCards (dataset );
450
+ for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor (dataset )) {
451
+ boolean cardShouldExist = desiredCards .get (datasetVersionFileIsAttachedTo .getVersionState ());
452
+ if (cardShouldExist ) {
453
+ List <String > cachedPermission = permStringByDatasetVersion .get (datasetVersionFileIsAttachedTo .getId ());
454
+ if (cachedPermission == null ) {
455
+ logger .finest ("no cached permission! Looking it up..." );
456
+ List <DvObjectSolrDoc > fileSolrDocs = constructDatafileSolrDocs ((DataFile ) file , permStringByDatasetVersion );
457
+ for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs ) {
458
+ Long datasetVersionId = fileSolrDoc .getDatasetVersionId ();
459
+ if (datasetVersionId != null ) {
460
+ permStringByDatasetVersion .put (datasetVersionId , fileSolrDoc .getPermissions ());
461
+ SolrInputDocument solrDoc = SearchUtil .createSolrDoc (fileSolrDoc );
462
+ docs .add (solrDoc );
463
+ i ++;
464
+ }
465
+ }
466
+ } else {
467
+ logger .finest ("cached permission is " + cachedPermission );
468
+ List <DvObjectSolrDoc > fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs ((DataFile ) file , permStringByDatasetVersion );
469
+ for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions ) {
447
470
SolrInputDocument solrDoc = SearchUtil .createSolrDoc (fileSolrDoc );
448
471
docs .add (solrDoc );
472
+ i ++;
449
473
}
450
474
}
451
- } else {
452
- logger .fine ("cached permission is " + cachedPermission );
453
- List <DvObjectSolrDoc > fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs ((DataFile ) file , permStringByDatasetVersion );
454
- for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions ) {
455
- SolrInputDocument solrDoc = SearchUtil .createSolrDoc (fileSolrDoc );
456
- docs .add (solrDoc );
475
+ if (i % 20 == 0 ) {
476
+ persistToSolr (docs );
477
+ docs = new ArrayList <>();
457
478
}
458
479
}
459
480
}
481
+ Long parent = file .getOwner ().getId ();
482
+ List <Long > existingList = byParentId .get (parent );
483
+ if (existingList == null ) {
484
+ List <Long > empty = new ArrayList <>();
485
+ byParentId .put (parent , empty );
486
+ } else {
487
+ List <Long > updatedList = existingList ;
488
+ updatedList .add (file .getId ());
489
+ byParentId .put (parent , updatedList );
490
+ }
460
491
}
461
- Long parent = file .getOwner ().getId ();
462
- List <Long > existingList = byParentId .get (parent );
463
- if (existingList == null ) {
464
- List <Long > empty = new ArrayList <>();
465
- byParentId .put (parent , empty );
466
- } else {
467
- List <Long > updatedList = existingList ;
468
- updatedList .add (file .getId ());
469
- byParentId .put (parent , updatedList );
470
- }
471
- }
472
- try {
492
+
473
493
persistToSolr (docs );
474
494
return " " + filesToReindexPermissionsFor .size () + " files indexed across " + docs .size () + " Solr documents " ;
475
495
} catch (SolrServerException | IOException ex ) {
@@ -517,29 +537,26 @@ public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServer
517
537
}
518
538
519
539
/**
520
- *
521
- *
522
540
* @return A list of dvobject ids that should have their permissions
523
- * re-indexed because Solr was down when a permission was added. The permission
524
- * should be added to Solr. The id of the permission contains the type of
525
- * DvObject and the primary key of the dvObject.
526
- * DvObjects of type DataFile are currently skipped because their index
527
- * time isn't stored in the database, since they are indexed along
528
- * with their parent dataset (this may change).
541
+ * re-indexed because Solr was down when a permission was added. The
542
+ * permission should be added to Solr. The id of the permission contains the
543
+ * type of DvObject and the primary key of the dvObject. DvObjects of type
544
+ * DataFile are currently skipped because their index time isn't stored in
545
+ * the database, since they are indexed along with their parent dataset
546
+ * (this may change).
529
547
*/
530
548
public List <Long > findPermissionsInDatabaseButStaleInOrMissingFromSolr () {
531
549
List <Long > indexingRequired = new ArrayList <>();
532
550
long rootDvId = dataverseService .findRootDataverse ().getId ();
533
551
List <Long > missingDataversePermissionIds = dataverseService .findIdStalePermission ();
534
552
List <Long > missingDatasetPermissionIds = datasetService .findIdStalePermission ();
535
- for (Long id : missingDataversePermissionIds ) {
553
+ for (Long id : missingDataversePermissionIds ) {
536
554
if (!id .equals (rootDvId )) {
537
- indexingRequired .add (id );
555
+ indexingRequired .add (id );
538
556
}
539
557
}
540
558
indexingRequired .addAll (missingDatasetPermissionIds );
541
559
return indexingRequired ;
542
560
}
543
561
544
-
545
562
}
0 commit comments