@@ -179,7 +179,7 @@ bool GCManager::is_eligible_for_gc(chunk_id_t chunk_id) {
 
     // defrag_blk_num > (GC_THRESHOLD_PERCENT/100) * total_blk_num, to avoid floating point number calculation
     // TODO: avoid overflow here.
-    return 100 * defrag_blk_num > total_blk_num * gc_garbage_rate_threshold;
+    return 100 * defrag_blk_num >= total_blk_num * gc_garbage_rate_threshold;
 }
 
 void GCManager::scan_chunks_for_gc() {
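The TODO above notes that 100 * defrag_blk_num can overflow. A minimal sketch of an overflow-safe variant, assuming both block counters and the threshold are uint64_t (their exact types are not shown in this diff): widening to unsigned __int128 keeps the check in integer arithmetic while making both products 128-bit.

// Sketch only, not part of this change. Widening before multiplying means
// neither product can wrap; unsigned __int128 is a GCC/Clang extension.
#include <cstdint>

bool is_eligible_for_gc_no_overflow(uint64_t defrag_blk_num, uint64_t total_blk_num,
                                    uint64_t gc_garbage_rate_threshold) {
    return static_cast<unsigned __int128>(defrag_blk_num) * 100 >=
           static_cast<unsigned __int128>(total_blk_num) * gc_garbage_rate_threshold;
}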
@@ -368,7 +368,7 @@ bool GCManager::pdev_gc_actor::replace_blob_index(chunk_id_t move_from_chunk, ch
     auto pg_id = move_from_vchunk->m_pg_id.value();
     auto hs_pg = m_hs_home_object->get_hs_pg(pg_id);
 
-    // TODO:: add logic to handle pg_index_table is nullptr if destroying pg happens when baseline resync
+    // TODO:: add logic to handle a nullptr pg_index_table if the pg is destroyed while GC is running
     RELEASE_ASSERT(hs_pg, "Unknown PG for pg_id={}", pg_id);
     auto pg_index_table = hs_pg->index_table_;
     RELEASE_ASSERT(pg_index_table, "Index table not found for PG pg_id={}", pg_id);
@@ -380,32 +380,49 @@ bool GCManager::pdev_gc_actor::replace_blob_index(chunk_id_t move_from_chunk, ch
 
     // TODO:: optimization, concurrently update pg index table.
     for (const auto& [k, v] : valid_blob_indexes) {
-        BlobRouteKey index_key{BlobRoute{k.key().shard, k.key().blob}};
+        const auto& shard = k.key().shard;
+        const auto& blob = k.key().blob;
+        BlobRouteKey index_key{BlobRoute{shard, blob}};
 
         homestore::BtreeSinglePutRequest update_req{
             &index_key, &v, homestore::btree_put_type::UPDATE, nullptr,
-            [](homestore::BtreeKey const& key, homestore::BtreeValue const& value_in_btree,
-               homestore::BtreeValue const& new_value) -> homestore::put_filter_decision {
+            [&pg_id, &shard, &blob](homestore::BtreeKey const& key, homestore::BtreeValue const& value_in_btree,
+                                    homestore::BtreeValue const& new_value) -> homestore::put_filter_decision {
                 BlobRouteValue existing_value{value_in_btree};
                 if (existing_value.pbas() == HSHomeObject::tombstone_pbas) {
-                    // if the blob has been deleted and the value is tombstone, we should not use a valid pba to update
-                    // a tombstone. this case might happen when the blob is deleted by client after we finish data copy
+                    LOGDEBUG(
+                        "remove tombstone when updating pg index after data copy, pg_id={}, shard_id={}, blob_id={}",
+                        pg_id, shard, blob);
+                    BlobRouteValue new_pba_value{new_value};
+                    homestore::data_service().async_free_blk(new_pba_value.pbas());
                     return homestore::put_filter_decision::remove;
                 }
                 return homestore::put_filter_decision::replace;
             }};
 
         ret = pg_index_table->put(update_req);
-        LOGDEBUG("update index table for pg={}, ret={}, move_from_chunk={}, move_to_chunk={}", pg_id, ret,
-                 move_from_chunk, move_to_chunk);
 
-        if (ret != homestore::btree_status_t::success && ret != homestore::btree_status_t::filtered_out) {
+        // 1 if the key exists and the filter returns homestore::put_filter_decision::replace, ret will be
+        // homestore::btree_status_t::success
+
+        // 2 if the key exists and the filter returns homestore::put_filter_decision::remove, ret will be
+        // homestore::btree_status_t::filtered_out (this might happen if a key is deleted after data copy but
+        // before the index is updated)
+
+        // 3 if the key does not exist, ret will be homestore::btree_status_t::not_found (this might
+        // happen during crash recovery)
+
+        if (ret != homestore::btree_status_t::success && ret != homestore::btree_status_t::filtered_out &&
+            ret != homestore::btree_status_t::not_found) {
             LOGERROR(
                 "Failed to update blob in pg index table for move_from_chunk={}, error status = {}, move_to_chunk={}",
                 move_from_chunk, ret, move_to_chunk);
-            m_reserved_chunk_queue.blockingWrite(move_to_chunk);
+            // pg index table might be partially updated, so we can not put move_to_chunk back to the queue
+            // m_reserved_chunk_queue.blockingWrite(move_to_chunk);
             return false;
         }
+        LOGDEBUG("update index table for pg={}, ret={}, move_from_chunk={}, move_to_chunk={}, shard={}, blob={}",
+                 pg_id, ret, move_from_chunk, move_to_chunk, shard, blob);
     }
 
     // TODO:: revisit the following part with the consideration of persisting order for recovery.
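The three statuses enumerated above are the only acceptable outcomes of a filtered put here. A minimal sketch of that contract as a standalone predicate (handle_put_status is a hypothetical helper, not part of this change; it only assumes the btree_status_t enumerators already referenced in this diff):

// Sketch only: maps the put statuses described in the comments above to
// "safe to continue" (true) vs. a real failure (false).
bool handle_put_status(homestore::btree_status_t ret) {
    switch (ret) {
    case homestore::btree_status_t::success:      // key existed, filter replaced it
    case homestore::btree_status_t::filtered_out: // key existed, filter removed it
    case homestore::btree_status_t::not_found:    // key absent, e.g. during crash recovery
        return true;
    default:
        return false; // index may be partially updated; do not reuse move_to_chunk
    }
}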
@@ -517,7 +534,16 @@ bool GCManager::pdev_gc_actor::copy_valid_data(chunk_id_t move_from_chunk, chunk
         auto start_key = BlobRouteKey{BlobRoute{shard_id, std::numeric_limits< uint64_t >::min()}};
         auto end_key = BlobRouteKey{BlobRoute{shard_id, std::numeric_limits< uint64_t >::max()}};
 
-        // delete all the tombstone keys in pg index table and get the valid blob keys
+#if 0
+        // range_remove will hit "Node lock and refresh failed" in some cases and return a not_found even if some
+        // keys have been removed, then
+        // 1 we can not know whether we should retry; it will not return retry.
+        // 2 if not_found happens, we can not tell whether the shard is empty or the search simply failed.
+        // 3 valid_blob_indexes will lose some keys, since "Node lock and refresh failed" can happen and the call
+        // will return early
+
+        // so do not use this until index svc has fixed it: delete all the tombstone keys in pg index table
+        // and get the valid blob keys
         homestore::BtreeRangeRemoveRequest< BlobRouteKey > range_remove_req{
             homestore::BtreeKeyRange< BlobRouteKey >{
                 std::move(start_key), true /* inclusive */, std::move(end_key), true /* inclusive */
@@ -539,41 +565,47 @@ bool GCManager::pdev_gc_actor::copy_valid_data(chunk_id_t move_from_chunk, chunk
             LOGWARN("can not range remove blobs with tombstone in pg index table, pg_id={}, status={}", pg_id, status);
             return false;
         }
+#endif
 
-#if 0
+        // query will never hit "Node lock and refresh failed" and never needs to retry
         homestore::BtreeQueryRequest< BlobRouteKey > query_req{
             homestore::BtreeKeyRange< BlobRouteKey >{std::move(start_key), true /* inclusive */, std::move(end_key),
                                                      true /* inclusive */},
             homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY,
             std::numeric_limits< uint32_t >::max() /* blob count in a shard will not exceed uint32_t max */,
-            [](homestore::BtreeKey const& key,
-               homestore::BtreeValue const& value) -> bool {
+            [](homestore::BtreeKey const& key, homestore::BtreeValue const& value) -> bool {
                 BlobRouteValue existing_value{value};
-                if (existing_value.pbas() == HSHomeObject::tombstone_pbas) {
-                    return false;
-                }
+                if (existing_value.pbas() == HSHomeObject::tombstone_pbas) { return false; }
                 return true;
             }};
 
         auto const status = pg_index_table->query(query_req, valid_blob_indexes);
-        if (status != homestore::btree_status_t::success && status != homestore::btree_status_t::has_more) {
+        if (status != homestore::btree_status_t::success) {
             LOGERROR("Failed to query blobs in index table for status={} shard={}", status, shard_id);
             return false;
         }
-#endif
+
+        auto is_last_shard_in_emergent_chunk = is_emergent && is_last_shard;
 
         if (valid_blob_indexes.empty()) {
             LOGDEBUG("empty shard found in move_from_chunk, chunk_id={}, shard_id={}", move_from_chunk, shard_id);
             // TODO::send a delete shard request to raft channel. there is a case that when we are doing gc, the
             // shard becomes empty, need to handle this case
 
             // we should always write a shard header for the last shard of emergent gc.
-            if (!is_emergent || !is_last_shard) continue;
+            if (!is_last_shard_in_emergent_chunk) continue;
+        } else {
+            LOGDEBUG("{} valid blobs found in move_from_chunk, chunk_id={}, shard_id={}", valid_blob_indexes.size(),
+                     move_from_chunk, shard_id);
         }
 
         // prepare a shard header for this shard in move_to_chunk
         sisl::sg_list header_sgs = generate_shard_super_blk_sg_list(shard_id);
-        if (!is_emergent) {
+
+        // we now generate the shard header from the metablk. the shard state in the shard header blk should be open,
+        // but for a sealed shard, the state in the generated in-memory header_sgs is sealed.
+
+        if (!is_last_shard_in_emergent_chunk) {
             // for the sealed shard, the shard state in header should also be open. now, the written header is the
             // same as footer except the shard state, so we lost the original header.
             r_cast< HSHomeObject::shard_info_superblk* >(header_sgs.iovs[0].iov_base)->info.state =
@@ -587,6 +619,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data(chunk_id_t move_from_chunk, chunk
         uint64_t total_capacity_bytes;
         */
     }
+
     // for emergent gc, we directly use the current shard header as the new header
 
     // TODO::involve ratelimiter in the following code, where read/write are scheduled. or do we need a central
@@ -746,6 +779,37 @@ bool GCManager::pdev_gc_actor::copy_valid_data(chunk_id_t move_from_chunk, chunk
         return false;
     }
 
+    // remove all the tombstone keys in pg index table for this chunk
+    // TODO:: we can enable the range_remove above and delete this part after the indexsvc issue is fixed
+    for (const auto& shard_id : shards) {
+        auto start_key = BlobRouteKey{BlobRoute{shard_id, std::numeric_limits< uint64_t >::min()}};
+        auto end_key = BlobRouteKey{BlobRoute{shard_id, std::numeric_limits< uint64_t >::max()}};
+        homestore::BtreeRangeRemoveRequest< BlobRouteKey > range_remove_req{
+            homestore::BtreeKeyRange< BlobRouteKey >{
+                std::move(start_key), true /* inclusive */, std::move(end_key), true /* inclusive */
+            },
+            nullptr, std::numeric_limits< uint32_t >::max(),
+            [](homestore::BtreeKey const& key, homestore::BtreeValue const& value) -> bool {
+                BlobRouteValue existing_value{value};
+                if (existing_value.pbas() == HSHomeObject::tombstone_pbas) {
+                    // delete tombstone key value
+                    return true;
+                }
+                return false;
+            }};
+
+        auto status = pg_index_table->remove(range_remove_req);
+        if (status != homestore::btree_status_t::success &&
+            status != homestore::btree_status_t::not_found /* empty shard */) {
+            // if we fail to remove the tombstones, it does not matter; they will be removed in the next gc task.
+            LOGWARN("fail to remove tombstone for shard={}, pg_id={}, status={}", shard_id, pg_id, status);
+        }
+        // TODO:: after the completion of indexsvc, we need to retry according to the returned status.
+
+        LOGDEBUG("remove tombstone for pg={}, shard={}, ret={}, move_from_chunk={}, move_to_chunk={}", pg_id,
+                 shard_id, status, move_from_chunk, move_to_chunk);
+    }
+
     return true;
 }