Skip to content

implement gc data copy #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomeObjectConan(ConanFile):
name = "homeobject"
version = "2.4.1"
version = "2.4.2"

homepage = "https://github.com/eBay/HomeObject"
description = "Blob Store built on HomeReplication"
Expand Down
1 change: 0 additions & 1 deletion src/include/homeobject/shard_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ struct ShardInfo {
uint64_t last_modified_time;
uint64_t available_capacity_bytes;
uint64_t total_capacity_bytes;
uint64_t deleted_capacity_bytes;
std::optional< peer_id_t > current_leader{std::nullopt};

auto operator<=>(ShardInfo const& rhs) const { return id <=> rhs.id; }
Expand Down
4 changes: 2 additions & 2 deletions src/lib/homeobject_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ class HomeObjectImpl : public HomeObject,

/// BlobManager
BlobManager::AsyncResult< blob_id_t > put(shard_id_t shard, Blob&&, trace_id_t tid) final;
BlobManager::AsyncResult< Blob > get(shard_id_t shard, blob_id_t const& blob, uint64_t off,
uint64_t len, trace_id_t tid) const final;
BlobManager::AsyncResult< Blob > get(shard_id_t shard, blob_id_t const& blob, uint64_t off, uint64_t len,
trace_id_t tid) const final;
BlobManager::NullAsyncResult del(shard_id_t shard, blob_id_t const& blob, trace_id_t tid) final;
};

Expand Down
9 changes: 6 additions & 3 deletions src/lib/homestore_backend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ add_test(NAME HomestoreResyncTestWithLeaderRestart
add_executable(homestore_test_gc)
target_sources(homestore_test_gc PRIVATE $<TARGET_OBJECTS:homestore_tests_gc>)
target_link_libraries(homestore_test_gc PUBLIC homeobject_homestore ${COMMON_TEST_DEPS})
add_test(NAME HomestoreTestGC COMMAND homestore_test_pg -csv error --executor immediate --config_path ./
--override_config homestore_config.consensus.snapshot_freq_distance:0
--override_config homestore_config.consensus.max_grpc_message_size:138412032)
add_test(NAME HomestoreTestGC COMMAND homestore_test_gc -csv error --executor immediate --config_path ./
--override_config hs_backend_config.enable_gc=true
--override_config hs_backend_config.gc_garbage_rate_threshold=0
--override_config hs_backend_config.gc_scan_interval_sec=5
# TODO: keep this override until gc supports baseline resync, then remove it
--override_config homestore_config.consensus.snapshot_freq_distance:0)

608 changes: 549 additions & 59 deletions src/lib/homestore_backend/gc_manager.cpp

Large diffs are not rendered by default.

33 changes: 14 additions & 19 deletions src/lib/homestore_backend/gc_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,11 @@

#include "heap_chunk_selector.h"
#include "index_kv.hpp"
#include "hs_backend_config.hpp"

namespace homeobject {

class HSHomeObject;
// TODO:: make those #define below configurable

// Default number of chunks reserved for GC per pdev
#define RESERVED_CHUNK_NUM_PER_PDEV 6
// reserved chunk number dedicated for emergent GC
#define RESERVED_CHUNK_NUM_DEDICATED_FOR_EGC 2
// GC interval in seconds, this is used to schedule the GC timer
#define GC_SCAN_INTERVAL_SEC 10llu
// Garbage rate threshold, bigger than which, a gc task will be scheduled for a chunk
#define GC_GARBAGE_RATE_THRESHOLD 80
// limit the io resource that gc thread can take, so that it will not impact the client io.
// assuming the throughput of a HDD is 300M/s(including read and write) and gc can take 10% of the io resource, which is
// 30M/s. A block is 4K, so gc can read/write 30M/s / 4K = 7680 blocks per second.
#define MAX_READ_WRITE_BLOCK_COUNT_PER_SECOND 7680

ENUM(task_priority, uint8_t, emergent = 0, normal, priority_count);

Expand Down Expand Up @@ -72,7 +59,7 @@ class GCManager {

struct gc_reserved_chunk_superblk {
chunk_id_t chunk_id;
static std::string name() { return _gc_actor_meta_name; }
static std::string name() { return _gc_reserved_chunk_meta_name; }
};
#pragma pack()

Expand Down Expand Up @@ -114,7 +101,7 @@ class GCManager {
pdev_gc_actor& operator=(pdev_gc_actor&&) = delete;

public:
void add_reserved_chunk(chunk_id_t chunk_id);
void add_reserved_chunk(homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb);
folly::SemiFuture< bool > add_gc_task(uint8_t priority, chunk_id_t move_from_chunk);
void handle_recovered_gc_task(const GCManager::gc_task_superblk* gc_task);
void start();
Expand All @@ -136,7 +123,7 @@ class GCManager {
// before we select a reserved chunk and start gc, we need:
// 1 clear all the entries of this chunk in the gc index table
// 2 reset this chunk to make sure it is empty.
void purge_reserved_chunk(chunk_id_t move_to_chunk);
bool purge_reserved_chunk(chunk_id_t move_to_chunk);

private:
// utils
Expand All @@ -148,10 +135,18 @@ class GCManager {
folly::MPMCQueue< chunk_id_t > m_reserved_chunk_queue;
std::shared_ptr< GCBlobIndexTable > m_index_table;
HSHomeObject* m_hs_home_object{nullptr};
RateLimiter m_rate_limiter{MAX_READ_WRITE_BLOCK_COUNT_PER_SECOND};

// limit the io resource that gc thread can take, so that it will not impact the client io.
// assuming the throughput of a HDD is 300M/s(including read and write) and gc can take 10% of the io resource,
// which is 30M/s. A block is 4K, so gc can read/write 30M/s / 4K = 7680 blocks per second.
RateLimiter m_rate_limiter{HS_BACKEND_DYNAMIC_CONFIG(max_read_write_block_count_per_second)};

std::shared_ptr< folly::IOThreadPoolExecutor > m_gc_executor;
std::shared_ptr< folly::IOThreadPoolExecutor > m_egc_executor;
std::atomic_bool m_is_stopped{true};
// since we have a very small number of reserved chunks, a vector is enough
// TODO:: use a map if we have a large number of reserved chunks
std::vector< homestore::superblk< GCManager::gc_reserved_chunk_superblk > > m_reserved_chunks;
};

public:
Expand Down Expand Up @@ -179,9 +174,9 @@ class GCManager {
void start();
void stop();

private:
void scan_chunks_for_gc();

private:
void on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
void on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
void on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
Expand Down
39 changes: 39 additions & 0 deletions src/lib/homestore_backend/heap_chunk_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,45 @@ std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id,
return num_chunk;
}

// Repoint a pg's virtual chunk (vchunk) from its current physical chunk to a new
// one after GC has relocated the data: the vchunk slot that referenced
// old_chunk_id is updated to reference new_chunk_id, and the pg's available
// block accounting is adjusted by the difference between the two chunks.
// Preconditions (release-asserted below): old_chunk_id has a vchunk id and
// currently belongs to pg_id, and pg_id has a chunk collection registered.
void HeapChunkSelector::switch_chunks_for_pg(const pg_id_t pg_id, const chunk_num_t old_chunk_id,
const chunk_num_t new_chunk_id) {
LOGDEBUGMOD(homeobject, "switch chunks for pg_id={}, old_chunk={}, new_chunk={}", pg_id, old_chunk_id,
new_chunk_id);

auto EXVchunk_old = get_extend_vchunk(old_chunk_id);
auto EXVchunk_new = get_extend_vchunk(new_chunk_id);

// NOTE(review): both available-block counts are sampled before taking
// m_chunk_selector_mtx below — assumes no concurrent writes change them in
// between; confirm against the gc_manager call site.
auto old_available_blks = EXVchunk_old->available_blks();
auto new_available_blks = EXVchunk_new->available_blks();

RELEASE_ASSERT(EXVchunk_old->m_v_chunk_id.has_value(), "old_chunk_id={} should has a vchunk_id", old_chunk_id);
RELEASE_ASSERT(EXVchunk_old->m_pg_id.has_value(), "old_chunk_id={} should belongs to a pg", old_chunk_id);
RELEASE_ASSERT(EXVchunk_old->m_pg_id.value() == pg_id, "old_chunk_id={} should belongs to pg={}", old_chunk_id,
pg_id);

// the vchunk slot in the pg's chunk collection that must be repointed
auto v_chunk_id = EXVchunk_old->m_v_chunk_id.value();

// outer lock serializes against pg-level chunk-map changes; inner per-pg lock
// protects the collection's chunk vector and its available_blk_count
std::unique_lock lock(m_chunk_selector_mtx);
auto pg_it = m_per_pg_chunks.find(pg_id);
RELEASE_ASSERT(pg_it != m_per_pg_chunks.end(), "No pg_chunk_collection found for pg={}", pg_id);
auto& pg_chunk_collection = pg_it->second;

std::unique_lock lk(pg_chunk_collection->mtx);
auto& pg_chunks = pg_chunk_collection->m_pg_chunks;

// sanity: the vchunk slot must still map to the pchunk we are replacing
RELEASE_ASSERT(pg_chunks[v_chunk_id]->get_chunk_id() == old_chunk_id,
"vchunk={} for pg={} should have a pchunk={} , but have a pchunk={}", v_chunk_id, pg_id,
old_chunk_id, pg_chunks[v_chunk_id]->get_chunk_id());

pg_chunks[v_chunk_id] = EXVchunk_new;

LOGINFOMOD(homeobject,
"vchunk={} in pg_chunk_collection for pg_id={} has been update from pchunk_id={} to pchunk_id={}",
v_chunk_id, pg_id, old_chunk_id, new_chunk_id);

// NOTE(review): if available_blk_count is unsigned, a negative delta relies on
// wrap-around arithmetic cancelling out — confirm the counter's type.
pg_chunk_collection->available_blk_count += new_available_blks - old_available_blks;
}

bool HeapChunkSelector::recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids) {
std::unique_lock lock_guard(m_chunk_selector_mtx);
// check pg exist
Expand Down
4 changes: 3 additions & 1 deletion src/lib/homestore_backend/heap_chunk_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
// this should be called on each pg meta blk found
bool recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids);

// this should be called after all pg meta blk recovered
// this should be called after all pg meta blk and gc reserved chunk recovered
void build_pdev_available_chunk_heap();

// this should be called after ShardManager is initialized and get all the open shards
Expand Down Expand Up @@ -198,6 +198,8 @@ class HeapChunkSelector : public homestore::ChunkSelector {

homestore::cshared< ExtendedVChunk > get_extend_vchunk(const chunk_num_t chunk_id) const;

void switch_chunks_for_pg(const pg_id_t pg_id, const chunk_num_t old_chunk_id, const chunk_num_t new_chunk_id);

private:
void add_chunk_internal(const chunk_num_t, bool add_to_heap = true);

Expand Down
20 changes: 20 additions & 0 deletions src/lib/homestore_backend/hs_backend_config.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ table HSBackendSettings {

//Reserved space in a chunk
reserved_bytes_in_chunk: uint64 = 16777216 (hotswap);

//Enable GC
//TODO: make this hotswap after gc is well tested
enable_gc: bool = false;

//Total reserved chunk num (dedicated for gc/egc) per pdev
reserved_chunk_num_per_pdev: uint8 = 6;

//Reserved chunk number (dedicated for egc) per pdev
reserved_chunk_num_per_pdev_for_egc: uint8 = 2;

//GC scan interval(second)
gc_scan_interval_sec: uint64 = 60;

//GC garbage rate threshold, upon which a chunk will be selected for gc
gc_garbage_rate_threshold: uint8 = 80;

//max read/write block count per second, which is used by ratelimiter to limit the io resource taken by gc
max_read_write_block_count_per_second: uint16 = 7680;

}

root_type HSBackendSettings;
9 changes: 7 additions & 2 deletions src/lib/homestore_backend/hs_blob_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -527,13 +527,18 @@ void HSHomeObject::on_blob_del_commit(int64_t lsn, sisl::blob const& header, sis
if (recovery_done_) {
BLOGE(tid, blob_info.shard_id, blob_info.blob_id, "Failed to move blob to tombstone, error={}", r.error());
if (ctx) ctx->promise_.setValue(folly::makeUnexpected(r.error()));
return;
} else {
if (ctx) ctx->promise_.setValue(BlobManager::Result< BlobInfo >(blob_info));
return;
}

// if we return directly, then from the perspective of the state machine this lsn has been committed
// successfully, so there will be no further deletion for this blob, and as a result a blob leak happens
RELEASE_ASSERT(false, "fail to commit delete blob log for pg={}, shard={}, blob={}", msg_header->pg_id,
msg_header->shard_id, blob_info.blob_id);
}

LOGD("shard_id={}, blob_id={} has been moved to tombstone, lsn={}", blob_info.shard_id, blob_info.blob_id, lsn);

auto& multiBlks = r.value();
if (multiBlks != tombstone_pbas) {
repl_dev->async_free_blks(lsn, multiBlks);
Expand Down
29 changes: 15 additions & 14 deletions src/lib/homestore_backend/hs_cp_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,21 @@ std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* c
folly::Future< bool > HSHomeObject::MyCPCallbacks::cp_flush(CP* cp) {
std::vector< HSHomeObject::HS_PG* > dirty_pg_list;
dirty_pg_list.reserve(home_obj_._pg_map.size());
{
std::shared_lock lock_guard(home_obj_._pg_lock);
for (auto const& [id, pg] : home_obj_._pg_map) {
auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get());

// All dirty durable entries are updated in the superblk. We persist outside the pg_lock
if (!hs_pg->is_dirty_.exchange(false)) { continue; }

hs_pg->pg_sb_->blob_sequence_num = hs_pg->durable_entities().blob_sequence_num.load();
hs_pg->pg_sb_->active_blob_count = hs_pg->durable_entities().active_blob_count.load();
hs_pg->pg_sb_->tombstone_blob_count = hs_pg->durable_entities().tombstone_blob_count.load();
hs_pg->pg_sb_->total_occupied_blk_count = hs_pg->durable_entities().total_occupied_blk_count.load();
dirty_pg_list.push_back(hs_pg);
}

// the metablk update in cp flush might conflict with gc, which will also try to update the metablk, so we need
// to hold the unique lock until all the updates are completed
std::unique_lock lock_guard(home_obj_._pg_lock);
for (auto const& [id, pg] : home_obj_._pg_map) {
auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get());

// All dirty durable entries are updated in the superblk. We persist outside the pg_lock
if (!hs_pg->is_dirty_.exchange(false)) { continue; }

hs_pg->pg_sb_->blob_sequence_num = hs_pg->durable_entities().blob_sequence_num.load();
hs_pg->pg_sb_->active_blob_count = hs_pg->durable_entities().active_blob_count.load();
hs_pg->pg_sb_->tombstone_blob_count = hs_pg->durable_entities().tombstone_blob_count.load();
hs_pg->pg_sb_->total_occupied_blk_count = hs_pg->durable_entities().total_occupied_blk_count.load();
dirty_pg_list.push_back(hs_pg);
}

for (auto& hs_pg : dirty_pg_list) {
Expand Down
30 changes: 23 additions & 7 deletions src/lib/homestore_backend/hs_homeobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,11 @@ void HSHomeObject::init_homestore() {
repl_app->on_repl_devs_init_completed();

// we need to register the meta blk handlers after the metaservice is ready.
gc_mgr_ = std::make_unique< GCManager >(chunk_selector_, this);
gc_mgr_ = std::make_shared< GCManager >(chunk_selector_, this);
// if this is the first time we are starting, we need to create a gc metablk for each pdev, which records the
// reserved chunks and the index table.
auto pdev_chunks = chunk_selector_->get_pdev_chunks();
const auto reserved_chunk_num_per_pdev = HS_BACKEND_DYNAMIC_CONFIG(reserved_chunk_num_per_pdev);
for (auto const& [pdev_id, chunks] : pdev_chunks) {
// 1 create gc index table for each pdev
auto gc_index_table = create_gc_index_table();
Expand All @@ -241,12 +242,12 @@ void HSHomeObject::init_homestore() {
gc_actor_sb->index_table_uuid = uuid;
gc_actor_sb.write();

RELEASE_ASSERT(chunks.size() > RESERVED_CHUNK_NUM_PER_PDEV,
RELEASE_ASSERT(chunks.size() > reserved_chunk_num_per_pdev,
"pdev {} has {} chunks, but we need at least {} chunks for reserved chunk", pdev_id,
chunks.size(), RESERVED_CHUNK_NUM_PER_PDEV);
chunks.size(), reserved_chunk_num_per_pdev);

// 3 create reserved chunk meta blk for each pdev, which contains the reserved chunks.
for (size_t i = 0; i < RESERVED_CHUNK_NUM_PER_PDEV; ++i) {
for (size_t i = 0; i < reserved_chunk_num_per_pdev; ++i) {
auto chunk = chunks[i];
homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb{
GCManager::_gc_reserved_chunk_meta_name};
Expand Down Expand Up @@ -352,15 +353,21 @@ void HSHomeObject::init_cp() {

void HSHomeObject::init_gc() {
using namespace homestore;
if (!gc_mgr_) gc_mgr_ = std::make_unique< GCManager >(chunk_selector_, this);
if (!gc_mgr_) gc_mgr_ = std::make_shared< GCManager >(chunk_selector_, this);
// when initializing, there is not gc task. we need to recover reserved chunks here, so that the reserved chunks
// will not be put into pdev heap when built
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_actor_meta_name);
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_reserved_chunk_meta_name);
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_task_meta_name);

// TODO::enable gc after we have data copy
// gc_mgr_->start();
const auto enable_gc = HS_BACKEND_DYNAMIC_CONFIG(enable_gc);

if (enable_gc) {
LOGI("Starting GC");
gc_mgr_->start();
} else {
LOGI("GC is disabled");
}
}

// void HSHomeObject::trigger_timed_events() { persist_pg_sb(); }
Expand Down Expand Up @@ -460,4 +467,13 @@ std::shared_ptr< GCBlobIndexTable > HSHomeObject::get_gc_index_table(std::string
return it->second;
}

/// Kick off an immediate GC scan, bypassing the periodic scan timer.
/// Fix: the original null check was inverted — it dereferenced gc_mgr_ exactly
/// when it was null (crash) and logged "GC is not enabled" when the manager
/// existed. Guard first, then scan.
void HSHomeObject::trigger_immediate_gc() {
    if (!gc_mgr_) {
        // GC manager is only created during backend init; nothing to scan without it.
        LOGE("GC is not enabled");
        return;
    }
    LOGI("scan chunks for gc immediately");
    gc_mgr_->scan_chunks_for_gc();
}

} // namespace homeobject
Loading
Loading