Skip to content

implement gc data copy #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomeObjectConan(ConanFile):
name = "homeobject"
version = "2.4.1"
version = "2.4.2"

homepage = "https://github.com/eBay/HomeObject"
description = "Blob Store built on HomeReplication"
Expand Down
1 change: 0 additions & 1 deletion src/include/homeobject/shard_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ struct ShardInfo {
uint64_t last_modified_time;
uint64_t available_capacity_bytes;
uint64_t total_capacity_bytes;
uint64_t deleted_capacity_bytes;
std::optional< peer_id_t > current_leader{std::nullopt};

auto operator<=>(ShardInfo const& rhs) const { return id <=> rhs.id; }
Expand Down
4 changes: 2 additions & 2 deletions src/lib/homeobject_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ class HomeObjectImpl : public HomeObject,

/// BlobManager
BlobManager::AsyncResult< blob_id_t > put(shard_id_t shard, Blob&&, trace_id_t tid) final;
BlobManager::AsyncResult< Blob > get(shard_id_t shard, blob_id_t const& blob, uint64_t off,
uint64_t len, trace_id_t tid) const final;
BlobManager::AsyncResult< Blob > get(shard_id_t shard, blob_id_t const& blob, uint64_t off, uint64_t len,
trace_id_t tid) const final;
BlobManager::NullAsyncResult del(shard_id_t shard, blob_id_t const& blob, trace_id_t tid) final;
};

Expand Down
9 changes: 6 additions & 3 deletions src/lib/homestore_backend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ add_test(NAME HomestoreResyncTestWithLeaderRestart
add_executable(homestore_test_gc)
target_sources(homestore_test_gc PRIVATE $<TARGET_OBJECTS:homestore_tests_gc>)
target_link_libraries(homestore_test_gc PUBLIC homeobject_homestore ${COMMON_TEST_DEPS})
add_test(NAME HomestoreTestGC COMMAND homestore_test_pg -csv error --executor immediate --config_path ./
--override_config homestore_config.consensus.snapshot_freq_distance:0
--override_config homestore_config.consensus.max_grpc_message_size:138412032)
add_test(NAME HomestoreTestGC COMMAND homestore_test_gc -csv error --executor immediate --config_path ./
--override_config hs_backend_config.enable_gc=true
--override_config hs_backend_config.gc_garbage_rate_threshold=0
--override_config hs_backend_config.gc_scan_interval_sec=5
# TODO: keep this override until gc supports baseline resync, then remove it
--override_config homestore_config.consensus.snapshot_freq_distance:0)

608 changes: 549 additions & 59 deletions src/lib/homestore_backend/gc_manager.cpp

Large diffs are not rendered by default.

33 changes: 14 additions & 19 deletions src/lib/homestore_backend/gc_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,11 @@

#include "heap_chunk_selector.h"
#include "index_kv.hpp"
#include "hs_backend_config.hpp"

namespace homeobject {

class HSHomeObject;
// TODO:: make those #define below configurable

// Default number of chunks reserved for GC per pdev
#define RESERVED_CHUNK_NUM_PER_PDEV 6
// reserved chunk number dedicated for emergent GC
#define RESERVED_CHUNK_NUM_DEDICATED_FOR_EGC 2
// GC interval in seconds, this is used to schedule the GC timer
#define GC_SCAN_INTERVAL_SEC 10llu
// Garbage rate threshold, bigger than which, a gc task will be scheduled for a chunk
#define GC_GARBAGE_RATE_THRESHOLD 80
// limit the io resource that gc thread can take, so that it will not impact the client io.
// assuming the throughput of a HDD is 300M/s(including read and write) and gc can take 10% of the io resource, which is
// 30M/s. A block is 4K, so gc can read/write 30M/s / 4K = 7680 blocks per second.
#define MAX_READ_WRITE_BLOCK_COUNT_PER_SECOND 7680

ENUM(task_priority, uint8_t, emergent = 0, normal, priority_count);

Expand Down Expand Up @@ -72,7 +59,7 @@ class GCManager {

struct gc_reserved_chunk_superblk {
chunk_id_t chunk_id;
static std::string name() { return _gc_actor_meta_name; }
static std::string name() { return _gc_reserved_chunk_meta_name; }
};
#pragma pack()

Expand Down Expand Up @@ -114,7 +101,7 @@ class GCManager {
pdev_gc_actor& operator=(pdev_gc_actor&&) = delete;

public:
void add_reserved_chunk(chunk_id_t chunk_id);
void add_reserved_chunk(homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb);
folly::SemiFuture< bool > add_gc_task(uint8_t priority, chunk_id_t move_from_chunk);
void handle_recovered_gc_task(const GCManager::gc_task_superblk* gc_task);
void start();
Expand All @@ -136,7 +123,7 @@ class GCManager {
// before we select a reserved chunk and start gc, we need:
// 1 clear all the entries of this chunk in the gc index table
// 2 reset this chunk to make sure it is empty.
void purge_reserved_chunk(chunk_id_t move_to_chunk);
bool purge_reserved_chunk(chunk_id_t move_to_chunk);

private:
// utils
Expand All @@ -148,10 +135,18 @@ class GCManager {
folly::MPMCQueue< chunk_id_t > m_reserved_chunk_queue;
std::shared_ptr< GCBlobIndexTable > m_index_table;
HSHomeObject* m_hs_home_object{nullptr};
RateLimiter m_rate_limiter{MAX_READ_WRITE_BLOCK_COUNT_PER_SECOND};

// limit the io resource that gc thread can take, so that it will not impact the client io.
// assuming the throughput of a HDD is 300M/s(including read and write) and gc can take 10% of the io resource,
// which is 30M/s. A block is 4K, so gc can read/write 30M/s / 4K = 7680 blocks per second.
RateLimiter m_rate_limiter{HS_BACKEND_DYNAMIC_CONFIG(max_read_write_block_count_per_second)};

std::shared_ptr< folly::IOThreadPoolExecutor > m_gc_executor;
std::shared_ptr< folly::IOThreadPoolExecutor > m_egc_executor;
std::atomic_bool m_is_stopped{true};
// since we have a very small number of reserved chunks, a vector is enough
// TODO:: use a map if we have a large number of reserved chunks
std::vector< homestore::superblk< GCManager::gc_reserved_chunk_superblk > > m_reserved_chunks;
};

public:
Expand Down Expand Up @@ -179,9 +174,9 @@ class GCManager {
void start();
void stop();

private:
void scan_chunks_for_gc();

private:
void on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
void on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
void on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
Expand Down
39 changes: 39 additions & 0 deletions src/lib/homestore_backend/heap_chunk_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,45 @@ std::optional< uint32_t > HeapChunkSelector::select_chunks_for_pg(pg_id_t pg_id,
return num_chunk;
}

// Repoint a pg's virtual chunk (vchunk) from its current physical chunk to a new
// one after GC has relocated the data: the vchunk slot that referenced
// old_chunk_id is updated to reference new_chunk_id, and the pg's available
// block accounting is adjusted by the difference between the two chunks.
// Preconditions (release-asserted below): old_chunk_id has a vchunk id and
// currently belongs to pg_id, and pg_id has a chunk collection registered.
void HeapChunkSelector::switch_chunks_for_pg(const pg_id_t pg_id, const chunk_num_t old_chunk_id,
const chunk_num_t new_chunk_id) {
LOGDEBUGMOD(homeobject, "switch chunks for pg_id={}, old_chunk={}, new_chunk={}", pg_id, old_chunk_id,
new_chunk_id);

auto EXVchunk_old = get_extend_vchunk(old_chunk_id);
auto EXVchunk_new = get_extend_vchunk(new_chunk_id);

// NOTE(review): both available-block counts are sampled before taking
// m_chunk_selector_mtx below — assumes no concurrent writes change them in
// between; confirm against the gc_manager call site.
auto old_available_blks = EXVchunk_old->available_blks();
auto new_available_blks = EXVchunk_new->available_blks();

RELEASE_ASSERT(EXVchunk_old->m_v_chunk_id.has_value(), "old_chunk_id={} should has a vchunk_id", old_chunk_id);
RELEASE_ASSERT(EXVchunk_old->m_pg_id.has_value(), "old_chunk_id={} should belongs to a pg", old_chunk_id);
RELEASE_ASSERT(EXVchunk_old->m_pg_id.value() == pg_id, "old_chunk_id={} should belongs to pg={}", old_chunk_id,
pg_id);

// the vchunk slot in the pg's chunk collection that must be repointed
auto v_chunk_id = EXVchunk_old->m_v_chunk_id.value();

// outer lock serializes against pg-level chunk-map changes; inner per-pg lock
// protects the collection's chunk vector and its available_blk_count
std::unique_lock lock(m_chunk_selector_mtx);
auto pg_it = m_per_pg_chunks.find(pg_id);
RELEASE_ASSERT(pg_it != m_per_pg_chunks.end(), "No pg_chunk_collection found for pg={}", pg_id);
auto& pg_chunk_collection = pg_it->second;

std::unique_lock lk(pg_chunk_collection->mtx);
auto& pg_chunks = pg_chunk_collection->m_pg_chunks;

// sanity: the vchunk slot must still map to the pchunk we are replacing
RELEASE_ASSERT(pg_chunks[v_chunk_id]->get_chunk_id() == old_chunk_id,
"vchunk={} for pg={} should have a pchunk={} , but have a pchunk={}", v_chunk_id, pg_id,
old_chunk_id, pg_chunks[v_chunk_id]->get_chunk_id());

pg_chunks[v_chunk_id] = EXVchunk_new;

LOGINFOMOD(homeobject,
"vchunk={} in pg_chunk_collection for pg_id={} has been update from pchunk_id={} to pchunk_id={}",
v_chunk_id, pg_id, old_chunk_id, new_chunk_id);

// NOTE(review): if available_blk_count is unsigned, a negative delta relies on
// wrap-around arithmetic cancelling out — confirm the counter's type.
pg_chunk_collection->available_blk_count += new_available_blks - old_available_blks;
}

bool HeapChunkSelector::recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids) {
std::unique_lock lock_guard(m_chunk_selector_mtx);
// check pg exist
Expand Down
4 changes: 3 additions & 1 deletion src/lib/homestore_backend/heap_chunk_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
// this should be called on each pg meta blk found
bool recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids);

// this should be called after all pg meta blk recovered
// this should be called after all pg meta blk and gc reserved chunk recovered
void build_pdev_available_chunk_heap();

// this should be called after ShardManager is initialized and get all the open shards
Expand Down Expand Up @@ -198,6 +198,8 @@ class HeapChunkSelector : public homestore::ChunkSelector {

homestore::cshared< ExtendedVChunk > get_extend_vchunk(const chunk_num_t chunk_id) const;

void switch_chunks_for_pg(const pg_id_t pg_id, const chunk_num_t old_chunk_id, const chunk_num_t new_chunk_id);

private:
void add_chunk_internal(const chunk_num_t, bool add_to_heap = true);

Expand Down
20 changes: 20 additions & 0 deletions src/lib/homestore_backend/hs_backend_config.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ table HSBackendSettings {

//Reserved space in a chunk
reserved_bytes_in_chunk: uint64 = 16777216 (hotswap);

//Enable GC
//TODO: make this hotswap after gc is well tested
enable_gc: bool = false;

//Total reserved chunk num (dedicated for gc/egc) per pdev
reserved_chunk_num_per_pdev: uint8 = 6;

//Reserved chunk number (dedicated for egc) per pdev
reserved_chunk_num_per_pdev_for_egc: uint8 = 2;

//GC scan interval(second)
gc_scan_interval_sec: uint64 = 60;

//GC garbage rate threshold, upon which a chunk will be selected for gc
gc_garbage_rate_threshold: uint8 = 80;

//max read/write block count per second, which is used by ratelimiter to limit the io resource taken by gc
max_read_write_block_count_per_second: uint16 = 7680;

}

root_type HSBackendSettings;
9 changes: 7 additions & 2 deletions src/lib/homestore_backend/hs_blob_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -527,13 +527,18 @@ void HSHomeObject::on_blob_del_commit(int64_t lsn, sisl::blob const& header, sis
if (recovery_done_) {
BLOGE(tid, blob_info.shard_id, blob_info.blob_id, "Failed to move blob to tombstone, error={}", r.error());
if (ctx) ctx->promise_.setValue(folly::makeUnexpected(r.error()));
return;
} else {
if (ctx) ctx->promise_.setValue(BlobManager::Result< BlobInfo >(blob_info));
return;
}

// if we return directly, then from the perspective of the state machine this lsn has been committed
// successfully, so there will be no further deletion for this blob, and as a result a blob leak happens
RELEASE_ASSERT(false, "fail to commit delete blob log for pg={}, shard={}, blob={}", msg_header->pg_id,
msg_header->shard_id, blob_info.blob_id);
}

LOGD("shard_id={}, blob_id={} has been moved to tombstone, lsn={}", blob_info.shard_id, blob_info.blob_id, lsn);

auto& multiBlks = r.value();
if (multiBlks != tombstone_pbas) {
repl_dev->async_free_blks(lsn, multiBlks);
Expand Down
29 changes: 15 additions & 14 deletions src/lib/homestore_backend/hs_cp_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,21 @@ std::unique_ptr< CPContext > HSHomeObject::MyCPCallbacks::on_switchover_cp(CP* c
folly::Future< bool > HSHomeObject::MyCPCallbacks::cp_flush(CP* cp) {
std::vector< HSHomeObject::HS_PG* > dirty_pg_list;
dirty_pg_list.reserve(home_obj_._pg_map.size());
{
std::shared_lock lock_guard(home_obj_._pg_lock);
for (auto const& [id, pg] : home_obj_._pg_map) {
auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get());

// All dirty durable entries are updated in the superblk. We persist outside the pg_lock
if (!hs_pg->is_dirty_.exchange(false)) { continue; }

hs_pg->pg_sb_->blob_sequence_num = hs_pg->durable_entities().blob_sequence_num.load();
hs_pg->pg_sb_->active_blob_count = hs_pg->durable_entities().active_blob_count.load();
hs_pg->pg_sb_->tombstone_blob_count = hs_pg->durable_entities().tombstone_blob_count.load();
hs_pg->pg_sb_->total_occupied_blk_count = hs_pg->durable_entities().total_occupied_blk_count.load();
dirty_pg_list.push_back(hs_pg);
}

// the metablk update in cp flush might conflict with gc, which will also try to update the metablk, so we need
// to hold the unique lock until all the updates are completed
std::unique_lock lock_guard(home_obj_._pg_lock);
for (auto const& [id, pg] : home_obj_._pg_map) {
auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get());

// All dirty durable entries are updated in the superblk. We persist outside the pg_lock
if (!hs_pg->is_dirty_.exchange(false)) { continue; }

hs_pg->pg_sb_->blob_sequence_num = hs_pg->durable_entities().blob_sequence_num.load();
hs_pg->pg_sb_->active_blob_count = hs_pg->durable_entities().active_blob_count.load();
hs_pg->pg_sb_->tombstone_blob_count = hs_pg->durable_entities().tombstone_blob_count.load();
hs_pg->pg_sb_->total_occupied_blk_count = hs_pg->durable_entities().total_occupied_blk_count.load();
dirty_pg_list.push_back(hs_pg);
}

for (auto& hs_pg : dirty_pg_list) {
Expand Down
30 changes: 23 additions & 7 deletions src/lib/homestore_backend/hs_homeobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,11 @@ void HSHomeObject::init_homestore() {
repl_app->on_repl_devs_init_completed();

// we need to register the meta blk handlers after the metaservice is ready.
gc_mgr_ = std::make_unique< GCManager >(chunk_selector_, this);
gc_mgr_ = std::make_shared< GCManager >(chunk_selector_, this);
// if this is the first time we are starting, we need to create a gc metablk for each pdev, which records the
// reserved chunks and the index table.
auto pdev_chunks = chunk_selector_->get_pdev_chunks();
const auto reserved_chunk_num_per_pdev = HS_BACKEND_DYNAMIC_CONFIG(reserved_chunk_num_per_pdev);
for (auto const& [pdev_id, chunks] : pdev_chunks) {
// 1 create gc index table for each pdev
auto gc_index_table = create_gc_index_table();
Expand All @@ -241,12 +242,12 @@ void HSHomeObject::init_homestore() {
gc_actor_sb->index_table_uuid = uuid;
gc_actor_sb.write();

RELEASE_ASSERT(chunks.size() > RESERVED_CHUNK_NUM_PER_PDEV,
RELEASE_ASSERT(chunks.size() > reserved_chunk_num_per_pdev,
"pdev {} has {} chunks, but we need at least {} chunks for reserved chunk", pdev_id,
chunks.size(), RESERVED_CHUNK_NUM_PER_PDEV);
chunks.size(), reserved_chunk_num_per_pdev);

// 3 create reserved chunk meta blk for each pdev, which contains the reserved chunks.
for (size_t i = 0; i < RESERVED_CHUNK_NUM_PER_PDEV; ++i) {
for (size_t i = 0; i < reserved_chunk_num_per_pdev; ++i) {
auto chunk = chunks[i];
homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb{
GCManager::_gc_reserved_chunk_meta_name};
Expand Down Expand Up @@ -352,15 +353,21 @@ void HSHomeObject::init_cp() {

void HSHomeObject::init_gc() {
using namespace homestore;
if (!gc_mgr_) gc_mgr_ = std::make_unique< GCManager >(chunk_selector_, this);
if (!gc_mgr_) gc_mgr_ = std::make_shared< GCManager >(chunk_selector_, this);
// when initializing, there is not gc task. we need to recover reserved chunks here, so that the reserved chunks
// will not be put into pdev heap when built
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_actor_meta_name);
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_reserved_chunk_meta_name);
HomeStore::instance()->meta_service().read_sub_sb(GCManager::_gc_task_meta_name);

// TODO::enable gc after we have data copy
// gc_mgr_->start();
const auto enable_gc = HS_BACKEND_DYNAMIC_CONFIG(enable_gc);

if (enable_gc) {
LOGI("Starting GC");
gc_mgr_->start();
} else {
LOGI("GC is disabled");
}
}

// void HSHomeObject::trigger_timed_events() { persist_pg_sb(); }
Expand Down Expand Up @@ -460,4 +467,13 @@ std::shared_ptr< GCBlobIndexTable > HSHomeObject::get_gc_index_table(std::string
return it->second;
}

/// Kick off an immediate GC scan, bypassing the periodic scan timer.
/// Fix: the original null check was inverted — it dereferenced gc_mgr_ exactly
/// when it was null (crash) and logged "GC is not enabled" when the manager
/// existed. Guard first, then scan.
void HSHomeObject::trigger_immediate_gc() {
    if (!gc_mgr_) {
        // GC manager is only created during backend init; nothing to scan without it.
        LOGE("GC is not enabled");
        return;
    }
    LOGI("scan chunks for gc immediately");
    gc_mgr_->scan_chunks_for_gc();
}

} // namespace homeobject
Loading
Loading