Skip to content

Commit dffc2f1

Browse files
authored
feat: ensure reserved memory for computing tasks on compute node starting (#7670)
The total memory of a CN consists of: 1. computing memory (both stream & batch) 2. storage memory (block cache, meta cache, etc.) 3. memory for system usage That is to say, we have **_CN total memory_ = _computing memory_ + _storage memory_ + _system memory_**, and both _CN total memory_ and _storage memory_ are configured by the user currently. This PR is to ensure that _computing memory_ and _system memory_ are correctly reserved, i.e. **_computing memory_ + _system memory_ = _CN total memory_ - _storage memory_ > a given amount of memory**. We set this "given amount of memory" as 1G for now (512M for computing and 512M for system). The check is performed on CN startup. Approved-By: fuyufjh Approved-By: hzxa21
1 parent 20bdb72 commit dffc2f1

File tree

8 files changed

+84
-13
lines changed

8 files changed

+84
-13
lines changed

Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

risedev.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ template:
672672
# Whether to enable in-memory pure KV state backend
673673
enable-in-memory-kv-state-backend: false
674674

675-
# Total available memory to LRU Manager in bytes
675+
# Total available memory for the compute node in bytes
676676
total-memory-bytes: 8589934592
677677

678678
# Parallelism of tasks per compute node

src/common/src/config.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ pub struct StorageConfig {
276276
/// Maximum shared buffer size, writes attempting to exceed the capacity will stall until there
277277
/// is enough space.
278278
#[serde(default = "default::storage::shared_buffer_capacity_mb")]
279-
pub shared_buffer_capacity_mb: u32,
279+
pub shared_buffer_capacity_mb: usize,
280280

281281
/// State store url.
282282
#[serde(default = "default::storage::state_store")]
@@ -348,6 +348,32 @@ impl Default for StorageConfig {
348348
}
349349
}
350350

351+
impl StorageConfig {
352+
/// Checks whether an embedded compactor starts with a compute node.
353+
#[inline(always)]
354+
pub fn embedded_compactor_enabled(&self) -> bool {
355+
// We treat `hummock+memory-shared` as a shared storage, so we won't start the compactor
356+
// along with the compute node.
357+
self.state_store == "hummock+memory"
358+
|| self.state_store.starts_with("hummock+disk")
359+
|| self.disable_remote_compactor
360+
}
361+
362+
/// The maximal memory that storage components may use based on the configurations. Note that
363+
/// this is the total storage memory for one compute node instead of the whole cluster.
364+
pub fn total_storage_memory_limit_mb(&self) -> usize {
365+
let total_memory = self.block_cache_capacity_mb
366+
+ self.meta_cache_capacity_mb
367+
+ self.shared_buffer_capacity_mb
368+
+ self.file_cache.total_buffer_capacity_mb;
369+
if self.embedded_compactor_enabled() {
370+
total_memory + self.compactor_memory_limit_mb
371+
} else {
372+
total_memory
373+
}
374+
}
375+
}
376+
351377
/// The subsection `[storage.file_cache]` in `risingwave.toml`.
352378
///
353379
/// It's put at [`StorageConfig::file_cache`].
@@ -533,7 +559,7 @@ mod default {
533559
4
534560
}
535561

536-
pub fn shared_buffer_capacity_mb() -> u32 {
562+
pub fn shared_buffer_capacity_mb() -> usize {
537563
1024
538564
}
539565

src/compute/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ num-traits = "0.2"
3333
parking_lot = "0.12"
3434
paste = "1"
3535
pprof = { version = "0.11", features = ["flamegraph"] }
36+
pretty-bytes = "0.2.2"
3637
prometheus = { version = "0.13" }
3738
prost = "0.11"
3839
risingwave_batch = { path = "../batch" }

src/compute/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ pub struct ComputeNodeOpts {
7575
#[clap(long, env = "RW_CONFIG_PATH", default_value = "")]
7676
pub config_path: String,
7777

78-
/// Total available memory in bytes, used by LRU Manager
78+
/// Total available memory for the compute node in bytes. Used by both computing and storage.
7979
#[clap(long, env = "RW_TOTAL_MEMORY_BYTES", default_value_t = default_total_memory_bytes())]
8080
pub total_memory_bytes: usize,
8181

src/compute/src/memory_management/memory_manager.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ use risingwave_common::util::epoch::Epoch;
2121
use risingwave_stream::executor::monitor::StreamingMetrics;
2222
use risingwave_stream::task::LocalStreamManager;
2323

24+
/// The minimal memory requirement of computing tasks in megabytes.
25+
pub const MIN_COMPUTE_MEMORY_MB: usize = 512;
26+
/// The memory reserved for system usage (stack and code segment of processes, allocation overhead,
27+
/// network buffer, etc.) in megabytes.
28+
pub const SYSTEM_RESERVED_MEMORY_MB: usize = 512;
29+
2430
/// When `enable_managed_cache` is set, compute node will launch a [`GlobalMemoryManager`] to limit
2531
/// the memory usage.
2632
#[cfg_attr(not(target_os = "linux"), expect(dead_code))]

src/compute/src/server.rs

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ use std::sync::Arc;
1717
use std::time::Duration;
1818

1919
use async_stack_trace::StackTraceManager;
20+
use pretty_bytes::converter::convert;
2021
use risingwave_batch::executor::BatchTaskMetrics;
2122
use risingwave_batch::rpc::service::task_service::BatchServiceImpl;
2223
use risingwave_batch::task::{BatchEnvironment, BatchManager};
2324
use risingwave_common::config::{
24-
load_config, AsyncStackTraceOption, MAX_CONNECTION_WINDOW_SIZE, STREAM_WINDOW_SIZE,
25+
load_config, AsyncStackTraceOption, StorageConfig, MAX_CONNECTION_WINDOW_SIZE,
26+
STREAM_WINDOW_SIZE,
2527
};
2628
use risingwave_common::monitor::process_linux::monitor_process;
2729
use risingwave_common::util::addr::HostAddr;
@@ -52,7 +54,9 @@ use risingwave_stream::task::{LocalStreamManager, StreamEnvironment};
5254
use tokio::sync::oneshot::Sender;
5355
use tokio::task::JoinHandle;
5456

55-
use crate::memory_management::memory_manager::GlobalMemoryManager;
57+
use crate::memory_management::memory_manager::{
58+
GlobalMemoryManager, MIN_COMPUTE_MEMORY_MB, SYSTEM_RESERVED_MEMORY_MB,
59+
};
5660
use crate::rpc::service::config_service::ConfigServiceImpl;
5761
use crate::rpc::service::exchange_metrics::ExchangeServiceMetrics;
5862
use crate::rpc::service::exchange_service::ExchangeServiceImpl;
@@ -71,11 +75,13 @@ pub async fn compute_node_serve(
7175
) -> (Vec<JoinHandle<()>>, Sender<()>) {
7276
// Load the configuration.
7377
let config = load_config(&opts.config_path, Some(opts.override_config));
78+
validate_compute_node_memory_config(opts.total_memory_bytes, &config.storage);
7479
info!(
7580
"Starting compute node with config {:?} with debug assertions {}",
7681
config,
7782
if cfg!(debug_assertions) { "on" } else { "off" }
7883
);
84+
7985
// Initialize all the configs
8086
let storage_config = Arc::new(config.storage.clone());
8187
let stream_config = Arc::new(config.streaming.clone());
@@ -144,12 +150,8 @@ pub async fn compute_node_serve(
144150
let mut extra_info_sources: Vec<ExtraInfoSourceRef> = vec![];
145151
if let Some(storage) = state_store.as_hummock_trait() {
146152
extra_info_sources.push(storage.sstable_id_manager().clone());
147-
// Note: we treat `hummock+memory-shared` as a shared storage, so we won't start the
148-
// compactor along with compute node.
149-
if config.storage.state_store == "hummock+memory"
150-
|| config.storage.state_store.starts_with("hummock+disk")
151-
|| storage_config.disable_remote_compactor
152-
{
153+
154+
if storage_config.embedded_compactor_enabled() {
153155
tracing::info!("start embedded compactor");
154156
let read_memory_limiter = Arc::new(MemoryLimiter::new(
155157
storage_config.compactor_memory_limit_mb as u64 * 1024 * 1024 / 2,
@@ -325,3 +327,28 @@ pub async fn compute_node_serve(
325327

326328
(join_handle_vec, shutdown_send)
327329
}
330+
331+
/// Check whether the compute node has enough memory to perform computing tasks. Apart from storage,
332+
/// it must reserve at least `MIN_COMPUTE_MEMORY_MB` for computing and `SYSTEM_RESERVED_MEMORY_MB`
333+
/// for other system usage. Otherwise, it is not allowed to start.
334+
fn validate_compute_node_memory_config(
335+
cn_total_memory_bytes: usize,
336+
storage_config: &StorageConfig,
337+
) {
338+
let storage_memory_mb = storage_config.total_storage_memory_limit_mb();
339+
if storage_memory_mb << 20 > cn_total_memory_bytes {
340+
panic!(
341+
"The storage memory exceeds the total compute node memory:\nTotal compute node memory: {}\nStorage memory: {}\nAt least 1 GB memory should be reserved apart from the storage memory. Please increase the total compute node memory or decrease the storage memory in configurations and restart the compute node.",
342+
convert(cn_total_memory_bytes as _),
343+
convert((storage_memory_mb << 20) as _)
344+
);
345+
} else if (storage_memory_mb + MIN_COMPUTE_MEMORY_MB + SYSTEM_RESERVED_MEMORY_MB) << 20
346+
>= cn_total_memory_bytes
347+
{
348+
panic!(
349+
"No enough memory for computing and other system usage:\nTotal compute node memory: {}\nStorage memory: {}\nAt least 1 GB memory should be reserved apart from the storage memory. Please increase the total compute node memory or decrease the storage memory in configurations and restart the compute node.",
350+
convert(cn_total_memory_bytes as _),
351+
convert((storage_memory_mb << 20) as _)
352+
);
353+
}
354+
}

src/storage/src/hummock/event_handler/hummock_event_handler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub struct BufferTracker {
5454

5555
impl BufferTracker {
5656
pub fn from_storage_config(config: &StorageConfig) -> Self {
57-
let capacity = config.shared_buffer_capacity_mb as usize * (1 << 20);
57+
let capacity = config.shared_buffer_capacity_mb * (1 << 20);
5858
let flush_threshold = capacity * 4 / 5;
5959
Self::new(capacity, flush_threshold)
6060
}

0 commit comments

Comments
 (0)