Skip to content

Commit 92e45bb

Browse files
feat(compute): give more batch memory for serving node (#18365) (#18367)
Co-authored-by: Dylan <[email protected]>
1 parent dc8faba commit 92e45bb

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

src/compute/src/memory/config.rs

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ const STORAGE_META_CACHE_MEMORY_PROPORTION: f64 = 0.35;
4343
const STORAGE_SHARED_BUFFER_MEMORY_PROPORTION: f64 = 0.3;
4444

4545
/// The proportion of compute memory used for batch processing.
46-
const COMPUTE_BATCH_MEMORY_PROPORTION: f64 = 0.3;
46+
const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING: f64 = 0.3;
47+
const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING: f64 = 0.6;
4748

4849
/// Each compute node reserves some memory for stack and code segment of processes, allocation
4950
/// overhead, network buffer, etc. based on gradient reserve memory proportion. The reserve memory
@@ -299,8 +300,12 @@ pub fn storage_memory_config(
299300
}
300301
}
301302

302-
pub fn batch_mem_limit(compute_memory_bytes: usize) -> u64 {
303-
(compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION) as u64
303+
pub fn batch_mem_limit(compute_memory_bytes: usize, is_serving_node: bool) -> u64 {
304+
if is_serving_node {
305+
(compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING) as u64
306+
} else {
307+
(compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING) as u64
308+
}
304309
}
305310

306311
#[cfg(test)]

src/compute/src/server.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,7 @@ pub async fn compute_node_serve(
288288
let batch_mgr = Arc::new(BatchManager::new(
289289
config.batch.clone(),
290290
batch_manager_metrics,
291-
batch_mem_limit(compute_memory_bytes),
291+
batch_mem_limit(compute_memory_bytes, opts.role.for_serving()),
292292
));
293293

294294
// NOTE: Due to some limits, we use `compute_memory_bytes + storage_memory_bytes` as

0 commit comments

Comments
 (0)