Skip to content

Commit 726e08a

Browse files
authored
Merge pull request #13 from tomharmon/task/add-gpu-feature
task: make gpu features optional
2 parents 6e8d811 + df01446 commit 726e08a

File tree

2 files changed

+43
-23
lines changed

2 files changed

+43
-23
lines changed

Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
[package]
22
name = "opentelemetry-system-metrics"
3-
version = "0.4.1"
3+
version = "0.4.2"
44
edition = "2021"
55
license = "Apache-2.0"
66
description = "System metric export through Opentelemetry"
77
repository = "https://github.com/haixuanTao/opentelemetry-system-metrics"
88

99
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1010

11+
[features]
12+
default = []
13+
gpu = ["nvml-wrapper"]
14+
1115
[dependencies]
1216
opentelemetry = { version = "0.29.1", features = ["metrics"] }
1317
sysinfo = "0.34.2"
14-
nvml-wrapper = "0.10.0"
18+
nvml-wrapper = { version = "0.10.0", optional = true }
1519
eyre = "0.6.12"
1620
tracing = "0.1.41"
1721
tokio = { version = "1.44.2", features = [

src/lib.rs

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! - Memory
55
//! - Disk
66
//! - Network
7+
//! - GPU (optional, see below)
78
//!
89
//!
910
//! # Getting started
@@ -15,11 +16,19 @@
1516
//! opentelemetry-system-metrics = "0.4"
1617
//! tokio = { version = "1", features = ["full"] }
1718
//! sysinfo = "0.34"
18-
//! nvml-wrapper = "0.10"
1919
//! eyre = "0.6"
2020
//! tracing = "0.1"
2121
//! ```
2222
//!
23+
//! ## GPU metrics
24+
//!
25+
//! GPU metrics are optional and disabled by default. To collect them, enable the `gpu` feature of this crate, which pulls in the `nvml-wrapper` dependency.
26+
//!
27+
//! ```toml
28+
//! [dependencies]
29+
//! opentelemetry-system-metrics = { version = "0.4", features = ["gpu"] }
30+
//! ```
31+
//!
2332
//! ```
2433
//! use opentelemetry::global;
2534
//! use opentelemetry_system_metrics::init_process_observer;
@@ -35,14 +44,15 @@
3544
3645
use eyre::ContextCompat;
3746
use eyre::Result;
47+
#[cfg(feature = "gpu")]
3848
use nvml_wrapper::enums::device::UsedGpuMemory;
49+
#[cfg(feature = "gpu")]
3950
use nvml_wrapper::Nvml;
4051
use opentelemetry::metrics::Meter;
4152
use opentelemetry::Key;
4253
use opentelemetry::KeyValue;
4354
use std::time::Duration;
4455
use sysinfo::{get_current_pid, System};
45-
use tracing::warn;
4656

4757
const PROCESS_PID: Key = Key::from_static_str("process.pid");
4858
const PROCESS_EXECUTABLE_NAME: Key = Key::from_static_str("process.executable.name");
@@ -55,6 +65,7 @@ const PROCESS_MEMORY_VIRTUAL: &str = "process.memory.virtual";
5565
const PROCESS_DISK_IO: &str = "process.disk.io";
5666
// const PROCESS_NETWORK_IO: &str = "process.network.io";
5767
const DIRECTION: Key = Key::from_static_str("direction");
68+
#[cfg(feature = "gpu")]
5869
const PROCESS_GPU_MEMORY_USAGE: &str = "process.gpu.memory.usage";
5970

6071
/// Record asynchronously information about the current process.
@@ -114,6 +125,7 @@ async fn register_metrics(
114125
let core_count =
115126
System::physical_core_count().with_context(|| "Could not get physical core count")?;
116127

128+
#[cfg(feature = "gpu")]
117129
let nvml = Nvml::init();
118130

119131
let process_cpu_utilization = meter
@@ -142,6 +154,7 @@ async fn register_metrics(
142154
.with_unit("byte")
143155
.build();
144156

157+
#[cfg(feature = "gpu")]
145158
let process_gpu_memory_usage = meter
146159
.u64_gauge(PROCESS_GPU_MEMORY_USAGE)
147160
.with_description("The amount of physical GPU memory in use.")
@@ -228,29 +241,32 @@ async fn register_metrics(
228241
}
229242
}
230243

231-
// let mut last_timestamp = last_timestamp.lock().unwrap().clone();
232-
match &nvml {
233-
Ok(nvml) => {
234-
// Get the first `Device` (GPU) in the system
235-
if let Ok(device) = nvml.device_by_index(0) {
236-
if let Ok(gpu_stats) = device.running_compute_processes() {
237-
for stat in gpu_stats.iter() {
238-
if stat.pid == pid.as_u32() {
239-
let memory_used = match stat.used_gpu_memory {
240-
UsedGpuMemory::Used(bytes) => bytes,
241-
UsedGpuMemory::Unavailable => 0,
242-
};
244+
#[cfg(feature = "gpu")]
245+
{
246+
match &nvml {
247+
Ok(nvml) => {
248+
// Get the first `Device` (GPU) in the system
249+
if let Ok(device) = nvml.device_by_index(0) {
250+
if let Ok(gpu_stats) = device.running_compute_processes() {
251+
for stat in gpu_stats.iter() {
252+
if stat.pid == pid.as_u32() {
253+
let memory_used = match stat.used_gpu_memory {
254+
UsedGpuMemory::Used(bytes) => bytes,
255+
UsedGpuMemory::Unavailable => 0,
256+
};
243257

244-
process_gpu_memory_usage.record(memory_used, &common_attributes);
258+
process_gpu_memory_usage
259+
.record(memory_used, &common_attributes);
245260

246-
break;
261+
break;
262+
}
247263
}
248-
}
249-
};
264+
};
265+
}
266+
}
267+
Err(_) => {
268+
                    // NVML failed to initialise: no GPU memory value is recorded.
250269
}
251-
}
252-
Err(_) => {
253-
// If we can't get the NVML, we just put 0.
254270
}
255271
}
256272
}

0 commit comments

Comments
 (0)