//! - Memory
//! - Disk
//! - Network
+ //! - GPU (optional, see below)
//!
//!
//! # Getting started
@@ -15,11 +16,19 @@
//! opentelemetry-system-metrics = "0.4"
//! tokio = { version = "1", features = ["full"] }
//! sysinfo = "0.34"
- //! nvml-wrapper = "0.10"
//! eyre = { version = "0.6", features = ["tokio"] }
//! tracing = "0.1"
//! ```
//!
+ //! ## GPU metrics
+ //!
+ //! GPU metrics are optional. To collect them, enable this crate's `gpu` feature:
+ //!
+ //! ```toml
+ //! [dependencies]
+ //! opentelemetry-system-metrics = { version = "0.4", features = ["gpu"] }
+ //! ```
+ //!
//! ```
//! use opentelemetry::global;
//! use opentelemetry_system_metrics::init_process_observer;
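The getting-started example above is truncated by the diff. A minimal sketch of how the observer is typically started, assuming `init_process_observer` takes an OpenTelemetry `Meter`, returns an `eyre::Result`, and is awaited inside a Tokio runtime (the crate's README has the authoritative version):

```rust
use opentelemetry::global;
use opentelemetry_system_metrics::init_process_observer;

#[tokio::main]
async fn main() -> eyre::Result<()> {
    // Assumption: a meter from the global provider is enough for a sketch;
    // a real setup installs an SDK MeterProvider with an exporter first.
    let meter = global::meter("process-metrics");

    // Starts collecting CPU, memory, disk, network (and, with the `gpu`
    // feature, GPU) metrics for the current process.
    init_process_observer(meter).await?;
    Ok(())
}
```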
@@ -35,14 +44,15 @@

use eyre::ContextCompat;
use eyre::Result;
+ #[cfg(feature = "gpu")]
use nvml_wrapper::enums::device::UsedGpuMemory;
+ #[cfg(feature = "gpu")]
use nvml_wrapper::Nvml;
use opentelemetry::metrics::Meter;
use opentelemetry::Key;
use opentelemetry::KeyValue;
use std::time::Duration;
use sysinfo::{get_current_pid, System};
- use tracing::warn;

const PROCESS_PID: Key = Key::from_static_str("process.pid");
const PROCESS_EXECUTABLE_NAME: Key = Key::from_static_str("process.executable.name");
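The `#[cfg(feature = "gpu")]` gates above imply that `nvml-wrapper` becomes an optional dependency pulled in by a `gpu` feature. The crate's own `Cargo.toml` is not part of this diff; under that assumption, the wiring would typically look like:

```toml
[dependencies]
# Hypothetical sketch: only compile nvml-wrapper when GPU metrics are requested.
nvml-wrapper = { version = "0.10", optional = true }

[features]
default = []
gpu = ["dep:nvml-wrapper"]
```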
@@ -55,6 +65,7 @@ const PROCESS_MEMORY_VIRTUAL: &str = "process.memory.virtual";
const PROCESS_DISK_IO: &str = "process.disk.io";
// const PROCESS_NETWORK_IO: &str = "process.network.io";
const DIRECTION: Key = Key::from_static_str("direction");
+ #[cfg(feature = "gpu")]
const PROCESS_GPU_MEMORY_USAGE: &str = "process.gpu.memory.usage";

/// Asynchronously record information about the current process.
@@ -114,6 +125,7 @@ async fn register_metrics(
let core_count =
System::physical_core_count().with_context(|| "Could not get physical core count")?;

+ #[cfg(feature = "gpu")]
let nvml = Nvml::init();

let process_cpu_utilization = meter
@@ -142,6 +154,7 @@ async fn register_metrics(
.with_unit("byte")
.build();

+ #[cfg(feature = "gpu")]
let process_gpu_memory_usage = meter
.u64_gauge(PROCESS_GPU_MEMORY_USAGE)
.with_description("The amount of physical GPU memory in use.")
@@ -228,29 +241,32 @@ async fn register_metrics(
}
}

- // let mut last_timestamp = last_timestamp.lock().unwrap().clone();
- match &nvml {
- Ok(nvml) => {
- // Get the first `Device` (GPU) in the system
- if let Ok(device) = nvml.device_by_index(0) {
- if let Ok(gpu_stats) = device.running_compute_processes() {
- for stat in gpu_stats.iter() {
- if stat.pid == pid.as_u32() {
- let memory_used = match stat.used_gpu_memory {
- UsedGpuMemory::Used(bytes) => bytes,
- UsedGpuMemory::Unavailable => 0,
- };
+ #[cfg(feature = "gpu")]
+ {
+ match &nvml {
+ Ok(nvml) => {
+ // Get the first `Device` (GPU) in the system
+ if let Ok(device) = nvml.device_by_index(0) {
+ if let Ok(gpu_stats) = device.running_compute_processes() {
+ for stat in gpu_stats.iter() {
+ if stat.pid == pid.as_u32() {
+ let memory_used = match stat.used_gpu_memory {
+ UsedGpuMemory::Used(bytes) => bytes,
+ UsedGpuMemory::Unavailable => 0,
+ };

- process_gpu_memory_usage.record(memory_used, &common_attributes);
+ process_gpu_memory_usage
+ .record(memory_used, &common_attributes);

- break;
+ break;
+ }
}
- }
- };
+ };
+ }
+ }
+ Err(_) => {
+ // If we can't get the NVML, we just put 0.
}
- }
- Err(_) => {
- // If we can't get the NVML, we just put 0.
}
}
}