Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit f40a377

Browse files
fix: get driver version and cuda version at a single command (#1754)
Co-authored-by: vansangpfiev <[email protected]>
1 parent 045762c commit f40a377

File tree

8 files changed, with 36 additions (+36) and 42 deletions (-42).

engine/cli/commands/engine_install_cmd.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ bool EngineInstallCmd::Exec(const std::string& engine,
3737
dp.Connect(host_, port_);
3838
// engine can be small, so need to start ws first
3939
auto dp_res = std::async(std::launch::deferred, [&dp] {
40-
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
40+
bool need_cuda_download =
41+
!system_info_utils::GetDriverAndCudaVersion().second.empty();
4142
if (need_cuda_download) {
4243
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
4344
} else {
@@ -149,7 +150,8 @@ bool EngineInstallCmd::Exec(const std::string& engine,
149150
dp.Connect(host_, port_);
150151
// engine can be small, so need to start ws first
151152
auto dp_res = std::async(std::launch::deferred, [&dp] {
152-
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
153+
bool need_cuda_download =
154+
!system_info_utils::GetDriverAndCudaVersion().second.empty();
153155
if (need_cuda_download) {
154156
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
155157
} else {

engine/cli/commands/engine_install_cmd.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ class EngineInstallCmd {
1414
port_(port),
1515
show_menu_(show_menu),
1616
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
17-
.cuda_driver_version = system_info_utils::GetCudaVersion()} {};
17+
.cuda_driver_version =
18+
system_info_utils::GetDriverAndCudaVersion().second} {};
1819

1920
bool Exec(const std::string& engine, const std::string& version = "latest",
2021
const std::string& src = "");

engine/cli/commands/engine_update_cmd.cc

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ bool EngineUpdateCmd::Exec(const std::string& host, int port,
2525
dp.Connect(host, port);
2626
// engine can be small, so need to start ws first
2727
auto dp_res = std::async(std::launch::deferred, [&dp] {
28-
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
28+
bool need_cuda_download =
29+
!system_info_utils::GetDriverAndCudaVersion().second.empty();
2930
if (need_cuda_download) {
3031
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
3132
} else {

engine/cli/commands/server_start_cmd.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ namespace commands {
88

99
namespace {
1010
bool TryConnectToServer(const std::string& host, int port) {
11-
constexpr const auto kMaxRetry = 3u;
11+
constexpr const auto kMaxRetry = 4u;
1212
auto count = 0u;
1313
// Check if server is started
1414
while (true) {

engine/services/engine_service.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ class EngineService : public EngineServiceI {
6060
explicit EngineService(std::shared_ptr<DownloadService> download_service)
6161
: download_service_{download_service},
6262
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
63-
.cuda_driver_version = system_info_utils::GetCudaVersion()} {}
63+
.cuda_driver_version =
64+
system_info_utils::GetDriverAndCudaVersion().second} {}
6465

6566
std::vector<EngineInfo> GetEngineInfoList() const;
6667

engine/services/hardware_service.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ namespace services {
1616

1717
namespace {
1818
bool TryConnectToServer(const std::string& host, int port) {
19-
constexpr const auto kMaxRetry = 3u;
19+
constexpr const auto kMaxRetry = 4u;
2020
auto count = 0u;
2121
// Check if server is started
2222
while (true) {
@@ -292,7 +292,7 @@ void HardwareService::UpdateHardwareInfos() {
292292
}
293293

294294
#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
295-
if (system_info_utils::IsNvidiaSmiAvailable()) {
295+
if (!gpus.empty()) {
296296
const char* value = std::getenv("CUDA_VISIBLE_DEVICES");
297297
if (value) {
298298
LOG_INFO << "CUDA_VISIBLE_DEVICES: " << value;

engine/utils/hardware/gpu_info.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,11 @@ inline std::vector<GPU> GetGPUInfo() {
1111
// Only support for nvidia for now
1212
// auto gpus = hwinfo::getAllGPUs();
1313
auto nvidia_gpus = system_info_utils::GetGpuInfoList();
14-
auto cuda_version = system_info_utils::GetCudaVersion();
1514
for (auto& n : nvidia_gpus) {
1615
res.emplace_back(
1716
GPU{.id = n.id,
1817
.name = n.name,
19-
.version = cuda_version,
18+
.version = nvidia_gpus[0].cuda_driver_version.value_or("unknown"),
2019
.add_info =
2120
NvidiaAddInfo{
2221
.driver_version = n.driver_version.value_or("unknown"),

engine/utils/system_info_utils.h

Lines changed: 22 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ constexpr static auto kUnsupported{"Unsupported"};
1919
constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"};
2020
constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"};
2121
constexpr static auto kGpuQueryCommand{
22-
"nvidia-smi --query-gpu=index,memory.total,memory.free,name,compute_cap,uuid "
22+
"nvidia-smi "
23+
"--query-gpu=index,memory.total,memory.free,name,compute_cap,uuid "
2324
"--format=csv,noheader,nounits"};
2425
constexpr static auto kGpuInfoRegex{
2526
R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+),\s*([^\n,]+))"};
@@ -100,53 +101,42 @@ inline bool IsNvidiaSmiAvailable() {
100101
#endif
101102
}
102103

103-
inline std::string GetDriverVersion() {
104+
inline std::pair<std::string, std::string> GetDriverAndCudaVersion() {
104105
if (!IsNvidiaSmiAvailable()) {
105106
CTL_INF("nvidia-smi is not available!");
106-
return "";
107+
return {};
107108
}
108109
try {
110+
std::string driver_version;
111+
std::string cuda_version;
109112
CommandExecutor cmd("nvidia-smi");
110113
auto output = cmd.execute();
111114

112115
const std::regex driver_version_reg(kDriverVersionRegex);
113-
std::smatch match;
116+
std::smatch driver_match;
114117

115-
if (std::regex_search(output, match, driver_version_reg)) {
116-
LOG_INFO << "Gpu Driver Version: " << match[1].str();
117-
return match[1].str();
118+
if (std::regex_search(output, driver_match, driver_version_reg)) {
119+
LOG_INFO << "Gpu Driver Version: " << driver_match[1].str();
120+
driver_version = driver_match[1].str();
118121
} else {
119122
LOG_ERROR << "Gpu Driver not found!";
120-
return "";
123+
return {};
121124
}
122-
} catch (const std::exception& e) {
123-
LOG_ERROR << "Error: " << e.what();
124-
return "";
125-
}
126-
}
127-
128-
inline std::string GetCudaVersion() {
129-
if (!IsNvidiaSmiAvailable()) {
130-
CTL_INF("nvidia-smi is not available!");
131-
return "";
132-
}
133-
try {
134-
CommandExecutor cmd("nvidia-smi");
135-
auto output = cmd.execute();
136125

137126
const std::regex cuda_version_reg(kCudaVersionRegex);
138-
std::smatch match;
127+
std::smatch cuda_match;
139128

140-
if (std::regex_search(output, match, cuda_version_reg)) {
141-
LOG_INFO << "CUDA Version: " << match[1].str();
142-
return match[1].str();
129+
if (std::regex_search(output, cuda_match, cuda_version_reg)) {
130+
LOG_INFO << "CUDA Version: " << cuda_match[1].str();
131+
cuda_version = cuda_match[1].str();
143132
} else {
144133
LOG_ERROR << "CUDA Version not found!";
145-
return "";
134+
return {};
146135
}
136+
return std::pair(driver_version, cuda_version);
147137
} catch (const std::exception& e) {
148138
LOG_ERROR << "Error: " << e.what();
149-
return "";
139+
return {};
150140
}
151141
}
152142

@@ -227,9 +217,9 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
227217
if (!IsNvidiaSmiAvailable())
228218
return gpuInfoList;
229219
try {
230-
// TODO: improve by parsing both in one command execution
231-
auto driver_version = GetDriverVersion();
232-
auto cuda_version = GetCudaVersion();
220+
auto [driver_version, cuda_version] = GetDriverAndCudaVersion();
221+
if (driver_version.empty() || cuda_version.empty())
222+
return gpuInfoList;
233223

234224
CommandExecutor cmd(kGpuQueryCommand);
235225
auto output = cmd.execute();
@@ -249,7 +239,7 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
249239
driver_version, // driver_version
250240
cuda_version, // cuda_driver_version
251241
match[5].str(), // compute_cap
252-
match[6].str() // uuid
242+
match[6].str() // uuid
253243
};
254244
gpuInfoList.push_back(gpuInfo);
255245
search_start = match.suffix().first;

0 commit comments

Comments
 (0)