Skip to content

Commit a11ebc9

Browse files
authored
Merge pull request #26 from PKUHPC/feat-partition-management
feat: 资源分区管理优化
2 parents 3913c70 + 319e25f commit a11ebc9

File tree

5 files changed

+160
-2
lines changed

5 files changed

+160
-2
lines changed

.changeset/big-plants-nail.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@scow/scow-scheduler-adapter-interface": minor
3+
---
4+
在 QueryAccountBlockStatus 的Response中变更 blocked 的注释,增加 account_blocked_details
5+
在 ListImplementedOptionalFeatures 中增加可选功能枚举值
6+
在 account.proto中增加可选功能rpc接口

.changeset/yellow-chefs-itch.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@scow/scow-scheduler-adapter-interface": patch
3+
---
4+
5+
在 GetClusterInfoResponse 中增加集群节点信息,作业信息等内容优化查询效率

protos/account.proto

+131-1
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,26 @@ message QueryAccountBlockStatusRequest {
6868
string account_name = 1;
6969
}
7070

71+
message AccountStatusInPartition {
72+
string partition = 1;
73+
bool blocked = 2;
74+
}
75+
7176
message QueryAccountBlockStatusResponse {
77+
// In versions 1.7.0 and earlier:
78+
// - The account's status is uniformly consistent across all partitions by default.
79+
// - false: The queried account is unblocked in all partitions.
80+
// - true: The queried account has no available partitions.
81+
82+
// In versions later than 1.7.0:
83+
// - The field "blocked" represents whether the account is completely blocked across all partitions:
84+
// - true: The account is fully blocked in all partitions.
85+
// - false: The account has one or more partitions where it is not blocked.
7286
bool blocked = 1;
87+
88+
// the details of account blocked status in every partition
89+
// only returned in versions later than 1.7.0
90+
repeated AccountStatusInPartition account_blocked_details = 2;
7391
}
7492

7593
message DeleteAccountRequest {
@@ -79,6 +97,72 @@ message DeleteAccountRequest {
7997
message DeleteAccountResponse {
8098
}
8199

100+
101+
102+
// ************ Below are the interfaces for optional feature: RESOURCE_MANAGEMENT ***************
103+
message BlockAccountWithPartitionsRequest {
104+
string account_name = 1;
105+
// when the value exists: block specified partition(s) of the account
106+
// when the value is [] or undefined: block the account in all partitions
107+
repeated string blocked_partitions = 2;
108+
}
109+
110+
message BlockAccountWithPartitionsResponse {
111+
}
112+
113+
message UnblockAccountWithPartitionsRequest {
114+
string account_name = 1;
115+
// specify the available partition(s) when executing unblock
116+
// when the value is [] or undefined: use all partitions
117+
repeated string unblocked_partitions = 2;
118+
}
119+
120+
message UnblockAccountWithPartitionsResponse {
121+
}
122+
123+
message QueryAccountBlockStatusWithPartitionsRequest {
124+
string account_name = 1;
125+
// query in specified partition(s)
126+
// when the value is [] or undefined, return all partitions blocked details
127+
repeated string queried_partitions = 2;
128+
}
129+
130+
message QueryAccountBlockStatusWithPartitionsResponse {
131+
// return the overall blocked_status in all partitions
132+
// false: when the queried account has one or more available partitions
133+
// true: when the queried account has no available partitions
134+
bool blocked = 1;
135+
// the details of account blocked status in every partition
136+
repeated AccountStatusInPartition account_blocked_details = 2;
137+
}
138+
139+
message ClusterAccountInfoWithBlockedDetails {
140+
message UserInAccount {
141+
string user_id = 1;
142+
string user_name = 2;
143+
bool blocked = 3;
144+
}
145+
string account_name = 1;
146+
repeated UserInAccount users = 2;
147+
optional string owner = 3;
148+
// return the overall blocked_status in all partitions
149+
// false: when the queried account has one or more available partitions
150+
// true: when the queried account has no available partitions
151+
bool blocked = 4;
152+
// the details of account blocked status in every partition
153+
repeated AccountStatusInPartition account_blocked_details = 5;
154+
}
155+
156+
message GetAllAccountsWithUsersAndBlockedDetailsRequest {
157+
158+
}
159+
160+
message GetAllAccountsWithUsersAndBlockedDetailsResponse {
161+
repeated ClusterAccountInfoWithBlockedDetails accounts = 1;
162+
}
163+
// ********** Above are the interfaces for optional feature: RESOURCE_MANAGEMENT ***************
164+
165+
82166
service AccountService {
83167

84168
/**
@@ -128,17 +212,63 @@ service AccountService {
128212

129213
/*
130214
* description: query if an account is blocked
215+
* Version differences:
216+
* - In versions 1.7.0 and earlier:
217+
* - The response's 'blocked' field indicates a uniform status across all partitions.
218+
* - The 'account_blocked_details' field is not returned.
219+
* - In versions after 1.7.0:
220+
* - The 'blocked' field indicates whether the account is completely blocked across all partitions.
221+
* - The 'account_blocked_details' field provides detailed status information for each partition.
131222
* errors:
132223
* - account not exist
133224
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
134225
*/
135226
rpc QueryAccountBlockStatus(QueryAccountBlockStatusRequest) returns (QueryAccountBlockStatusResponse);
136227

137-
/*
228+
/*
138229
* description: delete account
139230
* errors:
140231
* - account not exist
141232
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
142233
*/
143234
rpc DeleteAccount(DeleteAccountRequest) returns (DeleteAccountResponse);
235+
236+
237+
// ********** Below are the rpcs for optional feature: RESOURCE_MANAGEMENT ***************
238+
/*
239+
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
240+
* description: block an account with specified partitions
241+
* errors:
242+
* - account not exist
243+
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
244+
* special case:
245+
* - account already blocked, don't throw error
246+
*/
247+
rpc BlockAccountWithPartitions(BlockAccountWithPartitionsRequest) returns (BlockAccountWithPartitionsResponse);
248+
/*
249+
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
250+
* description: unblock an account with specified partitions
251+
* errors:
252+
* - account not exist
253+
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
254+
* special case:
255+
* - account already unblocked, don't throw error
256+
*/
257+
rpc UnblockAccountWithPartitions(UnblockAccountWithPartitionsRequest) returns (UnblockAccountWithPartitionsResponse);
258+
/*
259+
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
260+
* description: query if an account is blocked with specified partitions
261+
* errors:
262+
* - account not exist
263+
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
264+
*/
265+
rpc QueryAccountBlockStatusWithPartitions(QueryAccountBlockStatusWithPartitionsRequest) returns (QueryAccountBlockStatusWithPartitionsResponse);
266+
/*
267+
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
268+
* description: get all accounts with blocked partitions' detail and all associated users
269+
* special case:
270+
* - account has no users: exclude this account
271+
*/
272+
rpc GetAllAccountsWithUsersAndBlockedDetails(GetAllAccountsWithUsersAndBlockedDetailsRequest) returns (GetAllAccountsWithUsersAndBlockedDetailsResponse);
273+
// ********** Above are the rpcs for optional feature: RESOURCE_MANAGEMENT ***************
144274
}

protos/config.proto

+18
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,23 @@ message GetClusterInfoRequest {
8484
message GetClusterInfoResponse {
8585
string cluster_name = 1;
8686
repeated PartitionInfo partitions = 2;
87+
// 3-17 Newly added parameters
88+
// only returned in versions later than 1.7.0
89+
uint32 node_count = 3;
90+
uint32 running_node_count = 4;
91+
uint32 idle_node_count = 5;
92+
uint32 not_available_node_count = 6;
93+
uint32 cpu_core_count =7;
94+
uint32 running_cpu_count = 8;
95+
uint32 idle_cpu_count = 9;
96+
uint32 not_available_cpu_count = 10;
97+
uint32 gpu_core_count = 11;
98+
uint32 running_gpu_count = 12;
99+
uint32 idle_gpu_count = 13;
100+
uint32 not_available_gpu_count = 14;
101+
uint32 job_count = 15;
102+
uint32 running_job_count = 16;
103+
uint32 pending_job_count = 17;
87104
}
88105

89106
message NodeInfo {
@@ -122,6 +139,7 @@ message ListImplementedOptionalFeaturesRequest {}
122139

123140
enum OptionalFeatures {
124141
UNKNOWN = 0;
142+
RESOURCE_MANAGEMENT = 1;
125143
}
126144

127145
message ListImplementedOptionalFeaturesResponse {

protos/job.proto

-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ message GetJobsRequest {
117117

118118
// returned jobs should be sorted if set
119119
optional SortInfo sort = 4;
120-
121120
}
122121

123122
message GetJobsResponse {

0 commit comments

Comments
 (0)