Skip to content

feat: 资源分区管理优化 #26

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/big-plants-nail.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@scow/scow-scheduler-adapter-interface": minor
---
在 QueryAccountBlockStatus 的Response中变更 blocked 的注释,增加 account_blocked_details
在 ListImplementedOptionalFeatures 中增加可选功能枚举值
在 account.proto中增加可选功能rpc接口
5 changes: 5 additions & 0 deletions .changeset/yellow-chefs-itch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@scow/scow-scheduler-adapter-interface": patch
---

在 GetClusterInfoResponse 中增加集群节点信息,作业信息等内容优化查询效率
132 changes: 131 additions & 1 deletion protos/account.proto
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,26 @@ message QueryAccountBlockStatusRequest {
string account_name = 1;
}

// Blocked status of an account within a single partition.
message AccountStatusInPartition {
// the partition this status applies to
string partition = 1;
// whether the account is blocked in this partition
bool blocked = 2;
}

message QueryAccountBlockStatusResponse {
// In versions 1.7.0 and earlier:
// - The account's status is uniform across all partitions by default.
// - false: The queried account is unblocked in all partitions.
// - true: The queried account has no available partitions.
//
// In versions later than 1.7.0:
// - "blocked" indicates whether the account is blocked in every partition:
// - true: The account is blocked in all partitions.
// - false: The account has one or more partitions where it is not blocked.
bool blocked = 1;

// Per-partition blocked status of the account.
// Only returned in versions later than 1.7.0.
repeated AccountStatusInPartition account_blocked_details = 2;
}

message DeleteAccountRequest {
Expand All @@ -79,6 +97,72 @@ message DeleteAccountRequest {
// Response of DeleteAccount; it currently carries no fields.
message DeleteAccountResponse {
}



// ************ Below is the interfaces for optional feature: RESOURCE_MANAGEMENT ***************
// Request of BlockAccountWithPartitions (optional feature: RESOURCE_MANAGEMENT).
message BlockAccountWithPartitionsRequest {
// the account to block
string account_name = 1;
// when non-empty: block the account only in the specified partition(s)
// when [] or undefined: block the account in all partitions
repeated string blocked_partitions = 2;
}

// Response of BlockAccountWithPartitions; it currently carries no fields.
message BlockAccountWithPartitionsResponse {
}

// Request of UnblockAccountWithPartitions (optional feature: RESOURCE_MANAGEMENT).
message UnblockAccountWithPartitionsRequest {
// the account to unblock
string account_name = 1;
// the partition(s) to make available to the account when unblocking
// when [] or undefined: use all partitions
repeated string unblocked_partitions = 2;
}

// Response of UnblockAccountWithPartitions; it currently carries no fields.
message UnblockAccountWithPartitionsResponse {
}

// Request of QueryAccountBlockStatusWithPartitions (optional feature: RESOURCE_MANAGEMENT).
message QueryAccountBlockStatusWithPartitionsRequest {
// the account to query
string account_name = 1;
// restrict the query to the specified partition(s)
// when [] or undefined: return the blocked details of all partitions
repeated string queried_partitions = 2;
}

// Response of QueryAccountBlockStatusWithPartitions (optional feature: RESOURCE_MANAGEMENT).
message QueryAccountBlockStatusWithPartitionsResponse {
// the overall blocked status across all partitions
// false: the queried account has one or more available partitions
// true: the queried account has no available partitions
bool blocked = 1;
// the blocked status of the account in every partition
repeated AccountStatusInPartition account_blocked_details = 2;
}

// An account together with its users and its per-partition blocked status
// (optional feature: RESOURCE_MANAGEMENT).
message ClusterAccountInfoWithBlockedDetails {
// A user that belongs to the account.
message UserInAccount {
string user_id = 1;
string user_name = 2;
// whether this user is blocked
// NOTE(review): presumably the user's status within this account; confirm with implementers
bool blocked = 3;
}
string account_name = 1;
// the users associated with the account
repeated UserInAccount users = 2;
// owner of the account; may be unset
optional string owner = 3;
// the overall blocked status across all partitions
// false: the account has one or more available partitions
// true: the account has no available partitions
bool blocked = 4;
// the blocked status of the account in every partition
repeated AccountStatusInPartition account_blocked_details = 5;
}

// Request of GetAllAccountsWithUsersAndBlockedDetails; it currently carries no fields.
message GetAllAccountsWithUsersAndBlockedDetailsRequest {

}

// Response of GetAllAccountsWithUsersAndBlockedDetails.
message GetAllAccountsWithUsersAndBlockedDetailsResponse {
// the returned accounts, each with its users and per-partition blocked details
repeated ClusterAccountInfoWithBlockedDetails accounts = 1;
}
// ********** Above is the interfaces for optional feature: RESOURCE_MANAGEMENT ***************


service AccountService {

/**
Expand Down Expand Up @@ -128,17 +212,63 @@ service AccountService {

/*
* description: query whether an account is blocked
* Version differences:
* - In versions 1.7.0 and earlier:
* - The response's 'blocked' field indicates a uniform status across all partitions.
* - The 'account_blocked_details' field is not returned.
* - In versions later than 1.7.0:
* - The 'blocked' field indicates whether the account is blocked in every partition.
* - The 'account_blocked_details' field provides the blocked status for each partition.
* errors:
* - account not exist
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
*/
rpc QueryAccountBlockStatus(QueryAccountBlockStatusRequest) returns (QueryAccountBlockStatusResponse);

/*
* description: delete account
* errors:
* - account not exist
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
*/
rpc DeleteAccount(DeleteAccountRequest) returns (DeleteAccountResponse);


// ********** Below is the rpcs for optional feature: RESOURCE_MANAGEMENT ***************
/*
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
* description: block an account in the specified partitions
* errors:
* - account not exist
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
* special case:
* - account already blocked, don't throw error
*/
rpc BlockAccountWithPartitions(BlockAccountWithPartitionsRequest) returns (BlockAccountWithPartitionsResponse);
/*
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
* description: unblock an account in the specified partitions
* errors:
* - account not exist
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
* special case:
* - account already unblocked, don't throw error
*/
rpc UnblockAccountWithPartitions(UnblockAccountWithPartitionsRequest) returns (UnblockAccountWithPartitionsResponse);
/*
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
* description: query whether an account is blocked in the specified partitions
* errors:
* - account not exist
* NOT_FOUND, ACCOUNT_NOT_FOUND, {}
*/
rpc QueryAccountBlockStatusWithPartitions(QueryAccountBlockStatusWithPartitionsRequest) returns (QueryAccountBlockStatusWithPartitionsResponse);
/*
* FOR OPTIONAL FEATURE: RESOURCE_MANAGEMENT
* description: get all accounts with their per-partition blocked details and all associated users
* special case:
* - an account that has no users is excluded from the result
*/
rpc GetAllAccountsWithUsersAndBlockedDetails(GetAllAccountsWithUsersAndBlockedDetailsRequest) returns (GetAllAccountsWithUsersAndBlockedDetailsResponse);
// ********** Above are the rpcs for optional feature: RESOURCE_MANAGEMENT ***************
}
18 changes: 18 additions & 0 deletions protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,23 @@ message GetClusterInfoRequest {
message GetClusterInfoResponse {
  string cluster_name = 1;
  repeated PartitionInfo partitions = 2;

  // Fields 3-17 are newly added and are only returned in versions later
  // than 1.7.0.

  // Node counts.
  uint32 node_count = 3;
  uint32 running_node_count = 4;
  uint32 idle_node_count = 5;
  uint32 not_available_node_count = 6;

  // CPU counts.
  uint32 cpu_core_count = 7;
  uint32 running_cpu_count = 8;
  uint32 idle_cpu_count = 9;
  uint32 not_available_cpu_count = 10;

  // GPU counts.
  uint32 gpu_core_count = 11;
  uint32 running_gpu_count = 12;
  uint32 idle_gpu_count = 13;
  uint32 not_available_gpu_count = 14;

  // Job counts.
  uint32 job_count = 15;
  uint32 running_job_count = 16;
  uint32 pending_job_count = 17;
}

message NodeInfo {
Expand Down Expand Up @@ -122,6 +139,7 @@ message ListImplementedOptionalFeaturesRequest {}

// Optional features reported by ListImplementedOptionalFeatures.
enum OptionalFeatures {
// zero value; does not denote a feature
UNKNOWN = 0;
// the messages and rpcs marked "optional feature: RESOURCE_MANAGEMENT" in account.proto
RESOURCE_MANAGEMENT = 1;
}

message ListImplementedOptionalFeaturesResponse {
Expand Down
1 change: 0 additions & 1 deletion protos/job.proto
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ message GetJobsRequest {

// returned jobs should be sorted if set
optional SortInfo sort = 4;

}

message GetJobsResponse {
Expand Down
Loading