Skip to content

Add cbatch/crun/calloc add --exclusive #256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
57 changes: 30 additions & 27 deletions internal/cacct/CmdArgParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,33 +116,36 @@ func init() {
Fields are identified by a percent sign (%) followed by a character or string.
Use a dot (.) and a number between % and the format character or string to specify a minimum width for the field.
Supported format identifiers or string, string case insensitive:
%a/%Account - Display the account associated with the job.
%c/%AllocCpus - Display the number of allocated CPUs, formatted to two decimal places.
%D/%ElapsedTime - Display the elapsed time from the start of the job.
%E/%EndTime - Display the end time of the job.
%e/%ExitCode - Display the exit code of the job.
If the exit code is based on a specific base (e.g., kCraneExitCodeBase),
it formats as "0:<code>" or "<code>:0" based on the condition.
%h/%Held - Display the hold status of the job.
%j/%JobID - Display the ID of the job.
%k/%Comment - Display the comment of the job.
%L/%NodeList - Display the list of nodes the job is running on.
%l/%TimeLimit - Display the time limit of the job.
%m/%MemPerNode - Display the requested mem per node of the job.
%N/%NodeNum - Display the node num of the job.
%n/%JobName - Display the name of the job.
%P/%Partition - Display the partition associated with the job.
%p/%Priority - Display the priority of the job.
%q/%Qos - Display the QoS of the job.
%R/%Reason - Display the reason of pending.
%r/%ReqNodes - Display the reqnodes of the job.
%S/%StartTime - Display the start time of the job.
%s/%SubmitTime - Display the submit time num of the job.
%t/%State - Display the state of the job.
%T/%JobType - Display the job type.
%U/%UserName - Display the username of the job.
%u/%Uid - Display the uid of the job.
%x/%ExcludeNodes - Display the excludenodes of the job.
%a/%Account - Display the account associated with the job.
%C/%ReqCpus - Display the number of requested CPUs, formatted to two decimal places.
%c/%AllocCpus - Display the number of allocated CPUs, formatted to two decimal places.
%D/%ElapsedTime - Display the elapsed time from the start of the job.
%E/%EndTime - Display the end time of the job.
%e/%ExitCode - Display the exit code of the job.
If the exit code is based on a specific base (e.g., kCraneExitCodeBase),
it formats as "0:<code>" or "<code>:0" based on the condition.
%h/%Held - Display the hold status of the job.
%j/%JobID - Display the ID of the job.
%k/%Comment - Display the comment of the job.
%L/%NodeList - Display the list of nodes the job is running on.
%l/%TimeLimit - Display the time limit of the job.
%M/%ReqMemPerNode - Display the requested mem per node of the job.
%m/%AllocMemPerNode - Display the allocated mem per node of the job.
%N/%NodeNum - Display the node num of the job.
%n/%JobName - Display the name of the job.
%P/%Partition - Display the partition associated with the job.
%p/%Priority - Display the priority of the job.
%q/%Qos - Display the QoS of the job.
%R/%Reason - Display the reason of pending.
%r/%ReqNodes - Display the reqnodes of the job.
%S/%StartTime - Display the start time of the job.
%s/%SubmitTime - Display the submit time of the job.
%t/%State - Display the state of the job.
%T/%JobType - Display the job type.
%U/%UserName - Display the username of the job.
%u/%Uid - Display the uid of the job.
%x/%ExcludeNodes - Display the excludenodes of the job.
%X/%Exclusive - Display the exclusive status of the job.

Each format specifier or string can be modified with a width specifier (e.g., "%.5j").
If the width is specified, the field will be formatted to at least that width.
Expand Down
62 changes: 48 additions & 14 deletions internal/cacct/cacct.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ func QueryJob() util.CraneCmdError {
tableData := make([][]string, len(reply.TaskInfoList))
if FlagFull {
header = []string{"JobId", "JobName", "UserName", "Partition",
"NodeNum", "Account", "AllocCPUs", "MemPerNode", "State", "TimeLimit",
"StartTime", "EndTime", "SubmitTime", "Qos", "Held", "Priority", "CranedList", "ExitCode"}
"NodeNum", "Account", "ReqCPUs", "ReqMemPerNode", "AllocCPUs", "AllocMemPerNode", "State", "TimeLimit",
"StartTime", "EndTime", "SubmitTime", "Qos", "Exclusive", "Held", "Priority", "CranedList", "ExitCode"}

for i := 0; i < len(reply.TaskInfoList); i++ {
taskInfo := reply.TaskInfoList[i]
Expand Down Expand Up @@ -210,14 +210,17 @@ func QueryJob() util.CraneCmdError {
taskInfo.Partition,
strconv.FormatUint(uint64(taskInfo.NodeNum), 10),
taskInfo.Account,
strconv.FormatFloat(taskInfo.ResView.AllocatableRes.CpuCoreLimit*float64(taskInfo.NodeNum), 'f', 2, 64),
strconv.FormatUint(taskInfo.ResView.AllocatableRes.MemoryLimitBytes/(1024*1024), 10),
ProcessReqCPUs(taskInfo),
ProcessReqMemPerNode(taskInfo),
ProcessAllocCPUs(taskInfo),
ProcessAllocMemPerNode(taskInfo),
taskInfo.Status.String(),
timeLimitStr,
startTimeStr,
endTimeStr,
submitTimeStr,
taskInfo.Qos,
ProcessExclusive(taskInfo),
strconv.FormatBool(taskInfo.Held),
strconv.FormatUint(uint64(taskInfo.Priority), 10),
taskInfo.GetCranedList(),
Expand All @@ -240,7 +243,7 @@ func QueryJob() util.CraneCmdError {
taskInfo.Name,
taskInfo.Partition,
taskInfo.Account,
strconv.FormatFloat(taskInfo.ResView.AllocatableRes.CpuCoreLimit*float64(taskInfo.NodeNum), 'f', 2, 64),
ProcessAllocCPUs(taskInfo),
taskInfo.Status.String(),
exitCode}
}
Expand Down Expand Up @@ -318,9 +321,14 @@ func ProcessAccount(task *protos.TaskInfo) string {
return task.Account
}

// ProcessReqCPUs renders the ReqCPUs (C) column: the requested CPU core limit
// multiplied by the job's node count, formatted with two decimal places.
//
// The generated protobuf getters are used instead of direct field access so
// that a task whose ReqResView or AllocatableRes is unset yields "0.00"
// rather than panicking on a nil pointer.
func ProcessReqCPUs(task *protos.TaskInfo) string {
	coreLimit := task.GetReqResView().GetAllocatableRes().GetCpuCoreLimit()
	totalCpus := coreLimit * float64(task.GetNodeNum())
	return strconv.FormatFloat(totalCpus, 'f', 2, 64)
}

// AllocCPUs (c)
func ProcessAllocCPUs(task *protos.TaskInfo) string {
return strconv.FormatFloat(task.ResView.AllocatableRes.CpuCoreLimit*float64(task.NodeNum), 'f', 2, 64)
return strconv.FormatFloat(task.AllocatedResView.AllocatableRes.CpuCoreLimit, 'f', 2, 64)
}

// ElapsedTime (D)
Expand Down Expand Up @@ -397,9 +405,18 @@ func ProcessTimeLimit(task *protos.TaskInfo) string {
return util.SecondTimeFormat(task.TimeLimit.Seconds)
}

// MemPerNode (m)
func ProcessMemPerNode(task *protos.TaskInfo) string {
return strconv.FormatUint(task.ResView.AllocatableRes.MemoryLimitBytes/(1024*1024), 10)
// ProcessReqMemPerNode renders the ReqMemPerNode (M) column by passing the
// requested memory limit, in bytes, to util.FormatMemToMB.
//
// The generated protobuf getters are used instead of direct field access so
// that an unset ReqResView or AllocatableRes is treated as zero bytes rather
// than causing a nil-pointer panic.
func ProcessReqMemPerNode(task *protos.TaskInfo) string {
	reqMemBytes := task.GetReqResView().GetAllocatableRes().GetMemoryLimitBytes()
	return util.FormatMemToMB(reqMemBytes)
}

// ProcessAllocMemPerNode renders the AllocMemPerNode (m) column: the allocated
// memory limit divided evenly by the job's node count, formatted through
// util.FormatMemToMB.
//
// It returns "0" when the node count is zero, which also avoids a division by
// zero. The generated protobuf getters are used so that a task without an
// AllocatedResView or AllocatableRes is treated as zero bytes instead of
// panicking on a nil pointer.
func ProcessAllocMemPerNode(task *protos.TaskInfo) string {
	nodeNum := uint64(task.GetNodeNum())
	if nodeNum == 0 {
		return "0"
	}
	totalBytes := task.GetAllocatedResView().GetAllocatableRes().GetMemoryLimitBytes()
	return util.FormatMemToMB(totalBytes / nodeNum)
}

// NodeNum (N)
Expand Down Expand Up @@ -481,6 +498,11 @@ func ProcessUid(task *protos.TaskInfo) string {
return strconv.FormatUint(uint64(task.Uid), 10)
}

// ProcessExclusive renders the Exclusive (X) column as the literal string
// "true" or "false", mirroring the task's Exclusive flag.
func ProcessExclusive(task *protos.TaskInfo) string {
	if task.Exclusive {
		return "true"
	}
	return "false"
}

// ExcludeNodes (x)
func ProcessExcludeNodes(task *protos.TaskInfo) string {
return strings.Join(task.ExcludeNodes, ",")
Expand All @@ -491,6 +513,10 @@ var fieldProcessors = map[string]FieldProcessor{
"a": {"Account", ProcessAccount},
"account": {"Account", ProcessAccount},

// Group C
"C": {"ReqCpus", ProcessReqCPUs},
"reqcpus" : {"ReqCpus", ProcessReqCPUs},

// Group c
"c": {"AllocCPUs", ProcessAllocCPUs},
"alloccpus": {"AllocCPUs", ProcessAllocCPUs},
Expand Down Expand Up @@ -527,9 +553,13 @@ var fieldProcessors = map[string]FieldProcessor{
"l": {"TimeLimit", ProcessTimeLimit},
"timelimit": {"TimeLimit", ProcessTimeLimit},

// Group M
"M": {"ReqMemPerNode", ProcessReqMemPerNode},
"reqmempernode": {"ReqMemPerNode", ProcessReqMemPerNode},

// Group m
"m": {"MemPerNode", ProcessMemPerNode},
"mempernode": {"MemPerNode", ProcessMemPerNode},
"m": {"AllocMemPerNode", ProcessAllocMemPerNode},
"allocmempernode": {"AllocMemPerNode", ProcessAllocMemPerNode},

// Group N
"N": {"NodeNum", ProcessNodeNum},
Expand Down Expand Up @@ -583,6 +613,10 @@ var fieldProcessors = map[string]FieldProcessor{
"u": {"Uid", ProcessUid},
"uid": {"Uid", ProcessUid},

// Group X
"X": {"Exclusive", ProcessExclusive},
"exclusive": {"Exclusive", ProcessExclusive},

// Group x
"x": {"ExcludeNodes", ProcessExcludeNodes},
"excludenodes": {"ExcludeNodes", ProcessExcludeNodes},
Expand Down Expand Up @@ -640,9 +674,9 @@ func FormatData(reply *protos.QueryTasksInfoReply) (header []string, tableData [
fieldProcessor, found := fieldProcessors[field]
if !found {
log.Errorln("Invalid format specifier or string, string unfold case insensitive, reference:\n" +
"a/Account, c/AllocCPUs, D/ElapsedTime, E/EndTime, e/ExitCode, h/Held, j/JobID, L/NodeList, l/TimeLimit,\n" +
"m/MemPerNode, N/NodeNum, n/JobName, P/Partition, p/Priority, q/Qos, r/ReqNodes, R/Reason, S/StartTime,\n" +
"s/SubmitTime, T/JobType, t/State, U/UserName, u/Uid, x/ExcludeNodes.")
"a/Account, C/ReqCpus, c/AllocCPUs, D/ElapsedTime, E/EndTime, e/ExitCode, h/Held, j/JobID, L/NodeList, l/TimeLimit,\n" +
"M/ReqMemPerNode, m/AllocMemPerNode, N/NodeNum, n/JobName, P/Partition, p/Priority, q/Qos, r/ReqNodes, R/Reason, S/StartTime,\n" +
"s/SubmitTime, T/JobType, t/State, U/UserName, u/Uid, X/Exclusive, x/ExcludeNodes.")
os.Exit(util.ErrorInvalidFormat)
}

Expand Down
2 changes: 2 additions & 0 deletions internal/calloc/CmdArgParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var (
FlagExcludes string
FlagGetUserEnv bool
FlagExport string
FlagExclusive bool

FlagExtraAttr string
FlagMailType string
Expand Down Expand Up @@ -99,4 +100,5 @@ func init() {
RootCmd.Flags().StringVar(&FlagMailUser, "mail-user", "", "Mail address of the notification receiver")
RootCmd.Flags().StringVar(&FlagComment, "comment", "", "Comment of the job")
RootCmd.Flags().StringVarP(&FlagReservation, "reservation", "r", "", "Use reserved resources")
RootCmd.Flags().BoolVar(&FlagExclusive, "exclusive", false, "Exclusive node resources")
}
13 changes: 8 additions & 5 deletions internal/calloc/calloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ func MainCalloc(cmd *cobra.Command, args []string) util.CraneCmdError {
Name: "Interactive",
TimeLimit: util.InvalidDuration(),
PartitionName: "",
Resources: &protos.ResourceView{
ReqResources: &protos.ResourceView{
AllocatableRes: &protos.AllocatableResource{
CpuCoreLimit: 1,
MemoryLimitBytes: 0,
Expand Down Expand Up @@ -399,7 +399,7 @@ func MainCalloc(cmd *cobra.Command, args []string) util.CraneCmdError {

if FlagGres != "" {
gresMap := util.ParseGres(FlagGres)
task.Resources.DeviceMap = gresMap
task.ReqResources.DeviceMap = gresMap
}
if FlagTime != "" {
seconds, err := util.ParseDurationStrToSeconds(FlagTime)
Expand All @@ -415,8 +415,8 @@ func MainCalloc(cmd *cobra.Command, args []string) util.CraneCmdError {
log.Errorf("Invalid argument: %v", err)
return util.ErrorCmdArg
}
task.Resources.AllocatableRes.MemoryLimitBytes = memInByte
task.Resources.AllocatableRes.MemorySwLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemoryLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemorySwLimitBytes = memInByte
}
if FlagPartition != "" {
task.PartitionName = FlagPartition
Expand Down Expand Up @@ -461,6 +461,9 @@ func MainCalloc(cmd *cobra.Command, args []string) util.CraneCmdError {
if FlagComment != "" {
structExtraFromCli.Comment = FlagComment
}
if FlagExclusive {
task.Exclusive = true
}

// Marshal extra attributes
if err := structExtraFromCli.Marshal(&task.ExtraAttr); err != nil {
Expand All @@ -469,7 +472,7 @@ func MainCalloc(cmd *cobra.Command, args []string) util.CraneCmdError {
}

// Set total limit of cpu cores
task.Resources.AllocatableRes.CpuCoreLimit = task.CpusPerTask * float64(task.NtasksPerNode)
task.ReqResources.AllocatableRes.CpuCoreLimit = task.CpusPerTask * float64(task.NtasksPerNode)

// Check the validity of the parameters
if err := util.CheckTaskArgs(task); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions internal/cbatch/CmdArgParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var (
FlagStdoutPath string
FlagStderrPath string
FlagOpenMode string
FlagExclusive bool

FlagWrappedScript string

Expand Down Expand Up @@ -163,4 +164,5 @@ func init() {
RootCmd.Flags().BoolVar(&FlagJson, "json", false, "Output in JSON format")
RootCmd.Flags().StringVar(&FlagOpenMode, "open-mode", "", "Set the mode for opening output and error files, supported values: append, truncate (default is truncate) ")
RootCmd.Flags().StringVarP(&FlagReservation, "reservation", "r", "", "Use reserved resources")
RootCmd.Flags().BoolVar(&FlagExclusive, "exclusive", false, "Exclusive node resources")
}
29 changes: 21 additions & 8 deletions internal/cbatch/cbatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ type CbatchArg struct {
func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.TaskToCtld) {
task := new(protos.TaskToCtld)
task.TimeLimit = util.InvalidDuration()
task.Resources = &protos.ResourceView{
task.ReqResources = &protos.ResourceView{
AllocatableRes: &protos.AllocatableResource{
CpuCoreLimit: 1,
MemoryLimitBytes: 0,
Expand Down Expand Up @@ -82,7 +82,7 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
task.CpusPerTask = num
case "--gres":
gresMap := util.ParseGres(arg.val)
task.Resources.DeviceMap = gresMap
task.ReqResources.DeviceMap = gresMap
case "--ntasks-per-node":
num, err := strconv.ParseUint(arg.val, 10, 32)
if err != nil {
Expand All @@ -103,8 +103,8 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
log.Errorf("Invalid argument: %v in script: %v", arg.name, err)
return false, nil
}
task.Resources.AllocatableRes.MemoryLimitBytes = memInByte
task.Resources.AllocatableRes.MemorySwLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemoryLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemorySwLimitBytes = memInByte
case "-p", "--partition":
task.PartitionName = arg.val
case "-J", "--job-name":
Expand Down Expand Up @@ -155,6 +155,13 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
}
case "-r", "--reservation":
task.Reservation = arg.val
case "--exclusive":
val, err := strconv.ParseBool(arg.val)
if err != nil {
log.Errorf("Invalid argument: %v in script: %v", arg.name, err)
return false, nil
}
task.Exclusive = val
default:
log.Errorf("Invalid argument: unrecognized '%s' is given in the script", arg.name)
return false, nil
Expand All @@ -181,7 +188,7 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
task.NtasksPerNode = FlagNtasksPerNode
}
if cmd.Flags().Changed("gres") {
task.Resources.DeviceMap = util.ParseGres(FlagGres)
task.ReqResources.DeviceMap = util.ParseGres(FlagGres)
}

if FlagTime != "" {
Expand All @@ -198,8 +205,8 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
log.Errorf("Invalid argument: %v", err)
return false, nil
}
task.Resources.AllocatableRes.MemoryLimitBytes = memInByte
task.Resources.AllocatableRes.MemorySwLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemoryLimitBytes = memInByte
task.ReqResources.AllocatableRes.MemorySwLimitBytes = memInByte
}

if FlagPartition != "" {
Expand Down Expand Up @@ -247,6 +254,9 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
if FlagComment != "" {
structExtraFromCli.Comment = FlagComment
}
if FlagExclusive {
task.Exclusive = true
}
if FlagOpenMode != "" {
if FlagOpenMode == util.OpenModeAppend {
task.GetBatchMeta().OpenModeAppend = proto.Bool(true)
Expand All @@ -257,6 +267,9 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
return false, nil
}
}
if FlagExclusive {
task.Exclusive = true
}

// Set and check the extra attributes
var extraFromCli string
Expand All @@ -270,7 +283,7 @@ func ProcessCbatchArgs(cmd *cobra.Command, args []CbatchArg) (bool, *protos.Task
}

// Set total limit of cpu cores
task.Resources.AllocatableRes.CpuCoreLimit = task.CpusPerTask * float64(task.NtasksPerNode)
task.ReqResources.AllocatableRes.CpuCoreLimit = task.CpusPerTask * float64(task.NtasksPerNode)

// Check the validity of the parameters
if err := util.CheckFileLength(task.GetBatchMeta().OutputFilePattern); err != nil {
Expand Down
Loading