#
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# ConfigMap holding the Slurm configuration files, one data key per file:
# slurm.conf (which includes cloud.conf), cloud_gres.conf, gres.conf and
# cgroup.conf. ${namespace} is a template placeholder filled in before apply.
apiVersion: v1
kind: ConfigMap
metadata:
  name: slurm-conf-configmap
  # Quoted so the rendered value is always read as a string.
  namespace: "${namespace}"
data:
  slurm.conf: |
    # slurm.conf
    #
    # See the slurm.conf man page for more information.
    #
    ClusterName=linux
    SlurmctldHost=slurmctld-0
    #
    SlurmUser=slurm
    SlurmctldPort=6820-6830
    SlurmdPort=6818
    AuthType=auth/munge
    StateSaveLocation=/var/spool/slurmctld
    SlurmdSpoolDir=/var/spool/slurmd
    SwitchType=switch/none
    MpiDefault=pmix
    SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
    SlurmdPidFile=/var/run/slurmd/slurmd.pid
    ProctrackType=proctrack/linuxproc
    ReturnToService=2
    #
    # TIMERS
    SlurmctldTimeout=300
    SlurmdTimeout=30
    InactiveLimit=0
    MinJobAge=300
    KillWait=30
    Waittime=0
    #
    # SCHEDULING
    SchedulerType=sched/backfill
    SelectType=select/cons_tres
    SelectTypeParameters=CR_CPU_Memory
    #
    # LOGGING
    SlurmctldDebug=3
    SlurmctldLogFile=/var/log/slurm/slurmctld.log
    SlurmdDebug=3
    SlurmdLogFile=/var/log/slurm/slurmd.log
    JobCompType=jobcomp/filetxt
    JobCompLoc=/var/log/slurm/jobcomp.log
    #
    # ACCOUNTING
    JobAcctGatherType=jobacct_gather/linux
    JobAcctGatherFrequency=30
    #
    AccountingStorageType=accounting_storage/slurmdbd
    AccountingStorageHost=slurmdbd
    AccountingStoragePort=6819
    #
    # All SlurmctldParameters flags are listed on this single line. A second
    # SlurmctldParameters line (for example in the included cloud.conf) does
    # not append to this one, so the flags must not be split across files.
    SlurmctldParameters=cloud_reg_addrs,enable_configless

    # CLOUD CONFIGURATIONS
    MaxNodeCount=64000
    include cloud.conf
  # NOTE(review): both GPU partitions below set DefMemPerCPU=7007, while the
  # nodes declare RealMemory=15000 with CPUs=4 and RealMemory=30000 with
  # CPUs=8, so DefMemPerCPU * CPUs exceeds RealMemory - confirm this is
  # intended.
  cloud.conf: |
    PrivateData=cloud
    # SlurmctldParameters (including enable_configless) is set once in slurm.conf.
    ## GRES
    GresTypes=gpu
    AccountingStorageTRES=gres/gpu
    DebugFlags=Gres
    TreeWidth=128

    # NODES
    NodeName=DEFAULT State=UNKNOWN RealMemory=15000 CPUs=4 CoresPerSocket=2 ThreadsPerCore=2 Gres=gpu:1
    NodeName=slurmd-[0-39] State=CLOUD Gres=gpu:1
    NodeSet=slurmdnodeset Nodes=slurmd-[0-39]

    NodeName=DEFAULT State=UNKNOWN RealMemory=30000 CPUs=8 CoresPerSocket=2 ThreadsPerCore=2 Gres=gpu:2
    NodeName=slurmd1-[0-39] State=CLOUD Gres=gpu:2
    NodeSet=slurmd1nodeset Nodes=slurmd1-[0-39]

    # PARTITIONS
    PartitionName=all Default=yes Nodes=ALL MaxTime=INFINITE State=UP

    PropagateResourceLimitsExcept=MEMLOCK

    PartitionName=1gpunodes Nodes=slurmdnodeset State=UP DefMemPerCPU=7007 SuspendTime=300 Oversubscribe=Exclusive PowerDownOnIdle=YES ResumeTimeout=300 SuspendTimeout=120
    PartitionName=2gpunodes Nodes=slurmd1nodeset State=UP DefMemPerCPU=7007 SuspendTime=300 Oversubscribe=Exclusive PowerDownOnIdle=YES ResumeTimeout=300 SuspendTimeout=120

  # cloud_gres.conf and gres.conf carry identical content; keep the two keys
  # in sync whenever the GPU device mapping changes.
  cloud_gres.conf: |
    NodeName=slurmd-[0-39] Name=gpu File=/dev/nvidia0
    NodeName=slurmd1-[0-39] Name=gpu File=/dev/nvidia[0-1]
  gres.conf: |
    NodeName=slurmd-[0-39] Name=gpu File=/dev/nvidia0
    NodeName=slurmd1-[0-39] Name=gpu File=/dev/nvidia[0-1]
  # NOTE(review): slurm.conf in this ConfigMap sets
  # ProctrackType=proctrack/linuxproc and no TaskPlugin; the Constrain*
  # options below are normally enforced through the cgroup plugins - confirm
  # that these settings actually take effect.
  cgroup.conf: |
    ###
    #
    # Slurm cgroup support configuration file
    #
    # See man slurm.conf and man cgroup.conf for further
    # information on cgroup configuration parameters
    #--
    ConstrainCores=yes
    ConstrainDevices=yes
    ConstrainRAMSpace=yes
    ConstrainSwapSpace=yes
    IgnoreSystemd=yes
0 commit comments