Skip to content

Commit 4f2f1b3

Browse files
kaichiachen, pbacsko
authored and committed
[YUNIKORN-2854] Add queue maxRunningApps metrics (#1012)
Closes: #1012 Signed-off-by: Peter Bacsko <[email protected]>
1 parent 7391aeb commit 4f2f1b3

File tree

4 files changed

+34
-7
lines changed

4 files changed

+34
-7
lines changed

pkg/metrics/queue.go

+11 -6
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,11 @@ const (
4444
ContainerAllocated = "allocated"
4545
ContainerRejected = "rejected"
4646

47-
QueueGuaranteed = "guaranteed"
48-
QueueMax = "max"
49-
QueuePending = "pending"
50-
QueuePreempting = "preempting"
47+
QueueGuaranteed = "guaranteed"
48+
QueueMax = "max"
49+
QueuePending = "pending"
50+
QueuePreempting = "preempting"
51+
QueueMaxRunningApps = "maxRunningApps"
5152
)
5253

5354
// QueueMetrics to declare queue metrics
@@ -99,15 +100,15 @@ func InitQueueMetrics(name string) *QueueMetrics {
99100
Namespace: Namespace,
100101
Name: "queue_resource",
101102
ConstLabels: prometheus.Labels{"queue": name},
102-
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`.",
103+
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`.",
103104
}, []string{"state", "resource"})
104105

105106
q.resourceMetricsSubsystem = prometheus.NewGaugeVec(
106107
prometheus.GaugeOpts{
107108
Namespace: Namespace,
108109
Subsystem: replaceStr,
109110
Name: "queue_resource",
110-
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`.",
111+
Help: "Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`.",
111112
}, []string{"state", "resource"})
112113

113114
var queueMetricsList = []prometheus.Collector{
@@ -354,3 +355,7 @@ func (m *QueueMetrics) SetQueuePendingResourceMetrics(resourceName string, value
354355
func (m *QueueMetrics) SetQueuePreemptingResourceMetrics(resourceName string, value float64) {
355356
m.setQueueResource(QueuePreempting, resourceName, value)
356357
}
358+
359+
func (m *QueueMetrics) SetQueueMaxRunningAppsMetrics(value uint64) {
360+
m.setQueueResource(QueueMaxRunningApps, "apps", float64(value))
361+
}

pkg/metrics/queue_test.go

+8
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,14 @@ func TestQueuePreemptingResourceMetrics(t *testing.T) {
246246
verifyResourceMetrics(t, "preempting", "cpu")
247247
}
248248

249+
func TestQueueMaxRunningAppsResourceMetrics(t *testing.T) {
250+
qm = getQueueMetrics()
251+
defer unregisterQueueMetrics()
252+
253+
qm.SetQueueMaxRunningAppsMetrics(1)
254+
verifyResourceMetrics(t, "maxRunningApps", "apps")
255+
}
256+
249257
func TestRemoveQueueMetrics(t *testing.T) {
250258
testQueueName := "root.test"
251259
qm = GetQueueMetrics(testQueueName)

pkg/scheduler/objects/queue.go

+8
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@ func NewConfiguredQueue(conf configs.QueueConfig, parent *Queue) (*Queue, error)
125125
sq.parent = parent
126126
sq.isManaged = true
127127
sq.maxRunningApps = conf.MaxApplications
128+
sq.updateMaxRunningAppsMetrics()
128129

129130
// update the properties
130131
if err := sq.applyConf(conf); err != nil {
@@ -223,6 +224,7 @@ func (sq *Queue) applyTemplate(childTemplate *template.Template) {
223224
// update metrics for guaranteed and max resource
224225
sq.updateGuaranteedResourceMetrics()
225226
sq.updateMaxResourceMetrics()
227+
sq.updateMaxRunningAppsMetrics()
226228
}
227229

228230
// getProperties returns a copy of the properties for this queue
@@ -366,6 +368,7 @@ func (sq *Queue) applyConf(conf configs.QueueConfig) error {
366368
return err
367369
}
368370
sq.maxRunningApps = conf.MaxApplications
371+
sq.updateMaxRunningAppsMetrics()
369372
}
370373

371374
sq.properties = conf.Properties
@@ -462,6 +465,7 @@ func (sq *Queue) SetMaxRunningApps(maxApps uint64) {
462465
sq.Lock()
463466
defer sq.Unlock()
464467
sq.maxRunningApps = maxApps
468+
sq.updateMaxRunningAppsMetrics()
465469
}
466470

467471
// setTemplate sets the template on the queue based on the config.
@@ -1705,6 +1709,10 @@ func (sq *Queue) updatePreemptingResourceMetrics() {
17051709
}
17061710
}
17071711

1712+
func (sq *Queue) updateMaxRunningAppsMetrics() {
1713+
metrics.GetQueueMetrics(sq.QueuePath).SetQueueMaxRunningAppsMetrics(sq.maxRunningApps)
1714+
}
1715+
17081716
func (sq *Queue) removeMetrics() {
17091717
metrics.RemoveQueueMetrics(sq.QueuePath)
17101718
}

pkg/scheduler/objects/queue_test.go

+7 -1
Original file line number | Diff line number | Diff line change
@@ -298,9 +298,11 @@ func TestPendingCalc(t *testing.T) {
298298
want := concatQueueResourceMetric(metrics, []string{`
299299
yunikorn_root_queue_resource{resource="memory",state="pending"} 100
300300
yunikorn_root_queue_resource{resource="vcores",state="pending"} 10
301+
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
301302
`, `
302303
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 100
303304
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 10
305+
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
304306
`},
305307
)
306308
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
@@ -314,9 +316,11 @@ yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 10
314316
want = concatQueueResourceMetric(metrics, []string{`
315317
yunikorn_root_queue_resource{resource="memory",state="pending"} 0
316318
yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
319+
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
317320
`, `
318321
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
319322
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
323+
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
320324
`},
321325
)
322326
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
@@ -334,16 +338,18 @@ yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
334338
want = concatQueueResourceMetric(metrics, []string{`
335339
yunikorn_root_queue_resource{resource="memory",state="pending"} 0
336340
yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
341+
yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
337342
`, `
338343
yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
339344
yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
345+
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
340346
`},
341347
)
342348
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(want), metrics...), "unexpected metrics")
343349
}
344350

345351
const (
346-
QueueResourceMetricHelp = "# HELP %v Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`."
352+
QueueResourceMetricHelp = "# HELP %v Queue resource metrics. State of the resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`, `maxRunningApps`."
347353
QueueResourceMetricType = "# TYPE %v gauge"
348354
)
349355

0 commit comments

Comments
 (0)