Skip to content

Commit 7a2aca4

Browse files
committed
Fix review points
1 parent 3de1735 commit 7a2aca4

File tree

5 files changed

+64
-63
lines changed

5 files changed

+64
-63
lines changed

ansible/roles/invoker/tasks/deploy.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,11 @@
282282
"CONFIG_whisk_invoker_https_keystorePassword": "{{ invoker.ssl.keystore.password }}"
283283
"CONFIG_whisk_invoker_https_keystoreFlavor": "{{ invoker.ssl.storeFlavor }}"
284284
"CONFIG_whisk_invoker_https_clientAuth": "{{ invoker.ssl.clientAuth }}"
285-
"CONFIG_whisk_containerPool_prewarmExpirationCheckInterval": "{{ container_pool_prewarm_expirationCheckInterval | default('1 minute') }}"
285+
"CONFIG_whisk_containerPool_prewarmExpirationCheckInitDelay": "{{ container_pool_prewarm_expirationCheckInitDelay | default('10 minutes') }}"
286+
"CONFIG_whisk_containerPool_prewarmExpirationCheckInterval": "{{ container_pool_prewarm_expirationCheckInterval | default('10 minutes') }}"
287+
"CONFIG_whisk_containerPool_prewarmExpirationCheckIntervalVariance": "{{ container_pool_prewarm_expirationCheckIntervalVariance | default('10 seconds') }}"
288+
"CONFIG_whisk_containerPool_prewarmPromotion": "{{ container_pool_strict | default('false') | lower }}"
289+
"CONFIG_whisk_containerPool_prewarmMaxRetryLimit": "{{ container_pool_prewarm_max_retry_limit | default(5) }}"
286290

287291
- name: extend invoker dns env
288292
set_fact:

core/invoker/src/main/resources/application.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ whisk {
6161
concurrent-peek-factor: 0.5 #factor used to limit message peeking: 0 < factor <= 1.0 - larger number improves concurrent processing, but increases risk of message loss during invoker crash
6262
akka-client: false # if true, use PoolingContainerClient for HTTP from invoker to action container (otherwise use ApacheBlockingContainerClient)
6363
prewarm-expiration-check-init-delay: 10 minute # the init delay time for the first check
64-
prewarm-expiration-check-interval: 5 minute # period to check for prewarm expiration
64+
prewarm-expiration-check-interval: 10 minute # period to check for prewarm expiration
6565
prewarm-expiration-check-interval-variance: 10 seconds # varies expiration across invokers to avoid many concurrent expirations
6666
prewarm-expiration-limit: 100 # number of prewarms to expire in one expiration cycle (remaining expired will be considered for expiration in next cycle)
67-
prewarm-max-retry-limit: 3 # max retry limit for create prewarm for same kind/memory
67+
prewarm-max-retry-limit: 5 # max subsequent retry limit to create prewarm containers
6868
prewarm-promotion: false # if true, action can take prewarm container which has bigger memory
6969
memory-sync-interval: 1 second # period to sync memory info to etcd
7070
}

core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/ContainerPool.scala

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,13 @@ class ContainerPool(childFactory: ActorRefFactory => ActorRef,
9191
.nextInt(v.toSeconds.toInt))
9292
.getOrElse(0)
9393
.seconds
94-
context.system.scheduler.schedule(2.seconds, interval, self, AdjustPrewarmedContainer)
94+
if (prewarmConfig.exists(!_.reactive.isEmpty)) {
95+
context.system.scheduler.schedule(
96+
poolConfig.prewarmExpirationCheckInitDelay,
97+
interval,
98+
self,
99+
AdjustPrewarmedContainer)
100+
}
95101

96102
def logContainerStart(r: Run, containerState: String, activeActivations: Int, container: Option[Container]): Unit = {
97103
val namespaceName = r.msg.user.namespace.name.asString
@@ -590,9 +596,9 @@ object ContainerPool {
590596
}
591597
.sortBy(_._2.expires.getOrElse(now))
592598

593-
// emit expired container counter metric with memory + kind
594-
MetricEmitter.emitCounterMetric(LoggingMarkers.CONTAINER_POOL_PREWARM_EXPIRED(memory.toString, kind))
595599
if (expiredPrewarmedContainer.nonEmpty) {
600+
// emit expired container counter metric with memory + kind
601+
MetricEmitter.emitCounterMetric(LoggingMarkers.CONTAINER_POOL_PREWARM_EXPIRED(memory.toString, kind))
596602
logging.info(
597603
this,
598604
s"[kind: ${kind} memory: ${memory.toString}] ${expiredPrewarmedContainer.size} expired prewarmed containers")

core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/v2/FunctionPullingContainerPool.scala

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.openwhisk.core.containerpool.v2
1919

20+
import java.util.concurrent.atomic.AtomicInteger
21+
2022
import akka.actor.{Actor, ActorRef, ActorRefFactory, Cancellable, Props}
2123
import org.apache.kafka.clients.producer.RecordMetadata
2224
import org.apache.openwhisk.common._
@@ -48,8 +50,8 @@ import scala.concurrent.duration._
4850
import scala.util.{Random, Try}
4951
import scala.collection.immutable.Queue
5052

51-
case class Creation(creationMessage: ContainerCreationMessage, action: WhiskAction)
52-
case class Deletion(deletionMessage: ContainerDeletionMessage)
53+
case class CreationContainer(creationMessage: ContainerCreationMessage, action: WhiskAction)
54+
case class DeletionContainer(deletionMessage: ContainerDeletionMessage)
5355
case object Remove
5456
case class Keep(timeout: FiniteDuration)
5557
case class PrewarmContainer(maxConcurrent: Int)
@@ -98,7 +100,7 @@ class FunctionPullingContainerPool(
98100

99101
private var preWarmScheduler: Option[Cancellable] = None
100102
private var prewarmConfigQueue = Queue.empty[(CodeExec[_], ByteSize, Option[FiniteDuration])]
101-
private var prewarmCreateFailedCount = immutable.Map.empty[(String, ByteSize), Int]
103+
private val prewarmCreateFailedCount = new AtomicInteger(0)
102104

103105
val logScheduler = context.system.scheduler.schedule(0.seconds, 1.seconds) {
104106
MetricEmitter.emitHistogramMetric(
@@ -166,7 +168,7 @@ class FunctionPullingContainerPool(
166168
}
167169
}
168170

169-
case Creation(create: ContainerCreationMessage, action: WhiskAction) =>
171+
case CreationContainer(create: ContainerCreationMessage, action: WhiskAction) =>
170172
if (shuttingDown) {
171173
val message =
172174
s"creationId: ${create.creationId}, invoker is shutting down, reschedule ${action.fullyQualifiedName(false)}"
@@ -217,7 +219,7 @@ class FunctionPullingContainerPool(
217219
}
218220
}
219221

220-
case Deletion(deletionMessage: ContainerDeletionMessage) =>
222+
case DeletionContainer(deletionMessage: ContainerDeletionMessage) =>
221223
val oldRevision = deletionMessage.revision
222224
val invocationNamespace = deletionMessage.invocationNamespace
223225
val fqn = deletionMessage.action.copy(version = None)
@@ -252,7 +254,10 @@ class FunctionPullingContainerPool(
252254
case ReadyToWork(data) =>
253255
prewarmStartingPool = prewarmStartingPool - sender()
254256
prewarmedPool = prewarmedPool + (sender() -> data)
255-
prewarmCreateFailedCount = prewarmCreateFailedCount - ((data.kind, data.memoryLimit))
257+
// after create prewarm successfully, reset the value to 0
258+
if (prewarmCreateFailedCount.get() > 0) {
259+
prewarmCreateFailedCount.set(0)
260+
}
256261

257262
// Container is initialized
258263
case Initialized(data) =>
@@ -353,26 +358,13 @@ class FunctionPullingContainerPool(
353358
logging.info(
354359
this,
355360
s"${if (replacePrewarm) "failed" else "expired"} prewarm [kind: ${data.kind}, memory: ${data.memoryLimit.toString}] removed")
356-
if (replacePrewarm) {
357-
prewarmCreateFailedCount.get(data.kind, data.memoryLimit) match {
358-
case Some(retry) =>
359-
prewarmCreateFailedCount = prewarmCreateFailedCount + ((data.kind, data.memoryLimit) -> (retry + 1))
360-
case _ =>
361-
prewarmCreateFailedCount = prewarmCreateFailedCount + ((data.kind, data.memoryLimit) -> 1)
362-
}
363-
}
364361
}
365362

366363
//in case this was a starting prewarm
367364
prewarmStartingPool.get(sender()).foreach { data =>
368365
logging.info(this, s"failed starting prewarm [kind: ${data._1}, memory: ${data._2.toString}] removed")
369366
prewarmStartingPool = prewarmStartingPool - sender()
370-
prewarmCreateFailedCount.get(data._1, data._2) match {
371-
case Some(retry) =>
372-
prewarmCreateFailedCount = prewarmCreateFailedCount + ((data._1, data._2) -> (retry + 1))
373-
case _ =>
374-
prewarmCreateFailedCount = prewarmCreateFailedCount + ((data._1, data._2) -> 1)
375-
}
367+
prewarmCreateFailedCount.incrementAndGet()
376368
}
377369

378370
//backfill prewarms on every ContainerRemoved, just in case
@@ -408,7 +400,7 @@ class FunctionPullingContainerPool(
408400

409401
case AdjustPrewarmedContainer =>
410402
// Reset the prewarmCreateCount value when do expiration check and backfill prewarm if possible
411-
prewarmCreateFailedCount = immutable.Map.empty[(String, ByteSize), Int]
403+
prewarmCreateFailedCount.set(0)
412404
adjustPrewarmedContainer(false, true)
413405
}
414406

@@ -442,8 +434,7 @@ class FunctionPullingContainerPool(
442434
val config = c._1
443435
val currentCount = c._2._1
444436
val desiredCount = c._2._2
445-
val retry = prewarmCreateFailedCount.get((config.exec.kind, config.memoryLimit)).getOrElse(0)
446-
if (retry > poolConfig.prewarmMaxRetryLimit) {
437+
if (prewarmCreateFailedCount.get() > poolConfig.prewarmMaxRetryLimit) {
447438
logging.warn(
448439
this,
449440
s"[kind: ${config.exec.kind}, memory: ${config.memoryLimit.toString}] prewarm create failed count exceeds max retry limit: ${poolConfig.prewarmMaxRetryLimit}, currentCount: ${currentCount}, desiredCount: ${desiredCount}")

0 commit comments

Comments
 (0)