@@ -293,20 +293,12 @@ class InvokerActor(invokerInstance: InvokerInstanceId, controllerInstance: Contr
293
293
294
294
val healthyTimeout : FiniteDuration = 10 .seconds
295
295
296
- // This is done at this point to not intermingle with the state-machine
297
- // especially their timeouts.
296
+ // This is done at this point so that it does not intermingle with the state machine, especially its timeouts.
298
297
def customReceive : Receive = {
299
- case _ : RecordMetadata => // The response of putting testactions to the MessageProducer. We don't have to do anything with them .
298
+ case _ : RecordMetadata => // Ignores the result of publishing test actions to MessageProducer .
300
299
}
301
- override def receive : Receive = customReceive.orElse(super .receive)
302
300
303
- /** Always start UnHealthy. Then the invoker receives some test activations and becomes Healthy. */
304
- startWith(Unhealthy , InvokerInfo (new RingBuffer [InvocationFinishedResult ](InvokerActor .bufferSize)))
305
-
306
- /** An Offline invoker represents an existing but broken invoker. This means, that it does not send pings anymore. */
307
- when(Offline ) {
308
- case Event (_ : PingMessage , _) => goto(Unhealthy )
309
- }
301
+ override def receive : Receive = customReceive.orElse(super .receive)
310
302
311
303
// To be used for all states that should send test actions to reverify the invoker
312
304
val healthPingingState : StateFunction = {
@@ -317,27 +309,43 @@ class InvokerActor(invokerInstance: InvokerInstanceId, controllerInstance: Contr
317
309
stay
318
310
}
319
311
312
+ // To be used for all states that should send test actions to reverify the invoker
313
+ def healthPingingTransitionHandler (state : InvokerState ): TransitionHandler = {
314
+ case _ -> `state` =>
315
+ invokeTestAction()
316
+ setTimer(InvokerActor .timerName, Tick , 1 .minute, repeat = true )
317
+ case `state` -> _ => cancelTimer(InvokerActor .timerName)
318
+ }
319
+
320
+ /** Always start Unhealthy. The invoker then receives some test activations and becomes Healthy. */
321
+ startWith(Unhealthy , InvokerInfo (new RingBuffer [InvocationFinishedResult ](InvokerActor .bufferSize)))
322
+
323
+ /** An Offline invoker represents an existing but broken invoker. This means that it no longer sends pings. */
324
+ when(Offline ) {
325
+ case Event (_ : PingMessage , _) => goto(Unhealthy )
326
+ }
327
+
320
328
/** An Unhealthy invoker represents an invoker that was not able to handle actions successfully. */
321
329
when(Unhealthy , stateTimeout = healthyTimeout)(healthPingingState)
322
330
323
331
/** An Unresponsive invoker represents an invoker that is not responding with active acks in a timely manner */
324
332
when(Unresponsive , stateTimeout = healthyTimeout)(healthPingingState)
325
333
326
334
/**
327
- * A Healthy invoker is characterized by continuously getting pings. It will go offline if that state is not confirmed
328
- * for 20 seconds.
335
+ * A Healthy invoker is characterized by continuously getting pings.
336
+ * It will go offline if that state is not confirmed within `healthyTimeout` (10 seconds).
329
337
*/
330
338
when(Healthy , stateTimeout = healthyTimeout) {
331
339
case Event (_ : PingMessage , _) => stay
332
340
case Event (StateTimeout , _) => goto(Offline )
333
341
}
334
342
335
- /** Handle the completion of an Activation in every state. */
343
+ /** Handles the completion of an Activation in every state. */
336
344
whenUnhandled {
337
345
case Event (cm : InvocationFinishedMessage , info) => handleCompletionMessage(cm.result, info.buffer)
338
346
}
339
347
340
- /** Logging on Transition change */
348
+ /** Logs transition changes. */
341
349
onTransition {
342
350
case _ -> newState if ! newState.isUsable =>
343
351
transid.mark(
@@ -348,14 +356,6 @@ class InvokerActor(invokerInstance: InvokerInstanceId, controllerInstance: Contr
348
356
case _ -> newState if newState.isUsable => logging.info(this , s " $name is ${newState.asString}" )
349
357
}
350
358
351
- // To be used for all states that should send test actions to reverify the invoker
352
- def healthPingingTransitionHandler (state : InvokerState ): TransitionHandler = {
353
- case _ -> `state` =>
354
- invokeTestAction()
355
- setTimer(InvokerActor .timerName, Tick , 1 .minute, repeat = true )
356
- case `state` -> _ => cancelTimer(InvokerActor .timerName)
357
- }
358
-
359
359
onTransition(healthPingingTransitionHandler(Unhealthy ))
360
360
onTransition(healthPingingTransitionHandler(Unresponsive ))
361
361
@@ -372,28 +372,53 @@ class InvokerActor(invokerInstance: InvokerInstanceId, controllerInstance: Contr
372
372
buffer : RingBuffer [InvocationFinishedResult ]) = {
373
373
buffer.add(result)
374
374
375
- // If the action is successful it seems like the Invoker is Healthy again. So we execute immediately
376
- // a new test action to remove the errors out of the RingBuffer as fast as possible.
375
+ // If the action is successful, the invoker appears to be healthy again. We immediately execute
376
+ // another test action to flush the errors out of the RingBuffer as fast as possible.
377
377
// The actions that arrive while the invoker is unhealthy are most likely health actions.
378
378
// It is possible they are normal user actions as well. This can happen if such actions were in the
379
379
// invoker queue or in progress while the invoker's status flipped to Unhealthy.
380
380
if (result == InvocationFinishedResult .Success && stateName == Unhealthy ) {
381
381
invokeTestAction()
382
382
}
383
383
384
- // Stay in online if the activations was successful.
385
- // Stay in offline, if an activeAck reaches the controller .
384
+ // Stay online if the activation was successful.
385
+ // Stay offline if an activeAck is received (a stale activation) but the invoker ceased pinging .
386
386
if ((stateName == Healthy && result == InvocationFinishedResult .Success ) || stateName == Offline ) {
387
387
stay
388
388
} else {
389
389
val entries = buffer.toList
390
- // Goto Unhealthy or Unresponsive respectively if there are more errors than accepted in buffer, else goto Healthy
390
+
391
+ // Goto Unhealthy or Unresponsive respectively if there are more errors than accepted in buffer at steady state.
392
+ // Otherwise transition to Healthy on successful activations only.
391
393
if (entries.count(_ == InvocationFinishedResult .SystemError ) > InvokerActor .bufferErrorTolerance) {
394
+ // Note: The predicate is false if the ring buffer is still being primed
395
+ // (i.e., the entries.size <= bufferErrorTolerance).
392
396
gotoIfNotThere(Unhealthy )
393
397
} else if (entries.count(_ == InvocationFinishedResult .Timeout ) > InvokerActor .bufferErrorTolerance) {
398
+ // Note: The predicate is false if the ring buffer is still being primed
399
+ // (i.e., the entries.size <= bufferErrorTolerance).
394
400
gotoIfNotThere(Unresponsive )
395
401
} else {
396
- gotoIfNotThere(Healthy )
402
+ result match {
403
+ case InvocationFinishedResult .Success =>
404
+ // Eagerly transition to healthy, at steady state (there aren't sufficient contra-indications) or
405
+ // during priming of the ring buffer. In case of the latter, there is at least one additional test
406
+ // action in flight which can reverse the transition later.
407
+ gotoIfNotThere(Healthy )
408
+
409
+ case InvocationFinishedResult .SystemError if (entries.size <= InvokerActor .bufferErrorTolerance) =>
410
+ // The ring buffer is not fully primed yet, stay/goto Unhealthy
411
+ gotoIfNotThere(Unhealthy )
412
+
413
+ case InvocationFinishedResult .Timeout if (entries.size <= InvokerActor .bufferErrorTolerance) =>
414
+ // The ring buffer is not fully primed yet, stay/goto Unresponsive
415
+ gotoIfNotThere(Unresponsive )
416
+
417
+ case _ =>
418
+ // At steady state, the state of the buffer takes precedence over the individual result, so we hold
419
+ // the current state until enough events have occurred to transition to a new state.
420
+ stay
421
+ }
397
422
}
398
423
}
399
424
}
0 commit comments