Skip to content

Updating stepStateMachine to use stateTransitionHookFunc to get next retry wait time for local state retries #155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ struct __StateMachineState {
GetNextStateFunc getNextStateFn;
ExecuteStateFunc executeStateFn;
StateTransitionHookFunc stateTransitionHookFunc;
UINT32 retry;
UINT32 maxLocalStateRetryCount;
STATUS status;
};
typedef struct __StateMachineState* PStateMachineState;
Expand Down
9 changes: 7 additions & 2 deletions src/state/src/Include_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ extern "C" {
typedef struct __StateMachineContext StateMachineContext;
struct __StateMachineContext {
PStateMachineState pCurrentState;
UINT32 retryCount;
UINT64 time;
// Current retry count for a given state.
// This is incremented only for retries happening within the same state. Once the
// state transitions to a different state, localRetryCount is reset to 0.
UINT32 localStateRetryCount;
// Wait time before transitioning to next state.
// Next state could be the same state OR a different state
UINT64 stateTransitionWaitTime;
};
typedef struct __StateMachineContext* PStateMachineContext;

Expand Down
53 changes: 29 additions & 24 deletions src/state/src/State.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ STATUS stepStateMachine(PStateMachine pStateMachine)
UINT64 nextState, time;
UINT64 customData;
PStateMachineImpl pStateMachineImpl = (PStateMachineImpl) pStateMachine;
UINT64 errorStateTransitionWaitTime;
UINT64 errorStateTransitionWaitTime = 0;

CHK(pStateMachineImpl != NULL, STATUS_NULL_ARG);
customData = pStateMachineImpl->customData;
Expand All @@ -153,37 +153,42 @@ STATUS stepStateMachine(PStateMachine pStateMachine)
// Clear the iteration info if a different state and transition the state
time = pStateMachineImpl->getCurrentTimeFunc(pStateMachineImpl->getCurrentTimeFuncCustomData);

// This stateTransitionHookFunc will return state transition wait time if the state transition is happening for non 200 service response
// For 200 service response, errorStateTransitionWaitTime will be 0.
if (pStateMachineImpl->context.pCurrentState->stateTransitionHookFunc != NULL) {
CHK_STATUS(pStateMachineImpl->context.pCurrentState->stateTransitionHookFunc(pStateMachineImpl->customData, &errorStateTransitionWaitTime));
}

// Check if we are changing the state
if (pState->state != pStateMachineImpl->context.pCurrentState->state) {
// Clear the iteration data
pStateMachineImpl->context.retryCount = 0;
pStateMachineImpl->context.time = time;

// Call the state machine error handler
// This call could be a no-op if the state transition is happening for SERVICE_CALL_RESULT_OK code
if (pStateMachineImpl->context.pCurrentState->stateTransitionHookFunc != NULL) {
CHK_STATUS(pStateMachineImpl->context.pCurrentState->stateTransitionHookFunc(pStateMachineImpl->customData, &errorStateTransitionWaitTime));
pStateMachineImpl->context.time = time + errorStateTransitionWaitTime;
}

DLOGD("State Machine - Current state: 0x%016" PRIx64 ", Next state: 0x%016" PRIx64, pStateMachineImpl->context.pCurrentState->state,
nextState);
// Since we're transitioning to a different state from this state, reset the local state retry count to0
pStateMachineImpl->context.localStateRetryCount = 0;
} else {
// Increment the state retry count
pStateMachineImpl->context.retryCount++;
pStateMachineImpl->context.time = time + NEXT_SERVICE_CALL_RETRY_DELAY(pStateMachineImpl->context.retryCount);
// Increment the local state retry count.
// Local state retry count determines the number of retries done within the same state.
pStateMachineImpl->context.localStateRetryCount++;
}

// Check if we have tried enough times
if (pState->retry != INFINITE_RETRY_COUNT_SENTINEL) {
CHK(pStateMachineImpl->context.retryCount <= pState->retry, pState->status);
}
DLOGD("State Machine - Current state: 0x%016" PRIx64 ", Next state: 0x%016" PRIx64 ", "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest making this VERBOSE. stepStateMachine() is invoked frequently in higher layers, this would create a spew of logs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"Current local state retry count [%u], Max local state retry count [%u], State transition wait time [" PRIx64 "] ms",
pStateMachineImpl->context.pCurrentState->state,
nextState,
pStateMachineImpl->context.localStateRetryCount,
pState->maxLocalStateRetryCount,
errorStateTransitionWaitTime/HUNDREDS_OF_NANOS_IN_A_MILLISECOND);

// Check if we have tried enough times within the same state
if (pState->maxLocalStateRetryCount != INFINITE_RETRY_COUNT_SENTINEL) {
CHK(pStateMachineImpl->context.localStateRetryCount <= pState->maxLocalStateRetryCount, pState->status);
}

pStateMachineImpl->context.stateTransitionWaitTime = time + errorStateTransitionWaitTime;
pStateMachineImpl->context.pCurrentState = pState;

// Execute the state function if specified
// The executeStateFn callback is expected to wait for stateTransitionWaitTime before executing the actual logic
if (pStateMachineImpl->context.pCurrentState->executeStateFn != NULL) {
CHK_STATUS(pStateMachineImpl->context.pCurrentState->executeStateFn(pStateMachineImpl->customData, pStateMachineImpl->context.time));
CHK_STATUS(pStateMachineImpl->context.pCurrentState->executeStateFn(pStateMachineImpl->customData, pStateMachineImpl->context.stateTransitionWaitTime));
}

CleanUp:
Expand Down Expand Up @@ -247,8 +252,8 @@ STATUS resetStateMachineRetryCount(PStateMachine pStateMachine)
CHK(pStateMachineImpl != NULL, STATUS_NULL_ARG);

// Reset the state
pStateMachineImpl->context.retryCount = 0;
pStateMachineImpl->context.time = 0;
pStateMachineImpl->context.localStateRetryCount = 0;
pStateMachineImpl->context.stateTransitionWaitTime = 0;

CleanUp:

Expand Down
16 changes: 8 additions & 8 deletions src/state/tst/StateApiFunctionalityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ TEST_F(StateApiFunctionalityTest, checkStateIterationFailure)
}

// Validate the retry
EXPECT_EQ(SERVICE_CALL_MAX_RETRY_COUNT + 10, pStateMachineImpl->context.retryCount);
EXPECT_EQ(SERVICE_CALL_MAX_RETRY_COUNT + 10, pStateMachineImpl->context.localStateRetryCount);
EXPECT_EQ(STATUS_SUCCESS, resetStateMachineRetryCount(mStateMachine));
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);

// Step to next state
mTestTransitions[0].nextState = TEST_STATE_1;
Expand Down Expand Up @@ -165,7 +165,7 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
EXPECT_EQ(1, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be zero since no retries are
// happening within this state.
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);

// Mock a service API call response to be 200 OK in state 1 before
// transitioning to state 2. Verify that the error handler's execution
Expand All @@ -179,7 +179,7 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
EXPECT_EQ(1, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be zero since no retries are
// happening within this state.
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);

// Mock a service API call response to be a timeout in state 2 before
// transitioning to state 3. Verify that the error handler's business
Expand All @@ -193,7 +193,7 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
EXPECT_EQ(2, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be zero since no retries are
// happening within this state.
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);

// Mock a service API call response to be a timeout in state 3 before
// transitioning to state 3 (same state). Verify that the error was
Expand All @@ -207,7 +207,7 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
// Error handler execution count did not increment
EXPECT_EQ(2, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be 1 since we retried once within TEST_STATE_3
EXPECT_EQ(1, pStateMachineImpl->context.retryCount);
EXPECT_EQ(1, pStateMachineImpl->context.localStateRetryCount);

// Mock a service API call response to be 200 OK in state 3 before
// transitioning to state 2. Verify that the error handler's execution
Expand All @@ -221,7 +221,7 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
EXPECT_EQ(2, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be zero since no retries are
// happening within this state.
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);

// Mock a service API call response to be a timeout in state 2 before
// transitioning to state 3. Verify that the error handler's business
Expand All @@ -235,5 +235,5 @@ TEST_F(StateApiFunctionalityTest, checkStateErrorHandlerOnStateTransitions)
EXPECT_EQ(3, this->testErrorHandlerMetaData.errorHandlerExecutionCount);
// State's local retry count should be zero since no retries are
// happening within this state.
EXPECT_EQ(0, pStateMachineImpl->context.retryCount);
EXPECT_EQ(0, pStateMachineImpl->context.localStateRetryCount);
}
4 changes: 2 additions & 2 deletions src/utils/src/ExponentialBackoffRetryStrategy.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,9 @@ STATUS getExponentialBackoffRetryStrategyWaitTime(PRetryStrategy pRetryStrategy,

DLOGD("\n Thread Id [%"PRIu64"] "
"Number of retries [%"PRIu64"], "
"Retry wait time [%"PRIu64"], "
"Retry wait time [%"PRIu64"] ms, "
"Retry system time [%"PRIu64"]",
GETTID(), pRetryState->currentRetryCount, pRetryState->lastRetryWaitTime, pRetryState->lastRetrySystemTime);
GETTID(), pRetryState->currentRetryCount, pRetryState->lastRetryWaitTime/HUNDREDS_OF_NANOS_IN_A_MILLISECOND, pRetryState->lastRetrySystemTime);

CleanUp:
if (retStatus == STATUS_EXPONENTIAL_BACKOFF_RETRIES_EXHAUSTED) {
Expand Down