
Mark primary node as alive immediately if reachable and failover is not possible #1927


Open · wants to merge 4 commits into base: unstable
Changes from 1 commit
26 changes: 22 additions & 4 deletions src/cluster_legacy.c
@@ -2146,12 +2146,21 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG);
}

/* If none of the replicas of a given primary can fail over, then immediately mark it as alive. */
int cant_failover = 1;
Member:

Is this a common enough failure mode to have special handling for it? Most people only temporarily disable failover when doing upgrades and they probably shouldn't disable it on all replicas. It's also possible there is another replica we are unaware of that is eligible.

Collaborator Author:

I looked at the request behind this feature. It seems users like to place replicas closer to the application for faster reads but don't want them to be part of the cluster failover process. Another use case mentioned is a replica used only for backups. A few others don't want automated failover to kick in at all and only perform failovers manually. All of this is documented here: redis/redis#3021

Also, I believe this change improves availability for clusters with a primary-only setup. We are accelerating recovery where it seems feasible, and in the worst case I believe it was always possible to enter a partitioned state and have multiple primaries in a shard.

Member:

I agree with this change. Ultimately, the truth lies with the primary in question. Whether or not a bystander node like this one marks the primary healthy immediately doesn't really change the fact that the primary will regain its primaryship after a flash restart. So the more important thing is the old primary staying down for 2 times cluster-node-timeout; it's less about the observer marking it alive immediately. This approach amplifies an existing problem a bit to trade consistency for availability, which I think is a reasonable decision, provided we are reasonably sure that there won't be replicas competing for the primaryship.

Member @enjoy-binbin, Apr 9, 2025:

We have a zoneid in the internal fork (and we gossip it); usually replicas in the same AZ as the primary node get a better rank. Of course, replication offset is the top priority: when the offsets are the same, replicas in the same AZ rank better and can initiate elections faster. We almost never use the no-failover configuration option.

Collaborator Author @hpatro, Apr 9, 2025:

@madolson and I were talking about node placement with zonal awareness; we could also improve the placement of voting members and avoid putting all of them in the same zone. Let me file an issue on this.

for (int j = 0; j < node->num_replicas; j++) {
if (!clusterNodeIsNoFailover(node->replicas[j])) {
cant_failover = 0;
break;
}
}

/* If it is a primary and...
* 1) The FAIL state is old enough.
Member:

let's also update this line to mention the dont_wait.

* 2) It is yet serving slots from our point of view (not failed over).
* Apparently no one is going to fix these slots, clear the FAIL flag. */
    if (clusterNodeIsVotingPrimary(node) &&
-       (now - node->fail_time) > (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) {
+       ((now - node->fail_time) > (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT) || cant_failover)) {
serverLog(
LL_NOTICE,
"Clear FAIL state for node %.40s (%s): is reachable again and nobody is serving its slots after some time.",
@@ -4735,6 +4744,10 @@ void clusterLogCantFailover(int reason) {
case CLUSTER_CANT_FAILOVER_WAITING_DELAY: msg = "Waiting the delay before I can start a new failover."; break;
case CLUSTER_CANT_FAILOVER_EXPIRED: msg = "Failover attempt expired."; break;
case CLUSTER_CANT_FAILOVER_WAITING_VOTES: msg = "Waiting for votes, but majority still not reached."; break;
case CLUSTER_CANT_FAILOVER_DISABLED:
msg = "Failover has been disabled. "
"Please check the 'cluster-replica-no-failover' configuration option";
break;
default: serverPanic("Unknown cant failover reason code.");
}
lastlog_time = time(NULL);
@@ -4827,14 +4840,19 @@ void clusterHandleReplicaFailover(void) {
* 3) We don't have the no failover configuration set, and this is
Member:

This line needs an update.

* not a manual failover. */
    if (clusterNodeIsPrimary(myself) || myself->replicaof == NULL ||
-       (!nodeFailed(myself->replicaof) && !manual_failover) ||
-       (server.cluster_replica_no_failover && !manual_failover)) {
+       (!nodeFailed(myself->replicaof) && !manual_failover)) {
/* There are no reasons to failover, so we set the reason why we
* are returning without failing over to NONE. */
server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
return;
}

if (server.cluster_replica_no_failover && !manual_failover) {
server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_DISABLED;
clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DISABLED);
return;
}

/* Set data_age to the number of milliseconds we are disconnected from
* the primary. */
if (server.repl_state == REPL_STATE_CONNECTED) {
@@ -6602,7 +6620,7 @@ int clusterNodeIsFailing(clusterNode *node) {
}

int clusterNodeIsNoFailover(clusterNode *node) {
-    return node->flags & CLUSTER_NODE_NOFAILOVER;
+    return nodeCantFailover(node);
}

const char **clusterDebugCommandExtendedHelp(void) {
1 change: 1 addition & 0 deletions src/cluster_legacy.h
@@ -16,6 +16,7 @@
#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
#define CLUSTER_CANT_FAILOVER_EXPIRED 3
#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
#define CLUSTER_CANT_FAILOVER_DISABLED 5
#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD 1 /* seconds. */

/* clusterState todo_before_sleep flags. */
33 changes: 33 additions & 0 deletions tests/unit/cluster/failover2.tcl
@@ -101,6 +101,39 @@ start_cluster 7 3 {tags {external:skip cluster} overrides {cluster-ping-interval
}
} ;# start_cluster

start_cluster 3 3 {tags {external:skip cluster hp} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000 cluster-replica-no-failover yes}} {
# Killing one primary node.
pause_process [srv 0 pid]

test "no failover - verify replica is not promoted if failover has been disabled" {
# Observe no failover
wait_for_log_messages -3 {"*Currently unable to failover: Failover has been disabled*"} 0 2000 50
}

test "no failover - primary is in failed state" {
for {set j 0} {$j < [llength $::servers]} {incr j} {
if {[process_is_paused [srv -$j pid]]} continue
wait_for_condition 1000 50 {
[CI $j cluster_state] eq "fail"
} else {
fail "Cluster node $j cluster_state:[CI $j cluster_state]"
}
}
}

resume_process [srv 0 pid]

test "no failover - cluster is in healthy state" {
for {set j 0} {$j < [llength $::servers]} {incr j} {
wait_for_condition 1000 50 {
[CI $j cluster_state] eq "ok"
} else {
fail "Cluster node $j cluster_state:[CI $j cluster_state]"
}
}
}
} ;# start_cluster

run_solo {cluster} {
start_cluster 32 15 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 15000}} {
test "Multiple primary nodes are down, rank them based on the failed primary" {