hashicorp · kolesnikovae · Apr 3, 2025 · kolesnikovae · Apr 4, 2025 · kolesnikovae
@@ -36,6 +36,12 @@ type IndexFuture interface {
 type ApplyFuture interface {
 	IndexFuture
 
+	// WaitCommitted blocks until the log entry has been committed to quorum.
+	// It does not wait for FSM application.
+	// The error returned follows the same semantics as the Error method,
+	// except that errors occurring after the log entry has been committed
+	// are not reported: once the entry is committed, nil is returned.
+	WaitCommitted() error
+
 	// Response returns the FSM response as returned by the FSM.Apply method. This
 	// must not be called until after the Error method has returned.
 	// Note that if FSM.Apply returns an error, it will be returned by Response,
@@ -87,6 +93,10 @@ func (e errorFuture) Index() uint64 {
 	return 0
 }
 
+// WaitCommitted returns the error carried by the future. It never blocks:
+// the result is the stored err field, exactly as it was set on the value.
+func (e errorFuture) WaitCommitted() error {
+	// The future is already resolved, so there is nothing to wait for.
+	return e.err
+}
+
 // deferError can be embedded to allow a future
 // to provide an error in the future.
 type deferError struct {
@@ -151,9 +161,15 @@ type bootstrapFuture struct {
 // the log is considered committed.
 type logFuture struct {
 	deferError
-	log      Log
-	response interface{}
-	dispatch time.Time
+	log       Log
+	response  interface{}
+	dispatch  time.Time
+	committed chan struct{} // created by init; closed to signal that the entry has been committed
+}
+
+// init prepares the future for use. It initializes the embedded deferError
+// and allocates the unbuffered committed channel that WaitCommitted polls.
+// It must run before the future is waited on or committed.
+func (l *logFuture) init() {
+	l.deferError.init()
+	l.committed = make(chan struct{})
 }
 
 func (l *logFuture) Response() interface{} {
@@ -164,6 +180,45 @@ func (l *logFuture) Index() uint64 {
 	return l.log.Index
 }
 
+// WaitCommitted blocks until one of three things happens: the committed
+// channel is closed, a response arrives on errCh, or ShutdownCh fires.
+//
+// It returns nil whenever the committed channel is closed, even if an error
+// has been recorded as well: once the entry is committed, quorum agreement
+// ensures safety and later errors are irrelevant to this call. Otherwise the
+// recorded error is returned (for example, leadership loss before commit).
+//
+// It panics if no error has been recorded and errCh is nil.
+func (l *logFuture) WaitCommitted() error {
+	// isCommitted reports, without blocking, whether committed is closed.
+	isCommitted := func() bool {
+		select {
+		case <-l.committed:
+			return true
+		default:
+			return false
+		}
+	}
+
+	// Fast path: already committed, nothing to wait for.
+	if isCommitted() {
+		return nil
+	}
+
+	// No error recorded yet: wait for the first of the three signals.
+	if l.err == nil {
+		if l.errCh == nil {
+			panic("waiting for response on nil channel")
+		}
+		select {
+		case <-l.committed:
+			// Handled by the final commitment check below.
+		case err := <-l.errCh:
+			// A nil value here means the command has already been applied
+			// to the FSM; the committed channel is closed in that case too,
+			// since an applied command is guaranteed to be committed.
+			//
+			// Because the response is cached in l.err, an Error call made
+			// after WaitCommitted will not block and the caller can read the
+			// response safely. If no response has been received yet, Error
+			// will block. The same holds for ShutdownCh.
+			l.err = err
+		case <-l.ShutdownCh:
+			l.err = ErrRaftShutdown
+		}
+	}
+
+	// Commitment takes precedence over any recorded error.
+	if isCommitted() {
+		return nil
+	}
+	return l.err
+}
+
 // shutdownFuture is a future tied to a single Raft instance, its only field.
 // NOTE(review): presumably used to wait for that instance to shut down; its
 // methods are outside this view - confirm.
 type shutdownFuture struct {
 	raft *Raft
 }

@@ -43,3 +43,35 @@ func TestDeferFutureConcurrent(t *testing.T) {
 		t.Errorf("unexpected error result; got %#v want %#v", got, want)
 	}
 }
+
+func TestLogFutureWaitCommittedError(t *testing.T) {
+	assert := func(t *testing.T, want error, fn func() error) {
+		t.Helper()
+		if got := fn(); got != want {
+			t.Fatalf("unexpected error result; got %#v want %#v", got, want)
+		}
+	}
+
+	t.Run("ErrorBeforeCommitted", func(t *testing.T) {
+		want := errors.New("x")
+		var f logFuture
+		f.init()
+		f.respond(want)
+		assert(t, want, f.WaitCommitted)
+		assert(t, want, f.WaitCommitted)
+		assert(t, want, f.Error)
+		assert(t, want, f.Error)
+	})
+
+	t.Run("ErrorAfterCommitted", func(t *testing.T) {
+		want := errors.New("x")
+		var f logFuture
+		f.init()
+		close(f.committed)
+		f.respond(want)
+		assert(t, nil, f.WaitCommitted)
+		assert(t, want, f.Error)
+		assert(t, nil, f.WaitCommitted)
+		assert(t, want, f.Error)
+	})
+}
@@ -569,6 +569,7 @@ func (r *Raft) runLeader() {
 	// maintain that there exists at most one uncommitted configuration entry in
 	// any log, so we have to do proper no-ops here.
 	noop := &logFuture{log: Log{Type: LogNoop}}
+	noop.init()
 	r.dispatchLogs([]*logFuture{noop})
 
 	// Sit in the leader loop until we step down
@@ -818,6 +819,7 @@ func (r *Raft) leaderLoop() {
 				groupReady = append(groupReady, e)
 				groupFutures[idx] = commitLog
 				lastIdxInGroup = idx
+				close(commitLog.committed)
 			}
 
 			// Process the group

@@ -590,6 +590,62 @@ func TestRaft_ApplyConcurrent_Timeout(t *testing.T) {
 	t.Fatalf("Timeout waiting to detect apply timeouts")
 }
 
+// TestRaft_WaitCommittedConcurrent submits entries to the leader from many
+// goroutines at once, waits for each with WaitCommitted, and then verifies
+// that the FSMs across the cluster end up the same.
+func TestRaft_WaitCommittedConcurrent(t *testing.T) {
+	// Build a three-node cluster with doubled heartbeat and election timeouts.
+	cfg := inmemConfig(t)
+	cfg.HeartbeatTimeout *= 2
+	cfg.ElectionTimeout *= 2
+	c := MakeCluster(3, t, cfg)
+	defer c.Close()
+
+	// Block until a leader is available.
+	leader := c.Leader()
+
+	// Launch all submissions concurrently; each goroutine waits only for
+	// its own entry to be committed.
+	const total = 100
+	var wg sync.WaitGroup
+	wg.Add(total)
+	for i := 0; i < total; i++ {
+		go func(n int) {
+			defer wg.Done()
+			payload := []byte(fmt.Sprintf("test%d", n))
+			err := leader.Apply(payload, 0).WaitCommitted()
+			if err != nil {
+				c.Failf("[ERR] err: %v", err)
+			}
+		}(i)
+	}
+
+	// Collect completion on a channel so the wait can be bounded below.
+	finished := make(chan struct{})
+	go func() {
+		defer close(finished)
+		wg.Wait()
+		// WaitCommitted, unlike Apply, returns before the FSM has applied
+		// the command. Explicitly wait until the committed commands have
+		// been replicated and applied on every replica's FSM.
+		// WaitForReplication times out after longstopTimeout.
+		c.WaitForReplication(total)
+	}()
+
+	select {
+	case <-finished:
+	case <-time.After(c.longstopTimeout):
+		t.Fatalf("timeout")
+	}
+
+	// Bail out now if anything has failed so far, instead of running a
+	// comparison whose result would be confusing.
+	if t.Failed() {
+		t.Fatalf("One or more of the apply operations failed")
+	}
+
+	// All FSMs must have converged.
+	c.EnsureSame(t)
+}
+
 func TestRaft_JoinNode(t *testing.T) {
 	// Make a cluster
 	c := MakeCluster(2, t, nil)