raft: making check quorum behave more like a leader lease #5451

Closed · wants to merge 2 commits
6 changes: 6 additions & 0 deletions raft/raft.go
@@ -565,6 +565,12 @@ func (r *raft) Step(m pb.Message) error {
	case m.Term > r.Term:
		lead := m.From
		if m.Type == pb.MsgVote {
			if r.state == StateFollower && r.checkQuorum && r.electionElapsed < r.electionTimeout {
Review comment from a Contributor:
I think to correctly implement this, we also need pre-vote support.

Otherwise the peer with the higher term will keep sending vote requests. The leader cannot bring that peer back to a stable state, because the peer will always reject the leader's append requests, since it has a higher term.

Pre-vote helps with this, since a peer will not increase its term unless it knows there has been no leader within the last election timeout.

Another issue is peer startup. If a peer gets restarted, it resets its election timer and cannot grant votes for a few seconds or so. If the entire cluster is restarted or is bootstrapped for the first time, there will be a few seconds of unavailability.
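A minimal, standalone sketch of the pre-vote idea mentioned above (illustrative only; the `node` and `preVoteReq` types and all field names are invented for this example and are not part of the raft package): a would-be candidate first asks its peers whether they would vote for it, and a peer answers without touching its own term, refusing while it has heard from a leader within the last election timeout.

```go
package main

import "fmt"

// node holds just enough state to express the pre-vote decision.
type node struct {
	term            uint64
	lastLogTerm     uint64
	lastLogIndex    uint64
	electionElapsed int // ticks since the node last heard from a valid leader
	electionTimeout int
}

// preVoteReq is what a prospective candidate would send before campaigning.
type preVoteReq struct {
	term         uint64 // the term the candidate would campaign at
	lastLogTerm  uint64
	lastLogIndex uint64
}

// grantPreVote reports whether this node would support the future campaign.
// Note that the node never increments its own term here; that only happens
// once a real election starts.
func (n *node) grantPreVote(req preVoteReq) bool {
	// Reject while we believe the current leader is alive: we heard from it
	// within the last election timeout, so there is no need for an election.
	if n.electionElapsed < n.electionTimeout {
		return false
	}
	// Reject stale campaigns.
	if req.term <= n.term {
		return false
	}
	// Only support candidates whose log is at least as up to date as ours.
	return req.lastLogTerm > n.lastLogTerm ||
		(req.lastLogTerm == n.lastLogTerm && req.lastLogIndex >= n.lastLogIndex)
}

func main() {
	n := &node{term: 5, lastLogTerm: 5, lastLogIndex: 100, electionElapsed: 2, electionTimeout: 10}
	req := preVoteReq{term: 6, lastLogTerm: 5, lastLogIndex: 100}
	fmt.Println(n.grantPreVote(req)) // false: a leader was heard from recently
	n.electionElapsed = 10
	fmt.Println(n.grantPreVote(req)) // true: no leader for a full election timeout
}
```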

Contributor:

Why is this limited to followers? As long as checkQuorum is enabled, any node in StateLeader is guaranteed to have been in contact with a majority of nodes within an election timeout, so it shouldn't increase its term either.

> If the entire cluster is restarted or is bootstrapped for the first time, there will be a few seconds of unavailability.

This is already true: when the entire cluster is restarted, the first election cannot happen until an election timeout (plus randomization) has passed.
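As a standalone sketch of the broader condition being suggested (purely illustrative; the types and names below are invented, not taken from the raft package): a node is inside the lease either when it is a follower that heard from a valid leader within the last election timeout, or when it is itself a leader running with check quorum, since such a leader steps down once it loses contact with a majority for an election timeout.

```go
package main

import "fmt"

type stateType int

const (
	stateFollower stateType = iota
	stateCandidate
	stateLeader
)

type peer struct {
	state           stateType
	checkQuorum     bool
	electionElapsed int
	electionTimeout int
}

// inLease reports whether this node should ignore vote requests because it
// has reason to believe a live leader already exists.
func (p *peer) inLease() bool {
	if !p.checkQuorum {
		return false
	}
	switch p.state {
	case stateFollower:
		// Heard from a valid leader within the last election timeout.
		return p.electionElapsed < p.electionTimeout
	case stateLeader:
		// With check quorum enabled, a leader steps down once it has not
		// heard from a quorum for an election timeout, so still being
		// leader implies recent contact with a majority.
		return true
	default:
		return false
	}
}

func main() {
	leader := &peer{state: stateLeader, checkQuorum: true}
	follower := &peer{state: stateFollower, checkQuorum: true, electionElapsed: 3, electionTimeout: 10}
	fmt.Println(leader.inLease(), follower.inLease()) // true true
}
```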

Contributor:

> This is already true:

Right... Ignore my last point.

				r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected vote from %x [logterm: %d, index: %d] at term %d",
					r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.From, m.LogTerm, m.Index, r.Term)
				r.send(pb.Message{To: m.From, Type: pb.MsgVoteResp, Reject: true})
				return nil
			}
			lead = None
		}
		r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]",
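For context, this behaviour only applies to applications that opt in to check quorum when configuring their raft node. A minimal sketch of enabling it through `Config.CheckQuorum`, using the same election/heartbeat tick ratio as the tests below (the import path is the one used at the time of this PR; later releases live under go.etcd.io/etcd/raft):

```go
package main

import (
	"github.com/coreos/etcd/raft" // newer releases: go.etcd.io/etcd/raft
)

func main() {
	storage := raft.NewMemoryStorage()
	c := &raft.Config{
		ID:              1,
		ElectionTick:    10, // election timeout in ticks
		HeartbeatTick:   1,
		Storage:         storage,
		MaxSizePerMsg:   4096,
		MaxInflightMsgs: 256,
		// The leader steps down if it cannot reach a quorum within an
		// election timeout; with this PR, followers also reject votes while
		// they are inside the current leader's lease.
		CheckQuorum: true,
	}
	n := raft.StartNode(c, []raft.Peer{{ID: 1}, {ID: 2}, {ID: 3}})
	defer n.Stop()
	// A real application would now drive n via Tick/Propose/Ready; this
	// sketch only shows the configuration.
}
```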
88 changes: 88 additions & 0 deletions raft/raft_test.go
@@ -1225,6 +1225,94 @@ func TestLeaderStepdownWhenQuorumLost(t *testing.T) {
	}
}

func TestLeaderSupersedingWithoutCheckQuorum(t *testing.T) {
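	// With checkQuorum disabled there is no lease: once the leader (node 1) is
	// isolated, node 3 campaigns and node 2 grants the vote immediately, so
	// node 3 becomes leader right away.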
	a := newTestRaft(1, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	b := newTestRaft(2, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	c := newTestRaft(3, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())

	nt := newNetwork(a, b, c)
	nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
	if c.state != StateFollower {
		t.Errorf("state = %s, want %s", c.state, StateFollower)
	}

	nt.isolate(1)

	nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
	if c.state != StateLeader {
		t.Errorf("state = %s, want %s", c.state, StateLeader)
	}
}

func TestLeaderSupersedingWithCheckQuorum(t *testing.T) {
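	// With checkQuorum enabled, node 3's first campaign after the leader is
	// isolated fails: node 2 is still inside the old leader's lease and rejects
	// the vote. Only after node 2 ticks through a full election timeout does a
	// second campaign by node 3 succeed.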
	a := newTestRaft(1, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	b := newTestRaft(2, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	c := newTestRaft(3, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())

	a.checkQuorum = true
	b.checkQuorum = true
	c.checkQuorum = true

	nt := newNetwork(a, b, c)
	nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
	if c.state != StateFollower {
		t.Errorf("state = %s, want %s", c.state, StateFollower)
	}

	nt.isolate(1)

	nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
	if c.state != StateCandidate {
		t.Errorf("state = %s, want %s", c.state, StateCandidate)
	}

	// Prevent b from campaigning on its own by keeping its randomized election
	// timeout strictly greater than the base election timeout.
	if b.randomizedElectionTimeout == b.electionTimeout {
		b.randomizedElectionTimeout = b.electionTimeout + 1
	}

	for i := 0; i < b.electionTimeout; i++ {
		b.tick()
	}

	nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
	if c.state != StateLeader {
		t.Errorf("state = %s, want %s", c.state, StateLeader)
	}
}

func TestLeaderElectionWithCheckQuorum(t *testing.T) {
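	// Node 1 wins the initial election. The nodes then tick through a full
	// election timeout without exchanging messages (tick() alone does not
	// deliver anything through the test network), so the leader's quorum
	// check is expected to demote it to follower, after which node 3 can be
	// elected.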
	a := newTestRaft(1, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	b := newTestRaft(2, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
	c := newTestRaft(3, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())

	a.checkQuorum = true
	b.checkQuorum = true
	c.checkQuorum = true

	nt := newNetwork(a, b, c)
	nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})

	if b.randomizedElectionTimeout == b.electionTimeout {
		b.randomizedElectionTimeout = b.electionTimeout + 1
	}

	for i := 0; i < b.electionTimeout; i++ {
		a.tick()
		b.tick()
		c.tick()
	}

	if a.state != StateFollower {
		t.Errorf("state = %s, want %s", a.state, StateFollower)
	}

	nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
	if c.state != StateLeader {
		t.Errorf("state = %s, want %s", c.state, StateLeader)
	}
}

func TestLeaderAppResp(t *testing.T) {
	// initial progress: match = 0; next = 3
	tests := []struct {