Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix for v2.14: BCI-4095 rpc deadlock #14243

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/curly-birds-guess.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": patch
---

Fixed deadlock in RPCClient causing CL Node to stop performing RPC requests for the affected chain #bugfix
15 changes: 9 additions & 6 deletions core/chains/evm/client/rpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ type rpcClient struct {
// stateMu since it can happen on state transitions as well as rpcClient Close.
chStopInFlight chan struct{}

chainInfoLock sync.RWMutex
// intercepted values seen by callers of the rpcClient excluding health check calls. Need to ensure MultiNode provides repeatable read guarantee
highestUserObservations commonclient.ChainInfo
// most recent chain info observed during current lifecycle (reseted on DisconnectAll)
Expand Down Expand Up @@ -318,7 +319,9 @@ func (r *rpcClient) DisconnectAll() {
}
r.cancelInflightRequests()
r.unsubscribeAll()
r.chainInfoLock.Lock()
r.latestChainInfo = commonclient.ChainInfo{}
r.chainInfoLock.Unlock()
}

// unsubscribeAll unsubscribes all subscriptions
Expand Down Expand Up @@ -1203,8 +1206,8 @@ func (r *rpcClient) onNewHead(ctx context.Context, requestCh <-chan struct{}, he
return
}

r.stateMu.Lock()
defer r.stateMu.Unlock()
r.chainInfoLock.Lock()
defer r.chainInfoLock.Unlock()
if !commonclient.CtxIsHeathCheckRequest(ctx) {
r.highestUserObservations.BlockNumber = max(r.highestUserObservations.BlockNumber, head.Number)
r.highestUserObservations.TotalDifficulty = commonclient.MaxTotalDifficulty(r.highestUserObservations.TotalDifficulty, head.TotalDifficulty)
Expand All @@ -1222,8 +1225,8 @@ func (r *rpcClient) onNewFinalizedHead(ctx context.Context, requestCh <-chan str
if head == nil {
return
}
r.stateMu.Lock()
defer r.stateMu.Unlock()
r.chainInfoLock.Lock()
defer r.chainInfoLock.Unlock()
if !commonclient.CtxIsHeathCheckRequest(ctx) {
r.highestUserObservations.FinalizedBlockNumber = max(r.highestUserObservations.FinalizedBlockNumber, head.Number)
}
Expand All @@ -1236,8 +1239,8 @@ func (r *rpcClient) onNewFinalizedHead(ctx context.Context, requestCh <-chan str
}

func (r *rpcClient) GetInterceptedChainInfo() (latest, highestUserObservations commonclient.ChainInfo) {
r.stateMu.RLock()
defer r.stateMu.RUnlock()
r.chainInfoLock.RLock()
defer r.chainInfoLock.RUnlock()
return r.latestChainInfo, r.highestUserObservations
}

Expand Down
27 changes: 27 additions & 0 deletions core/chains/evm/client/rpc_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"math/big"
"net/url"
"sync"
"testing"

"github.com/ethereum/go-ethereum"
Expand Down Expand Up @@ -126,6 +127,32 @@ func TestRPCClient_SubscribeNewHead(t *testing.T) {
assert.Equal(t, int64(0), highestUserObservations.FinalizedBlockNumber)
assert.Equal(t, (*big.Int)(nil), highestUserObservations.TotalDifficulty)
})
t.Run("Concurrent Unsubscribe and onNewHead calls do not lead to a deadlock", func(t *testing.T) {
const numberOfAttempts = 1000 // need a large number to increase the odds of reproducing the issue
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, nil, "rpc", 1, chainId, commonclient.Primary)
defer rpc.Close()
require.NoError(t, rpc.Dial(ctx))
var wg sync.WaitGroup
for i := 0; i < numberOfAttempts; i++ {
ch := make(chan *evmtypes.Head)
sub, err := rpc.SubscribeNewHead(tests.Context(t), ch)
require.NoError(t, err)
wg.Add(2)
go func() {
server.MustWriteBinaryMessageSync(t, makeNewHeadWSMessage(&evmtypes.Head{Number: 256, TotalDifficulty: big.NewInt(1000)}))
wg.Done()
}()
go func() {
rpc.UnsubscribeAllExceptAliveLoop()
sub.Unsubscribe()
wg.Done()
}()
wg.Wait()
}
})
t.Run("Block's chain ID matched configured", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()
Expand Down
Loading