From 6d111704f833670192b01b9d236ac62e4fa7675f Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 1 Apr 2024 18:09:59 +0200 Subject: [PATCH] Draft reproduce issue #17529 Signed-off-by: Marek Siarkowicz --- go.mod | 2 +- go.sum | 4 +-- server/storage/mvcc/watchable_store.go | 5 ---- tests/framework/e2e/etcd_process.go | 3 +-- tests/go.mod | 2 +- tests/go.sum | 4 +-- tests/robustness/failpoint/failpoint.go | 29 +++++++++++---------- tests/robustness/failpoint/gofail.go | 1 + tests/robustness/main_test.go | 4 +-- tests/robustness/scenarios.go | 34 ++++++++++++++++--------- tests/robustness/traffic/kubernetes.go | 6 ++--- tests/robustness/traffic/traffic.go | 2 +- 12 files changed, 51 insertions(+), 45 deletions(-) diff --git a/go.mod b/go.mod index 28bfa5269ab8..33a4aa10c6dd 100644 --- a/go.mod +++ b/go.mod @@ -77,7 +77,7 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect - go.etcd.io/gofail v0.1.0 // indirect + go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect go.opentelemetry.io/otel v1.27.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect diff --git a/go.sum b/go.sum index 7b7e9109b144..f39ea94d0d59 100644 --- a/go.sum +++ b/go.sum @@ -134,8 +134,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4= go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw= -go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg= -go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= +go.etcd.io/gofail 
v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA= go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns= go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= diff --git a/server/storage/mvcc/watchable_store.go b/server/storage/mvcc/watchable_store.go index ad17b2be7ace..a67bf1111957 100644 --- a/server/storage/mvcc/watchable_store.go +++ b/server/storage/mvcc/watchable_store.go @@ -372,11 +372,6 @@ func (s *watchableStore) syncWatchers() int { victims := make(watcherBatch) wb := newWatcherBatch(wg, evs) for w := range wg.watchers { - if w.minRev < compactionRev { - // Skip the watcher that failed to send compacted watch response due to w.ch is full. - // Next retry of syncWatchers would try to resend the compacted watch response to w.ch - continue - } w.minRev = curRev + 1 eb, ok := wb[w] diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index 31aac3a55f7c..f913efffeec3 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -400,8 +400,7 @@ func (f *BinaryFailpoints) DeactivateHTTP(ctx context.Context, failpoint string) return err } httpClient := http.Client{ - // TODO: Decrease after deactivate is not blocked by sleep https://github.com/etcd-io/gofail/issues/64 - Timeout: 2 * time.Second, + Timeout: time.Second, } if f.clientTimeout != 0 { httpClient.Timeout = f.clientTimeout diff --git a/tests/go.mod b/tests/go.mod index 4e8514285025..f7512f390348 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -35,7 +35,7 @@ require ( go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 go.etcd.io/etcd/server/v3 v3.6.0-alpha.0 - 
go.etcd.io/gofail v0.1.0 + go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 go.etcd.io/raft/v3 v3.6.0-alpha.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 go.opentelemetry.io/otel v1.27.0 diff --git a/tests/go.sum b/tests/go.sum index 60b2b291297b..d8c67da2a724 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -138,8 +138,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4= go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw= -go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg= -go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA= go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns= go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go index 14e6ddf7e940..c1b4000989c1 100644 --- a/tests/robustness/failpoint/failpoint.go +++ b/tests/robustness/failpoint/failpoint.go @@ -36,20 +36,21 @@ const ( var ( allFailpoints = []Failpoint{ - KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, - DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic, - BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic, - 
BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic, - CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic, - CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork, - RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic, - RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot, - BeforeApplyOneConfChangeSleep, - MemberReplace, - DropPeerNetwork, - RaftBeforeSaveSleep, - RaftAfterSaveSleep, - ApplyBeforeOpenSnapshot, + //KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, + //DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic, + //BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic, + //BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic, + //CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic, + //CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork, + //RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic, + //RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot, + //BeforeApplyOneConfChangeSleep, + //MemberReplace, + //DropPeerNetwork, + //RaftBeforeSaveSleep, + //RaftAfterSaveSleep, + //ApplyBeforeOpenSnapshot, + SleepBeforeSendWatchResponse, } ) diff --git a/tests/robustness/failpoint/gofail.go b/tests/robustness/failpoint/gofail.go index b6218edb9af9..0ea2acc28e2c 100644 --- a/tests/robustness/failpoint/gofail.go +++ b/tests/robustness/failpoint/gofail.go @@ -59,6 +59,7 @@ var ( BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second} 
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second} RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second} + SleepBeforeSendWatchResponse Failpoint = gofailSleepAndDeactivate{"beforeSendWatchResponse", time.Second} ) type goPanicFailpoint struct { diff --git a/tests/robustness/main_test.go b/tests/robustness/main_test.go index a4b6d763a29f..87c20a3e6132 100644 --- a/tests/robustness/main_test.go +++ b/tests/robustness/main_test.go @@ -94,8 +94,8 @@ func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testSce t.Fatal(err) } - watchProgressNotifyEnabled := r.Cluster.Cfg.ServerConfig.ExperimentalWatchProgressNotifyInterval != 0 - validateGotAtLeastOneProgressNotify(t, r.Client, s.watch.requestProgress || watchProgressNotifyEnabled) + //watchProgressNotifyEnabled := r.Cluster.Cfg.ServerConfig.ExperimentalWatchProgressNotifyInterval != 0 + //validateGotAtLeastOneProgressNotify(t, r.Client, s.watch.requestProgress || watchProgressNotifyEnabled) validateConfig := validate.Config{ExpectRevisionUnique: s.traffic.ExpectUniqueRevision()} r.Visualize = validate.ValidateAndReturnVisualize(t, lg, validateConfig, r.Client, persistedRequests, 5*time.Minute) diff --git a/tests/robustness/scenarios.go b/tests/robustness/scenarios.go index 57e0bd204438..be0da29debb3 100644 --- a/tests/robustness/scenarios.go +++ b/tests/robustness/scenarios.go @@ -33,22 +33,22 @@ type TrafficProfile struct { } var trafficProfiles = []TrafficProfile{ - { - Traffic: traffic.EtcdPut, - Profile: traffic.HighTrafficProfile, - }, - { - Traffic: traffic.EtcdPutDeleteLease, - Profile: traffic.LowTraffic, - }, + //{ + // Traffic: traffic.EtcdPut, + // Profile: traffic.HighTrafficProfile, + //}, + //{ + // Traffic: traffic.EtcdPutDeleteLease, + // Profile: traffic.LowTraffic, + //}, { Traffic: traffic.Kubernetes, Profile: traffic.HighTrafficProfile, }, - { - Traffic: traffic.Kubernetes, - Profile: traffic.LowTraffic, - }, + 
//{ + // Traffic: traffic.Kubernetes, + // Profile: traffic.LowTraffic, + //}, } type testScenario struct { @@ -191,6 +191,16 @@ func regressionScenarios(t *testing.T) []testScenario { e2e.WithClusterSize(1), ), }) + scenarios = append(scenarios, testScenario{ + name: "Issue17529", + profile: traffic.HighTrafficProfile, + traffic: traffic.Kubernetes, + failpoint: failpoint.SleepBeforeSendWatchResponse, + cluster: *e2e.NewConfig( + e2e.WithClusterSize(1), + e2e.WithGoFailEnabled(true), + ), + }) if v.Compare(version.V3_5) >= 0 { opts := []e2e.EPClusterOption{ e2e.WithSnapshotCount(100), diff --git a/tests/robustness/traffic/kubernetes.go b/tests/robustness/traffic/kubernetes.go index e38dcbc3dd9a..d4aea9ba7edb 100644 --- a/tests/robustness/traffic/kubernetes.go +++ b/tests/robustness/traffic/kubernetes.go @@ -37,9 +37,9 @@ var ( resource: "pods", namespace: "default", writeChoices: []choiceWeight[KubernetesRequestType]{ - {choice: KubernetesUpdate, weight: 85}, - {choice: KubernetesDelete, weight: 5}, - {choice: KubernetesCreate, weight: 5}, + {choice: KubernetesUpdate, weight: 75}, + {choice: KubernetesDelete, weight: 10}, + {choice: KubernetesCreate, weight: 10}, {choice: KubernetesCompact, weight: 5}, }, } diff --git a/tests/robustness/traffic/traffic.go b/tests/robustness/traffic/traffic.go index e7f293a14c61..31fec7fa7603 100644 --- a/tests/robustness/traffic/traffic.go +++ b/tests/robustness/traffic/traffic.go @@ -34,7 +34,7 @@ import ( var ( DefaultLeaseTTL int64 = 7200 RequestTimeout = 200 * time.Millisecond - WatchTimeout = 400 * time.Millisecond + WatchTimeout = time.Second MultiOpTxnOpCount = 4 LowTraffic = Profile{