client: Support parallel TSO RPC requests on single dispatcher loop (#…

…8633) close #8432 client: Support parallel TSO RPC requests on single dispatcher loop This commit supports handling multiple TSO RPCs concurrently in a single dispatcher loop, to reduce the expected time that each GetTS call spends waiting for the next batch. Signed-off-by: MyonKeminta <MyonKeminta@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
tikv · Sep 26, 2024 · 642f0e9 · 642f0e9
1 parent 7907679
commit 642f0e9
Show file tree

Hide file tree

Showing 9 changed files with 828 additions and 50 deletions.
diff --git a/client/client.go b/client/client.go
@@ -797,6 +797,12 @@ func (c *client) UpdateOption(option DynamicOption, value any) error {
 			return errors.New("[pd] invalid value type for EnableFollowerHandle option, it should be bool")
 		}
 		c.option.setEnableFollowerHandle(enable)
+	case TSOClientRPCConcurrency:
+		value, ok := value.(int)
+		if !ok {
+			return errors.New("[pd] invalid value type for TSOClientRPCConcurrency option, it should be int")
+		}
+		c.option.setTSOClientRPCConcurrency(value)
 	default:
 		return errors.New("[pd] unsupported client option")
 	}

diff --git a/client/metrics.go b/client/metrics.go
@@ -47,6 +47,7 @@ var (
 	tsoBatchSendLatency      prometheus.Histogram
 	requestForwarded         *prometheus.GaugeVec
 	ongoingRequestCountGauge *prometheus.GaugeVec
+	estimateTSOLatencyGauge  *prometheus.GaugeVec
 )
 
 func initMetrics(constLabels prometheus.Labels) {
@@ -127,6 +128,14 @@ func initMetrics(constLabels prometheus.Labels) {
 			Help:        "Current count of ongoing batch tso requests",
 			ConstLabels: constLabels,
 		}, []string{"stream"})
+	estimateTSOLatencyGauge = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace:   "pd_client",
+			Subsystem:   "request",
+			Name:        "estimate_tso_latency",
+			Help:        "Estimated latency of an RTT of getting TSO",
+			ConstLabels: constLabels,
+		}, []string{"stream"})
 }
 
 var (
@@ -236,4 +245,5 @@ func registerMetrics() {
 	prometheus.MustRegister(tsoBatchSize)
 	prometheus.MustRegister(tsoBatchSendLatency)
 	prometheus.MustRegister(requestForwarded)
+	prometheus.MustRegister(estimateTSOLatencyGauge)
 }
diff --git a/client/option.go b/client/option.go
@@ -29,6 +29,7 @@ const (
 	defaultMaxTSOBatchWaitInterval time.Duration = 0
 	defaultEnableTSOFollowerProxy                = false
 	defaultEnableFollowerHandle                  = false
+	defaultTSOClientRPCConcurrency               = 1
 )
 
 // DynamicOption is used to distinguish the dynamic option type.
@@ -43,6 +44,8 @@ const (
 	EnableTSOFollowerProxy
 	// EnableFollowerHandle is the follower handle option.
 	EnableFollowerHandle
+	// TSOClientRPCConcurrency controls the number of TSO RPC requests that may be ongoing at the same time in a single TSO client.
+	TSOClientRPCConcurrency
 
 	dynamicOptionCount
 )
@@ -77,6 +80,7 @@ func newOption() *option {
 	co.dynamicOptions[MaxTSOBatchWaitInterval].Store(defaultMaxTSOBatchWaitInterval)
 	co.dynamicOptions[EnableTSOFollowerProxy].Store(defaultEnableTSOFollowerProxy)
 	co.dynamicOptions[EnableFollowerHandle].Store(defaultEnableFollowerHandle)
+	co.dynamicOptions[TSOClientRPCConcurrency].Store(defaultTSOClientRPCConcurrency)
 	return co
 }
 
@@ -127,3 +131,14 @@ func (o *option) setEnableTSOFollowerProxy(enable bool) {
 func (o *option) getEnableTSOFollowerProxy() bool { // returns the value currently stored for the EnableTSOFollowerProxy dynamic option
 	return o.dynamicOptions[EnableTSOFollowerProxy].Load().(bool) // unchecked type assertion: panics if the loaded value is not a bool
 }
+
+func (o *option) setTSOClientRPCConcurrency(value int) { // stores value as the TSOClientRPCConcurrency dynamic option when it differs from the current one
+	old := o.getTSOClientRPCConcurrency() // current setting, read so that an unchanged value is not stored again
+	if value != old { // value is stored as given; no range check is applied here
+		o.dynamicOptions[TSOClientRPCConcurrency].Store(value)
+	}
+}
+
+func (o *option) getTSOClientRPCConcurrency() int { // returns the value currently stored for the TSOClientRPCConcurrency dynamic option
+	return o.dynamicOptions[TSOClientRPCConcurrency].Load().(int) // unchecked type assertion: panics if the loaded value is not an int
+}
diff --git a/client/tso_batch_controller.go b/client/tso_batch_controller.go
@@ -64,6 +64,17 @@ func (tbc *tsoBatchController) fetchPendingRequests(ctx context.Context, tsoRequ
 	// TODO: `tbc.collectedRequestCount` should never be non-empty here. Consider do assertion here.
 	tbc.collectedRequestCount = 0
 	for {
+		// If the batch size reaches the maxBatchSize limit but the token hasn't arrived yet, don't receive more
+		// requests, and return when the token is ready.
+		if tbc.collectedRequestCount >= tbc.maxBatchSize && !tokenAcquired {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case <-tokenCh:
+				return nil
+			}
+		}
+
 		select {
 		case <-ctx.Done():
 			return ctx.Err()
@@ -146,6 +157,37 @@ fetchPendingRequestsLoop:
 	return nil
 }
 
+// fetchRequestsWithTimer adds requests received from tsoRequestCh to the batch already held by tbc (the collected count is
+// not reset here). It first waits for requests until the given timer ticks or the batch reaches maxBatchSize, then takes any requests that are immediately available without blocking. It returns ctx.Err() when ctx is observed done, and nil otherwise. The caller must set the timer properly before calling this function.
+func (tbc *tsoBatchController) fetchRequestsWithTimer(ctx context.Context, tsoRequestCh <-chan *tsoRequest, timer *time.Timer) error {
+batchingLoop:
+	for tbc.collectedRequestCount < tbc.maxBatchSize { // phase 1: block for requests until the timer ticks or the batch is full
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case req := <-tsoRequestCh:
+			tbc.pushRequest(req)
+		case <-timer.C:
+			break batchingLoop // timer ticked: stop waiting and move on to the non-blocking pass
+		}
+	}
+
+	// Try to collect more requests in a non-blocking way.
+nonWaitingBatchLoop:
+	for tbc.collectedRequestCount < tbc.maxBatchSize { // phase 2: still bounded by maxBatchSize
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case req := <-tsoRequestCh:
+			tbc.pushRequest(req)
+		default:
+			break nonWaitingBatchLoop // no request is ready right now, so stop collecting
+		}
+	}
+
+	return nil
+}
+
 func (tbc *tsoBatchController) pushRequest(tsoReq *tsoRequest) {
 	tbc.collectedRequests[tbc.collectedRequestCount] = tsoReq
 	tbc.collectedRequestCount++