Skip to content

Commit

Permalink
[#17940] DocDB: Include node information in pg_locks output
Browse files Browse the repository at this point in the history
Summary:
`GetLockStatus` in pg_client_service.cc returns lock info of either old transactions (older than `min_txn_age`) or the specified transaction set.

For the former case, this diff augments the response by including host node uuid of the involved transactions whose lock info is being returned. We include the node uuid in the transaction heartbeats sent from the query layer and make the txn coordinator store the host node uuid. We return the host node info from the coordinator when queried for old transactions. The query layer parses this info and populates `PgGetLockStatusResponsePB` resp with host node uuids of involved transactions.

When the incoming `PgGetLockStatusRequestPB` contains a transaction id, we directly broadcast get lock status request to all tservers skipping the coordinator. Since we skip the coordinator, we don't get the host node info. This will be addressed as part of #16913
Jira: DB-7013

Test Plan:
./yb_build.sh --cxx-test pgwrapper_pg_get_lock_status-test --gtest_filter PgGetLockStatusTest.TestGetLockStatusOfOldTxns
./yb_build.sh --cxx-test pgwrapper_pg_get_lock_status-test --gtest_filter PgGetLockStatusTest.TestGetLockStatusLimitNumOldTxns

Reviewers: rsami

Reviewed By: rsami

Subscribers: ybase, bogdan

Differential Revision: https://phorge.dev.yugabyte.com/D26957
  • Loading branch information
basavaraj29 committed Jul 19, 2023
1 parent 50123f1 commit 6a79146
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 10 deletions.
4 changes: 4 additions & 0 deletions src/yb/client/client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,10 @@ void YBClient::SetLocalTabletServer(const string& ts_uuid,
data_->meta_cache_->SetLocalTabletServer(ts_uuid, proxy, local_tserver);
}

const internal::RemoteTabletServer* YBClient::GetLocalTabletServer() const {
return data_->meta_cache_->local_tserver();
}

Result<bool> YBClient::IsLoadBalanced(uint32_t num_servers) {
IsLoadBalancedRequestPB req;
IsLoadBalancedResponsePB resp;
Expand Down
2 changes: 2 additions & 0 deletions src/yb/client/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ class YBClient {
const std::shared_ptr<tserver::TabletServerServiceProxy>& proxy,
const tserver::LocalTabletServer* local_tserver);

const internal::RemoteTabletServer* GetLocalTabletServer() const;

// List only those tables whose names pass a substring match on 'filter'.
//
// 'tables' is appended to only on success.
Expand Down
4 changes: 4 additions & 0 deletions src/yb/client/transaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,10 @@ class YBTransaction::Impl final : public internal::TxnBatcherIf {
}
}
}
auto* local_ts = manager_->client()->GetLocalTabletServer();
if (local_ts) {
state.set_host_node_uuid(local_ts->permanent_uuid());
}

if (aborted_set_for_rollback_heartbeat) {
VLOG_WITH_PREFIX(4) << "Setting aborted_set_for_rollback_heartbeat: "
Expand Down
3 changes: 3 additions & 0 deletions src/yb/tablet/operations.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ message TransactionStatePB {

// The time at which this transaction started at the client.
optional fixed64 start_time = 10;

// Stores the uuid of the node hosting the transaction at the query layer.
optional string host_node_uuid = 11;
}

message TruncatePB {
Expand Down
15 changes: 15 additions & 0 deletions src/yb/tablet/transaction_coordinator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ class TransactionState {
return is_external_;
}

const auto& host_node_uuid() const {
return host_node_uuid_;
}

std::string ToString() const {
return Format("{ id: $0 last_touch: $1 status: $2 involved_tablets: $3 replicating: $4 "
Expand Down Expand Up @@ -701,6 +704,12 @@ class TransactionState {
first_touch_ = request->request()->start_time();
request->mutable_request()->clear_tablets();
}
if (!state.host_node_uuid().empty()) {
if (host_node_uuid_.empty()) {
host_node_uuid_ = state.host_node_uuid();
}
DCHECK_EQ(host_node_uuid_, state.host_node_uuid());
}

if (!status.ok()) {
context_.CompleteWithStatus(std::move(request), std::move(status));
Expand Down Expand Up @@ -1004,6 +1013,8 @@ class TransactionState {
std::deque<std::unique_ptr<tablet::UpdateTxnOperation>> request_queue_;

std::vector<TransactionAbortCallback> abort_waiters_;
// Node uuid hosting the transaction at the query layer.
std::string host_node_uuid_;
};

struct CompleteWithStatusEntry {
Expand Down Expand Up @@ -1148,6 +1159,10 @@ class TransactionCoordinator::Impl : public TransactionStateContext,
resp_txn->set_transaction_id(id.data(), id.size());
*resp_txn->mutable_aborted_subtxn_set() = it->GetAbortedSubtxnInfo()->pb();
resp_txn->set_start_time(it->first_touch());
const auto& host_node_uuid = it->host_node_uuid();
if (!host_node_uuid.empty()) {
resp_txn->set_host_node_uuid(host_node_uuid);
}

for (const auto& tablet_id : it->pending_involved_tablets()) {
resp_txn->add_tablets(tablet_id);
Expand Down
7 changes: 7 additions & 0 deletions src/yb/tserver/pg_client.proto
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,13 @@ message PgGetLockStatusResponsePB {
}

repeated NodeLockStatusResponsePB node_locks = 2;

message TransactionList {
repeated bytes transaction_ids = 1;
}

// Map of transactions keyed by the host node uuid.
map<string, TransactionList> transactions_by_node = 3;
}

message PgListLiveTabletServersRequestPB {
Expand Down
31 changes: 21 additions & 10 deletions src/yb/tserver/pg_client_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,6 @@ class PgClientServiceImpl::Impl {
}
const auto& min_txn_age_ms = req.min_txn_age_ms();
const auto& max_num_txns = req.max_num_txns();
RSTATUS_DCHECK(min_txn_age_ms >= 0, InvalidArgument,
"Request must contain non-negative min_txn_age_ms, got $0", min_txn_age_ms);
RSTATUS_DCHECK(max_num_txns > 0, InvalidArgument,
"Request must contain max_num_txns > 0, got $0", max_num_txns);
// Sleep before fetching old transactions and their involved tablets. This is necessary for
Expand Down Expand Up @@ -480,9 +478,12 @@ class PgClientServiceImpl::Impl {

while (!old_txns_pq.empty()) {
auto& old_txn = old_txns_pq.top();
const auto& txn_id = old_txn->transaction_id();
auto& node_entry = (*resp->mutable_transactions_by_node())[old_txn->host_node_uuid()];
node_entry.add_transaction_ids(txn_id);
for (const auto& tablet_id : old_txn->tablets()) {
auto& tablet_entry = (*lock_status_req.mutable_transactions_by_tablet())[tablet_id];
tablet_entry.add_transaction_ids(old_txn->transaction_id());
tablet_entry.add_transaction_ids(txn_id);
}
old_txns_pq.pop();
}
Expand Down Expand Up @@ -540,12 +541,6 @@ class PgClientServiceImpl::Impl {
}
}

std::set<std::string> limit_to_txns;
for (const auto& txn : req.transaction_ids()) {
auto txn_id = VERIFY_RESULT(FullyDecodeTransactionId(txn));
limit_to_txns.insert(txn_id.ToString());
}

// Track the highest seen term for each tablet id.
std::map<TabletId, uint64_t> peer_term;
for (const auto& node_lock : resp->node_locks()) {
Expand All @@ -556,6 +551,7 @@ class PgClientServiceImpl::Impl {
}

std::unordered_set<TabletId> tablets;
std::set<TransactionId> seen_transactions;
for (auto& node_lock : *resp->mutable_node_locks()) {
auto* tablet_lock_infos = node_lock.mutable_tablet_lock_infos();
for (auto lock_it = tablet_lock_infos->begin(); lock_it != tablet_lock_infos->end();) {
Expand All @@ -570,10 +566,13 @@ class PgClientServiceImpl::Impl {
lock_it = node_lock.mutable_tablet_lock_infos()->erase(lock_it);
continue;
}
lock_it++;
RSTATUS_DCHECK(
tablets.emplace(tablet_id).second, IllegalState,
"Found tablet $0 more than once in PgGetLockStatusResponsePB", tablet_id);
for (auto& [txn, _] : lock_it->transaction_locks()) {
seen_transactions.insert(VERIFY_RESULT(TransactionId::FromString(txn)));
}
lock_it++;
}
}

Expand All @@ -586,6 +585,18 @@ class PgClientServiceImpl::Impl {
txn_id, tablet);
}
}
// Ensure that the response contains host node uuid for all involved transactions.
for (const auto& [_, txn_list] : resp->transactions_by_node()) {
for (const auto& txn : txn_list.transaction_ids()) {
seen_transactions.erase(VERIFY_RESULT(FullyDecodeTransactionId(txn)));
}
}
// TODO(pglocks): We currently don't populate transaction's host node info when the incoming
// PgGetLockStatusRequestPB has transaction_id field set. This shouldn't be the case once
// https://github.com/yugabyte/yugabyte-db/issues/16913 is addressed. As part of the fix,
// remove !req.transaction_ids().empty() in the below check.
RSTATUS_DCHECK(seen_transactions.empty() || !req.transaction_ids().empty(), IllegalState,
"Host node uuid not set for all involved transactions");
return Status::OK();
}

Expand Down
1 change: 1 addition & 0 deletions src/yb/tserver/tserver_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ message GetOldTransactionsResponsePB {
repeated bytes tablets = 2;
optional SubtxnSetPB aborted_subtxn_set = 3;
optional fixed64 start_time = 4;
optional string host_node_uuid = 5;
}
repeated OldTransactionMetadataPB txn = 2;

Expand Down

0 comments on commit 6a79146

Please sign in to comment.