Skip to content

Commit

Permalink
[active-standby] Fix default route handler race condition (#254)
Browse files Browse the repository at this point in the history
What is the motivation for this PR?
Fix the race condition of the default route notification.

This is similar to #104

If there are multiple default route notifications received by linkmgrd, the mux port posts the default route handlers wrapped by strand. But boost asio doesn't guarantee the execution order of the default route handlers, so the final state machine default route could be any intermediate default route state.

For example, for default route notifications like:

[2024-06-20 08:28:57.872911] [warning] MuxPort.cpp:365 handleDefaultRouteState: port: EtherTest01, state db default route state: na
[2024-06-20 08:28:57.872954] [warning] MuxPort.cpp:365 handleDefaultRouteState: port: EtherTest01, state db default route state: ok
The final state machine default route state could be "ok" if the handler for "ok" is executed after the handler for "na".
The final state machine default route state could be "na" if the handler for "ok" is executed before the handler for "na".

Signed-off-by: Longxiang Lyu lolv@microsoft.com

Work item tracking
Microsoft ADO (number only): 28471183
How did you do it?
post the default route handlers directly through strand instead of using strand::wrap, so the handlers are executed in the same order as the handlers' post order.

How did you verify/test it?
without this PR, UT fail:

Signed-off-by: Longxiang Lyu <lolv@microsoft.com>
  • Loading branch information
lolyu authored and mssonicbld committed Aug 21, 2024
1 parent 5730b79 commit 2eccbfa
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/MuxPort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,12 +369,11 @@ void MuxPort::handleDefaultRouteState(const std::string &routeState)
state = link_manager::LinkManagerStateMachineBase::DefaultRoute::NA;
}

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
boost::asio::post(mStrand, boost::bind(
&link_manager::LinkManagerStateMachineBase::handleDefaultRouteStateNotification,
mLinkManagerStateMachinePtr.get(),
state
)));
));
}

//
Expand Down
1 change: 1 addition & 0 deletions test/FakeMuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class FakeMuxPort : public ::mux::MuxPort {
link_prober::LinkProberStateMachineBase* getLinkProberStateMachinePtr() { return getLinkManagerStateMachinePtr()->getLinkProberStateMachinePtr().get(); };
mux_state::MuxStateMachine& getMuxStateMachine() { return getLinkManagerStateMachinePtr()->getMuxStateMachine(); };
link_state::LinkStateMachine& getLinkStateMachine() { return getLinkManagerStateMachinePtr()->getLinkStateMachine(); };
link_manager::LinkManagerStateMachineBase::DefaultRoute getDefaultRouteState() { return getLinkManagerStateMachinePtr()->getDefaultRouteState(); };

bool getPendingMuxModeChange() { return getActiveStandbyStateMachinePtr()->mPendingMuxModeChange; };
common::MuxPortConfig::Mode getTargetMuxMode() { return getActiveStandbyStateMachinePtr()->mTargetMuxMode; };
Expand Down
43 changes: 43 additions & 0 deletions test/LinkManagerStateMachineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "LinkManagerStateMachineTest.h"
#include "link_prober/LinkProberStateMachineBase.h"
#include "common/MuxLogger.h"

#define VALIDATE_STATE(p, m, l) \
do { \
Expand Down Expand Up @@ -69,6 +70,21 @@ void LinkManagerStateMachineTest::runIoService(uint32_t count)
}
}

void LinkManagerStateMachineTest::runIoServiceThreaded(uint32_t count)
{
mWork = std::make_unique<boost::asio::io_service::work>(mIoService);
for (uint8_t i = 0; i < count; i++) {
mThreadGroup.create_thread(boost::bind(&boost::asio::io_service::run, &mIoService));
}
}

void LinkManagerStateMachineTest::stopIoServiceThreaded()
{
mWork.reset();
mIoService.stop();
mThreadGroup.join_all();
}

void LinkManagerStateMachineTest::postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count, uint32_t detect_multiplier)
{
switch (label) {
Expand Down Expand Up @@ -1529,4 +1545,31 @@ TEST_F(LinkManagerStateMachineTest, ProbeLinkInSuspendTimeout)
EXPECT_EQ(mFakeMuxPort.mFakeLinkProber->mDetectLinkCallCount, 1);
}

TEST_F(LinkManagerStateMachineTest, DefaultRouteStateRaceCondition)
{
mFakeMuxPort.activateStateMachine();
runIoServiceThreaded(3);

mMuxConfig.enableDefaultRouteFeature(true);
for (int i = 0; i < 10000; ++i)
{
MUXLOGDEBUG(boost::format("Iteration %d") % i);
mFakeMuxPort.handleDefaultRouteState("na");
mFakeMuxPort.handleDefaultRouteState("ok");

int check = 0;
while (((mFakeMuxPort.mFakeLinkProber->mShutdownTxProbeCallCount < i + 1) ||
(mFakeMuxPort.mFakeLinkProber->mRestartTxProbeCallCount < i + 1)) && (check < 10))
{
usleep(1000);
++check;
}

EXPECT_EQ(mFakeMuxPort.getDefaultRouteState(), link_manager::LinkManagerStateMachineBase::DefaultRoute::OK);
EXPECT_EQ(mFakeMuxPort.mFakeLinkProber->mShutdownTxProbeCallCount, i + 1);
EXPECT_EQ(mFakeMuxPort.mFakeLinkProber->mRestartTxProbeCallCount, i + 1);
}
stopIoServiceThreaded();
}

} /* namespace test */
5 changes: 5 additions & 0 deletions test/LinkManagerStateMachineTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#ifndef LINKMANAGERSTATEMACHINETEST_H_
#define LINKMANAGERSTATEMACHINETEST_H_

#include <memory>
#include "gtest/gtest.h"

#include "FakeMuxPort.h"
Expand All @@ -39,6 +40,8 @@ class LinkManagerStateMachineTest: public ::testing::Test
virtual ~LinkManagerStateMachineTest() = default;

void runIoService(uint32_t count = 0);
void runIoServiceThreaded(uint32_t count = 3);
void stopIoServiceThreaded();
void postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count = 0, uint32_t detect_multiplier = 0);
void postMuxEvent(mux_state::MuxState::Label label, uint32_t count = 0);
void postLinkEvent(link_state::LinkState::Label label, uint32_t count = 0);
Expand All @@ -58,6 +61,8 @@ class LinkManagerStateMachineTest: public ::testing::Test

public:
boost::asio::io_service mIoService;
std::unique_ptr<boost::asio::io_service::work> mWork;
boost::thread_group mThreadGroup;
common::MuxConfig mMuxConfig;
std::shared_ptr<FakeDbInterface> mDbInterfacePtr;
std::string mPortName = "EtherTest01";
Expand Down

0 comments on commit 2eccbfa

Please sign in to comment.