From ef89ed83f427a1e1c988fe9dac5465a4372f8285 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1niel=20Buga?=
Date: Thu, 19 Sep 2024 14:38:16 +0200
Subject: [PATCH] Rework critical section to avoid spinning in irq-free
 context

---
 esp-hal/CHANGELOG.md          |   1 +
 esp-hal/src/lock.rs           | 231 ++++++++++++++++------------------
 esp-hal/src/timer/systimer.rs |   6 +-
 esp-hal/src/timer/timg.rs     |   4 +-
 4 files changed, 113 insertions(+), 129 deletions(-)

diff --git a/esp-hal/CHANGELOG.md b/esp-hal/CHANGELOG.md
index 37c8bdbbfa..c9b13aad53 100644
--- a/esp-hal/CHANGELOG.md
+++ b/esp-hal/CHANGELOG.md
@@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Changed the parameters of `Spi::with_pins` to no longer be optional (#2133)
 - Renamed `DummyPin` to `NoPin` and removed all internal logic from it. (#2133)
 - The `NO_PIN` constant has been removed. (#2133)
+- Allow handling interrupts while trying to lock the critical section on multi-core chips. (#2197)
 
 ### Fixed
 
diff --git a/esp-hal/src/lock.rs b/esp-hal/src/lock.rs
index 2fa788aea9..8a07400545 100644
--- a/esp-hal/src/lock.rs
+++ b/esp-hal/src/lock.rs
@@ -1,44 +1,3 @@
-struct CriticalSection;
-
-critical_section::set_impl!(CriticalSection);
-
-unsafe impl critical_section::Impl for CriticalSection {
-    unsafe fn acquire() -> critical_section::RawRestoreState {
-        #[cfg_attr(single_core, allow(unused_mut))]
-        let mut tkn = single_core::disable_interrupts();
-
-        #[cfg(multi_core)]
-        {
-            use multicore::{LockKind, MULTICORE_LOCK, REENTRY_FLAG};
-
-            // FIXME: don't spin with interrupts disabled
-            match MULTICORE_LOCK.lock() {
-                LockKind::Lock => {}
-                LockKind::Reentry => tkn |= REENTRY_FLAG,
-            }
-        }
-
-        tkn
-    }
-
-    unsafe fn release(token: critical_section::RawRestoreState) {
-        #[cfg(multi_core)]
-        {
-            use multicore::{MULTICORE_LOCK, REENTRY_FLAG};
-
-            debug_assert!(MULTICORE_LOCK.is_owned_by_current_thread());
-
-            if token & REENTRY_FLAG != 0 {
-                return;
-            }
-
-            MULTICORE_LOCK.unlock();
-        }
-
-        single_core::reenable_interrupts(token);
-    }
-}
-
 mod single_core {
     pub unsafe fn disable_interrupts() -> critical_section::RawRestoreState {
         cfg_if::cfg_if! {
@@ -66,8 +25,8 @@ mod single_core {
             const RESERVED_MASK: u32 = 0b1111_1111_1111_1000_1111_0000_0000_0000;
             debug_assert!(token & RESERVED_MASK == 0);
             core::arch::asm!(
-                "wsr.ps {0}",
-                "rsync", in(reg) token)
+                    "wsr.ps {0}",
+                    "rsync", in(reg) token)
             } else {
                 panic!()
             }
@@ -91,29 +50,20 @@ mod multicore {
         }
     }
 
-    // We're using a value that we know get_raw_core() will never return. This
-    // avoids an unnecessary increment of the core ID.
-    //
-    // Safety: Ensure that when adding new chips get_raw_core doesn't return this
-    // value. TODO when we have HIL tests ensure this is the case!
+    // Safety: Ensure that when adding new chips `get_raw_core` doesn't return this
+    // value.
+    // FIXME: ensure in HIL tests this is the case!
     const UNUSED_THREAD_ID_VALUE: usize = 0x100;
 
     pub fn thread_id() -> usize {
         crate::get_raw_core()
     }
 
-    pub(super) static MULTICORE_LOCK: ReentrantMutex = ReentrantMutex::new();
-
-    pub(super) enum LockKind {
-        Lock = 0,
-        Reentry,
-    }
-
-    pub(super) struct ReentrantMutex {
+    pub(super) struct AtomicLock {
         owner: AtomicUsize,
     }
 
-    impl ReentrantMutex {
+    impl AtomicLock {
         pub const fn new() -> Self {
             Self {
                 owner: AtomicUsize::new(UNUSED_THREAD_ID_VALUE),
@@ -128,20 +78,6 @@ mod multicore {
             self.owner.load(Ordering::Relaxed) == thread
         }
 
-        pub fn lock(&self) -> LockKind {
-            let current_thread_id = thread_id();
-
-            match self.try_lock(current_thread_id) {
-                Ok(_) => LockKind::Lock,
-                Err(owner) if owner == current_thread_id => LockKind::Reentry,
-                Err(_) => {
-                    while self.try_lock(current_thread_id).is_err() {}
-
-                    LockKind::Lock
-                }
-            }
-        }
-
         pub fn try_lock(&self, new_owner: usize) -> Result<(), usize> {
             self.owner
                 .compare_exchange(
@@ -153,52 +89,35 @@ mod multicore {
                 .map(|_| ())
         }
 
-        pub fn unlock(&self) {
+        /// # Safety
+        ///
+        /// This function must only be called if the lock was acquired by the
+        /// current thread.
+        pub unsafe fn unlock(&self) {
+            debug_assert!(self.is_owned_by_current_thread());
             self.owner.store(UNUSED_THREAD_ID_VALUE, Ordering::Release);
         }
     }
 }
 
-// The state of a re-entrant lock
-pub(crate) struct LockState {
+pub(crate) struct Lock {
     #[cfg(multi_core)]
-    inner: multicore::ReentrantMutex,
+    inner: multicore::AtomicLock,
 }
 
-impl LockState {
+impl Lock {
     pub const fn new() -> Self {
         Self {
             #[cfg(multi_core)]
-            inner: multicore::ReentrantMutex::new(),
+            inner: multicore::AtomicLock::new(),
         }
     }
-}
 
-fn interrupt_free<T>(f: impl FnOnce() -> T) -> T {
-    cfg_if::cfg_if! {
-        if #[cfg(riscv)] {
-            esp_riscv_rt::riscv::interrupt::free(f)
-        } else if #[cfg(xtensa)] {
-            xtensa_lx::interrupt::free(|_| f())
-        } else {
-            panic!()
-        }
-    }
-}
-
-// This is preferred over critical-section as this allows you to have multiple
-// locks active at the same time rather than using the global mutex that is
-// critical-section.
-#[allow(unused_variables)]
-pub(crate) fn lock<T>(state: &LockState, f: impl FnOnce() -> T) -> T {
-    // In regards to disabling interrupts, we only need to disable
-    // the interrupts that may be calling this function.
-
-    cfg_if::cfg_if! {
-        if #[cfg(multi_core)] {
-            let mut f = f;
-            let current_thread_id = multicore::thread_id();
-            loop {
+    fn acquire(&self) -> critical_section::RawRestoreState {
+        cfg_if::cfg_if! {
+            if #[cfg(single_core)] {
+                single_core::disable_interrupts()
+            } else if #[cfg(multi_core)] {
                 // We acquire the lock inside an interrupt-free context to prevent a subtle
                 // race condition:
                 // In case an interrupt handler tries to lock the same resource, it could win if
@@ -207,35 +126,99 @@ pub(crate) fn lock<T>(state: &LockState, f: impl FnOnce() -> T) -> T {
                 // If we allow reentrancy, the interrupt handler would technically be a different
                 // context with the same `current_thread_id`, so it would be allowed to lock the
                 // resource in a theoretically incorrect way.
-                let try_run_locked = || {
-                    // Only use `try_lock` here so that we don't spin in interrupt-free context.
-                    match state.inner.try_lock(current_thread_id) {
-                        Ok(()) => {
-                            let result = f();
-
-                            state.inner.unlock();
-
-                            Ok(result)
+                let try_lock = |current_thread_id| {
+                    let mut tkn = unsafe { single_core::disable_interrupts() };
+
+                    match self.inner.try_lock(current_thread_id) {
+                        Ok(()) => Some(tkn),
+                        Err(owner) if owner == current_thread_id => {
+                            tkn |= multicore::REENTRY_FLAG;
+                            Some(tkn)
                         }
-                        Err(owner) => {
-                            assert!(owner != current_thread_id, "lock is not re-entrant!");
-
-                            Err(f)
+                        Err(_) => {
+                            unsafe { single_core::reenable_interrupts(tkn) };
+                            None
                         }
                     }
                 };
 
-                match interrupt_free(try_run_locked) {
-                    Ok(result) => return result,
-                    Err(the_function) => f = the_function,
+                let current_thread_id = multicore::thread_id();
+                loop {
+                    match try_lock(current_thread_id) {
+                        Some(token) => return token,
+                        None => {}
+                    }
                 }
-
-                // Consider using core::hint::spin_loop(); Might need SW_INT.
             }
-        } else {
-            // Disabling interrupts is enough on single core chips to ensure mutual
-            // exclusion.
-            interrupt_free(f)
         }
     }
+
+    /// # Safety
+    /// This function must only be called if the lock was acquired by the
+    /// current thread.
+    unsafe fn release(&self, token: critical_section::RawRestoreState) {
+        #[cfg(multi_core)]
+        {
+            if token & multicore::REENTRY_FLAG != 0 {
+                return;
+            }
+
+            self.inner.unlock();
+        }
+
+        single_core::reenable_interrupts(token);
+    }
+}
+
+// This is preferred over critical-section as it allows holding multiple
+// independent locks at the same time, rather than funneling everything
+// through critical-section's single global mutex.
+#[allow(unused_variables)]
+pub(crate) fn lock<T>(lock: &Lock, f: impl FnOnce() -> T) -> T {
+    // As for disabling interrupts: we only need to disable the
+    // interrupts whose handlers may call this function.
+
+    struct LockGuard<'a> {
+        lock: &'a Lock,
+        token: critical_section::RawRestoreState,
+    }
+
+    impl<'a> LockGuard<'a> {
+        fn new(lock: &'a Lock) -> Self {
+            let token = lock.acquire();
+
+            #[cfg(multi_core)]
+            assert!(
+                token & multicore::REENTRY_FLAG == 0,
+                "lock is not reentrant"
+            );
+
+            Self { lock, token }
+        }
+    }
+
+    impl<'a> Drop for LockGuard<'a> {
+        fn drop(&mut self) {
+            unsafe { self.lock.release(self.token) };
+        }
+    }
+
+    let _token = LockGuard::new(lock);
+    f()
+}
+
+struct CriticalSection;
+
+critical_section::set_impl!(CriticalSection);
+
+static CRITICAL_SECTION: Lock = Lock::new();
+
+unsafe impl critical_section::Impl for CriticalSection {
+    unsafe fn acquire() -> critical_section::RawRestoreState {
+        CRITICAL_SECTION.acquire()
+    }
+
+    unsafe fn release(token: critical_section::RawRestoreState) {
+        CRITICAL_SECTION.release(token);
+    }
 }
diff --git a/esp-hal/src/timer/systimer.rs b/esp-hal/src/timer/systimer.rs
index 07ab29d263..ed5f01fbbb 100644
--- a/esp-hal/src/timer/systimer.rs
+++ b/esp-hal/src/timer/systimer.rs
@@ -79,7 +79,7 @@ use fugit::{Instant, MicrosDurationU32, MicrosDurationU64};
 use super::{Error, Timer as _};
 use crate::{
     interrupt::{self, InterruptHandler},
-    lock::{lock, LockState},
+    lock::{lock, Lock},
     peripheral::Peripheral,
     peripherals::{Interrupt, SYSTIMER},
     system::{Peripheral as PeripheralEnable, PeripheralClockControl},
@@ -1008,8 +1008,8 @@ where
     }
 }
 
-static CONF_LOCK: LockState = LockState::new();
-static INT_ENA_LOCK: LockState = LockState::new();
+static CONF_LOCK: Lock = Lock::new();
+static INT_ENA_LOCK: Lock = Lock::new();
 
 // Async functionality of the system timer.
mod asynch { diff --git a/esp-hal/src/timer/timg.rs b/esp-hal/src/timer/timg.rs index d4fea44753..5d018b216a 100644 --- a/esp-hal/src/timer/timg.rs +++ b/esp-hal/src/timer/timg.rs @@ -76,7 +76,7 @@ use crate::soc::constants::TIMG_DEFAULT_CLK_SRC; use crate::{ clock::Clocks, interrupt::{self, InterruptHandler}, - lock::{lock, LockState}, + lock::{lock, Lock}, peripheral::{Peripheral, PeripheralRef}, peripherals::{timg0::RegisterBlock, Interrupt, TIMG0}, private::Sealed, @@ -87,7 +87,7 @@ use crate::{ Mode, }; -static INT_ENA_LOCK: LockState = LockState::new(); +static INT_ENA_LOCK: Lock = Lock::new(); /// A timer group consisting of #[cfg_attr(not(timg_timer1), doc = "a general purpose timer")]
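
A minimal usage sketch for reviewers, assuming the API lands as shown above: `lock` and `Lock` are the crate-internal items from esp-hal/src/lock.rs, while the static and the function below are illustrative placeholders that mirror how the timer drivers use their `CONF_LOCK`/`INT_ENA_LOCK` statics.

    use crate::lock::{lock, Lock};

    // One lock per protected resource, instead of funneling every critical
    // section through critical-section's single global mutex.
    static CONF_LOCK: Lock = Lock::new();

    fn configure_shared_peripheral() {
        // `lock` disables interrupts on the local core and acquires the
        // per-resource lock. On multi-core chips a contended `acquire`
        // re-enables interrupts between `try_lock` attempts, so interrupt
        // handlers keep running while the other core holds the lock; that
        // is the spinning-in-irq-free-context problem this patch fixes.
        lock(&CONF_LOCK, || {
            // ... read-modify-write the shared peripheral register here
            // (placeholder body) ...
        });
    }

Note the difference from the `critical_section::Impl` above: the raw implementation tolerates reentrancy via `REENTRY_FLAG`, while `lock` asserts that the same `Lock` is never acquired twice on one core, turning a would-be deadlock into a "lock is not reentrant" panic.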