Auto merge of #3475 - RalfJung:reduce-reuse-recycle, r=RalfJung
Address reuse improvements and fixes

- when an address gets reused, establish a happens-before link in the data race model
- do not reuse stack addresses, and make the reuse rate configurable

Fixes #3450
bors committed Apr 19, 2024
2 parents ccb0293 + 2155a30 commit 9b36914
Showing 48 changed files with 321 additions and 126 deletions.
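The key mechanism in this commit is a release/acquire handshake routed through the address-reuse pool: when an allocation is freed, the freeing thread's vector clock is stored alongside the address, and when a later allocation reuses that address, the reusing thread acquires that clock, establishing the happens-before link mentioned above. The following toy sketch illustrates the idea only; `VClock`, `PoolEntry`, `tick`, and `join` here are made-up stand-ins, not Miri's real data-race API.

```rust
// Toy sketch of the happens-before link established on address reuse
// (illustrative only; not code from this commit).
use std::collections::HashMap;

type ThreadId = usize;

/// A minimal vector clock: one counter per thread.
#[derive(Clone, Default, Debug)]
struct VClock(HashMap<ThreadId, u64>);

impl VClock {
    /// Advance this thread's own component.
    fn tick(&mut self, me: ThreadId) {
        *self.0.entry(me).or_insert(0) += 1;
    }
    /// Join (acquire): take the per-thread maximum of both clocks.
    fn join(&mut self, other: &VClock) {
        for (&t, &c) in &other.0 {
            let e = self.0.entry(t).or_insert(0);
            *e = (*e).max(c);
        }
    }
}

/// A reuse-pool entry remembers the freed address together with the clock of
/// the thread that freed it ("release").
struct PoolEntry {
    addr: u64,
    clock: VClock,
}

fn main() {
    // Thread 0 frees an allocation: snapshot its clock into the pool entry.
    let mut clock0 = VClock::default();
    clock0.tick(0);
    let entry = PoolEntry { addr: 0x1000, clock: clock0 };

    // Thread 1 later reuses that address: acquire the stored clock, so
    // everything thread 0 did before the free happens-before the reuse.
    let mut clock1 = VClock::default();
    clock1.tick(1);
    clock1.join(&entry.clock);

    println!("reusing address {:#x}; thread 1's clock is now {:?}", entry.addr, clock1);
}
```

In the diff below, the release side is `ReusePool::add_addr` storing a clock produced via `validate_lock_release`, and the acquire side is `take_addr` returning that clock so that `addr_from_alloc_id` can feed it into `validate_lock_acquire`.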
10 changes: 10 additions & 0 deletions README.md
@@ -295,6 +295,16 @@ up the sysroot. If you are using `miri` (the Miri driver) directly, see the
Miri adds its own set of `-Z` flags, which are usually set via the `MIRIFLAGS`
environment variable. We first document the most relevant and most commonly used flags:

* `-Zmiri-address-reuse-rate=<rate>` changes the probability that a freed *non-stack* allocation
will be added to the pool for address reuse, and the probability that a new *non-stack* allocation
will be taken from the pool. Stack allocations never get added to or taken from the pool. The
default is `0.5`.
* `-Zmiri-address-reuse-cross-thread-rate=<rate>` changes the probability that an allocation which
attempts to reuse a previously freed block of memory will also consider blocks freed by *other
threads*. The default is `0.1`, meaning that in 90% of the cases where an address reuse attempt is
made, only addresses previously freed by the same thread will be considered. Reusing an address from
another thread induces synchronization between those threads, which can mask data races and weak
memory bugs.
* `-Zmiri-compare-exchange-weak-failure-rate=<rate>` changes the failure rate of
`compare_exchange_weak` operations. The default is `0.8` (so 4 out of 5 weak ops will fail).
You can change it to any value between `0.0` and `1.0`, where `1.0` means it
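For example, like the other `-Z` flags these are set through `MIRIFLAGS`, so running `MIRIFLAGS="-Zmiri-address-reuse-rate=0.0 -Zmiri-address-reuse-cross-thread-rate=0.0" cargo miri test` should disable address reuse entirely (an illustrative invocation, not part of the diff above).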
59 changes: 40 additions & 19 deletions src/alloc_addresses/mod.rs
@@ -14,7 +14,8 @@ use rustc_span::Span;
use rustc_target::abi::{Align, HasDataLayout, Size};

use crate::*;
use reuse_pool::ReusePool;

use self::reuse_pool::ReusePool;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
@@ -77,7 +78,7 @@ impl GlobalStateInner {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
reuse: ReusePool::new(),
reuse: ReusePool::new(config),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
provenance_mode: config.provenance_mode,
@@ -141,7 +142,11 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}

fn addr_from_alloc_id(&self, alloc_id: AllocId, _kind: MemoryKind) -> InterpResult<'tcx, u64> {
fn addr_from_alloc_id(
&self,
alloc_id: AllocId,
memory_kind: MemoryKind,
) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let global_state = &mut *global_state;
@@ -159,9 +164,16 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick or reuse one.
let base_addr = if let Some(reuse_addr) =
global_state.reuse.take_addr(&mut *rng, size, align)
{
let base_addr = if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
&mut *rng,
size,
align,
memory_kind,
ecx.get_active_thread(),
) {
if let Some(data_race) = &ecx.machine.data_race {
data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
}
reuse_addr
} else {
// We have to pick a fresh address.
@@ -329,14 +341,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}

impl GlobalStateInner {
pub fn free_alloc_id(
&mut self,
rng: &mut impl Rng,
dead_id: AllocId,
size: Size,
align: Align,
) {
impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
let global_state = self.alloc_addresses.get_mut();
let rng = self.rng.get_mut();

// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -349,15 +358,27 @@ impl GlobalStateInner {
// returns a dead allocation.
// To avoid a linear scan we first look up the address in `base_addr`, and then find it in
// `int_to_ptr_map`.
let addr = *self.base_addr.get(&dead_id).unwrap();
let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
let removed = self.int_to_ptr_map.remove(pos);
let addr = *global_state.base_addr.get(&dead_id).unwrap();
let pos =
global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
let removed = global_state.int_to_ptr_map.remove(pos);
assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
global_state.exposed.remove(&dead_id);
// Also remember this address for future reuse.
self.reuse.add_addr(rng, addr, size, align)
let thread = self.threads.get_active_thread_id();
global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
let mut clock = concurrency::VClock::default();
if let Some(data_race) = &self.data_race {
data_race.validate_lock_release(
&mut clock,
thread,
self.threads.active_thread_ref().current_span(),
);
}
clock
})
}
}

84 changes: 62 additions & 22 deletions src/alloc_addresses/reuse_pool.rs
@@ -4,73 +4,112 @@ use rand::Rng;

use rustc_target::abi::{Align, Size};

const MAX_POOL_SIZE: usize = 64;
use crate::{concurrency::VClock, MemoryKind, MiriConfig, ThreadId};

// Just use fair coins, until we have evidence that other numbers are better.
const ADDR_REMEMBER_CHANCE: f64 = 0.5;
const ADDR_TAKE_CHANCE: f64 = 0.5;
const MAX_POOL_SIZE: usize = 64;

/// The pool strikes a balance between exploring more possible executions and making it more likely
/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
/// structure. Therefore we only reuse allocations when size and alignment match exactly.
#[derive(Debug)]
pub struct ReusePool {
address_reuse_rate: f64,
address_reuse_cross_thread_rate: f64,
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
/// allocations as address-size pairs, the list must be sorted by the size.
/// allocations as address-size pairs, the list must be sorted by the size and then the thread ID.
///
/// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
/// less than 64 different possible values, that bounds the overall size of the pool.
pool: Vec<Vec<(u64, Size)>>,
///
/// We also store the ID and the data-race clock of the thread that donated this pool element,
/// to ensure synchronization with the thread that picks up this address.
pool: Vec<Vec<(u64, Size, ThreadId, VClock)>>,
}

impl ReusePool {
pub fn new() -> Self {
ReusePool { pool: vec![] }
pub fn new(config: &MiriConfig) -> Self {
ReusePool {
address_reuse_rate: config.address_reuse_rate,
address_reuse_cross_thread_rate: config.address_reuse_cross_thread_rate,
pool: vec![],
}
}

fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, ThreadId, VClock)> {
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
if self.pool.len() <= pool_idx {
self.pool.resize(pool_idx + 1, Vec::new());
}
&mut self.pool[pool_idx]
}

pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
pub fn add_addr(
&mut self,
rng: &mut impl Rng,
addr: u64,
size: Size,
align: Align,
kind: MemoryKind,
thread: ThreadId,
clock: impl FnOnce() -> VClock,
) {
// Let's see if we even want to remember this address.
if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
// We don't remember stack addresses: there's a lot of them (so the perf impact is big),
// and we only want to reuse stack slots within the same thread or else we'll add a lot of
// undesired synchronization.
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return;
}
let clock = clock();
// Determine the pool to add this to, and where in the pool to put it.
let subpool = self.subpool(align);
let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
let pos = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
(*other_size, *other_thread) < (size, thread)
});
// Make sure the pool does not grow too big.
if subpool.len() >= MAX_POOL_SIZE {
// Pool full. Replace existing element, or last one if this would be even bigger.
let clamped_pos = pos.min(subpool.len() - 1);
subpool[clamped_pos] = (addr, size);
subpool[clamped_pos] = (addr, size, thread, clock);
return;
}
// Add address to pool, at the right position.
subpool.insert(pos, (addr, size));
subpool.insert(pos, (addr, size, thread, clock));
}

pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
// Determine whether we'll even attempt a reuse.
if !rng.gen_bool(ADDR_TAKE_CHANCE) {
pub fn take_addr(
&mut self,
rng: &mut impl Rng,
size: Size,
align: Align,
kind: MemoryKind,
thread: ThreadId,
) -> Option<(u64, VClock)> {
// Determine whether we'll even attempt a reuse. As above, we don't do reuse for stack addresses.
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return None;
}
let cross_thread_reuse = rng.gen_bool(self.address_reuse_cross_thread_rate);
// Determine the pool to take this from.
let subpool = self.subpool(align);
// Let's see if we can find something of the right size. We want to find the full range of
// such items, beginning with the first, so we can't use `binary_search_by_key`.
let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
// such items, beginning with the first, so we can't use `binary_search_by_key`. If we do
// *not* want to consider other threads' allocations, we effectively use the lexicographic
// order on `(size, thread)`.
let begin = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
*other_size < size
|| (*other_size == size && !cross_thread_reuse && *other_thread < thread)
});
let mut end = begin;
while let Some((_addr, other_size)) = subpool.get(end) {
while let Some((_addr, other_size, other_thread, _)) = subpool.get(end) {
if *other_size != size {
break;
}
if !cross_thread_reuse && *other_thread != thread {
// We entered the allocations of another thread.
break;
}
end += 1;
}
if end == begin {
@@ -80,8 +119,9 @@ impl ReusePool {
// Pick a random element with the desired size.
let idx = rng.gen_range(begin..end);
// Remove it from the pool and return.
let (chosen_addr, chosen_size) = subpool.remove(idx);
let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx);
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
Some(chosen_addr)
debug_assert!(cross_thread_reuse || chosen_thread == thread);
Some((chosen_addr, clock))
}
}
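For readers skimming the diff, here is a standalone sketch of the search strategy used by `take_addr` above: the subpool is kept sorted by `(size, thread)`, `partition_point` finds the first candidate, and a short walk collects every entry of the right size (restricted to the current thread unless cross-thread reuse was rolled). The helper name `candidate_range` and the simplified tuple type are made up for illustration; this is not code from the commit.

```rust
// Illustrative sketch of the (size, thread)-sorted subpool search.
fn candidate_range(
    subpool: &[(u64, u64, usize)], // (addr, size, thread), sorted by (size, thread)
    size: u64,
    thread: usize,
    cross_thread: bool,
) -> std::ops::Range<usize> {
    // First entry that could match: skip smaller sizes, and (for same-thread
    // reuse) entries of the right size donated by earlier thread IDs.
    let begin = subpool.partition_point(|&(_addr, other_size, other_thread)| {
        other_size < size || (other_size == size && !cross_thread && other_thread < thread)
    });
    // Walk forward while entries still qualify.
    let mut end = begin;
    while let Some(&(_addr, other_size, other_thread)) = subpool.get(end) {
        if other_size != size {
            break;
        }
        if !cross_thread && other_thread != thread {
            // We entered the allocations of another thread.
            break;
        }
        end += 1;
    }
    begin..end
}

fn main() {
    // Entries for threads 0 and 1, with sizes 8 and 16.
    let subpool = [(0x100, 8, 0), (0x200, 8, 1), (0x300, 16, 0), (0x400, 16, 1)];
    // Same-thread-only reuse: only thread 1's size-8 entry qualifies.
    assert_eq!(candidate_range(&subpool, 8, 1, false), 1..2);
    // Cross-thread reuse: both size-8 entries qualify.
    assert_eq!(candidate_range(&subpool, 8, 1, true), 0..2);
}
```

With cross-thread reuse disabled, the predicate effectively searches the lexicographic order on `(size, thread)`; with it enabled, only the size constrains the resulting range.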
