Auto merge of #3475 - RalfJung:reduce-reuse-recycle, r=RalfJung
Address reuse improvements and fixes

- when an address gets reused, establish a happens-before link in the data race model
- do not reuse stack addresses, and make the reuse rate configurable

Fixes #3450
bors committed Apr 19, 2024
2 parents ccb0293 + 2155a30 commit 9b36914
Showing 48 changed files with 321 additions and 126 deletions.
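The key mechanism in this commit is a release/acquire handshake routed through the address-reuse pool: when an allocation is freed, the freeing thread's vector clock is stored alongside the address, and when a later allocation reuses that address, the reusing thread acquires that clock, establishing the happens-before link mentioned above. The following toy sketch illustrates the idea only; `VClock`, `PoolEntry`, `tick`, and `join` here are made-up stand-ins, not Miri's real data-race API.

```rust
// Toy sketch of the happens-before link established on address reuse
// (illustrative only; not code from this commit).
use std::collections::HashMap;

type ThreadId = usize;

/// A minimal vector clock: one counter per thread.
#[derive(Clone, Default, Debug)]
struct VClock(HashMap<ThreadId, u64>);

impl VClock {
    /// Advance this thread's own component.
    fn tick(&mut self, me: ThreadId) {
        *self.0.entry(me).or_insert(0) += 1;
    }
    /// Join (acquire): take the per-thread maximum of both clocks.
    fn join(&mut self, other: &VClock) {
        for (&t, &c) in &other.0 {
            let e = self.0.entry(t).or_insert(0);
            *e = (*e).max(c);
        }
    }
}

/// A reuse-pool entry remembers the freed address together with the clock of
/// the thread that freed it ("release").
struct PoolEntry {
    addr: u64,
    clock: VClock,
}

fn main() {
    // Thread 0 frees an allocation: snapshot its clock into the pool entry.
    let mut clock0 = VClock::default();
    clock0.tick(0);
    let entry = PoolEntry { addr: 0x1000, clock: clock0 };

    // Thread 1 later reuses that address: acquire the stored clock, so
    // everything thread 0 did before the free happens-before the reuse.
    let mut clock1 = VClock::default();
    clock1.tick(1);
    clock1.join(&entry.clock);

    println!("reusing address {:#x}; thread 1's clock is now {:?}", entry.addr, clock1);
}
```

In the diff below, the release side is `ReusePool::add_addr` storing a clock produced via `validate_lock_release`, and the acquire side is `take_addr` returning that clock so that `addr_from_alloc_id` can feed it into `validate_lock_acquire`.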
10 changes: 10 additions & 0 deletions README.md
@@ -295,6 +295,16 @@ up the sysroot. If you are using `miri` (the Miri driver) directly, see the
Miri adds its own set of `-Z` flags, which are usually set via the `MIRIFLAGS`
environment variable. We first document the most relevant and most commonly used flags:

* `-Zmiri-address-reuse-rate=<rate>` changes the probability that a freed *non-stack* allocation
will be added to the pool for address reuse, and the probability that a new *non-stack* allocation
will be taken from the pool. Stack allocations never get added to or taken from the pool. The
default is `0.5`.
* `-Zmiri-address-reuse-cross-thread-rate=<rate>` changes the probability that an allocation which
attempts to reuse a previously freed block of memory will also consider blocks freed by *other
threads*. The default is `0.1`, meaning that in 90% of the cases where an address reuse attempt is
made, only addresses previously freed by the same thread will be considered. Reusing an address from
another thread induces synchronization between those threads, which can mask data races and weak
memory bugs.
* `-Zmiri-compare-exchange-weak-failure-rate=<rate>` changes the failure rate of
`compare_exchange_weak` operations. The default is `0.8` (so 4 out of 5 weak ops will fail).
You can change it to any value between `0.0` and `1.0`, where `1.0` means it
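For example, like the other `-Z` flags these are set through `MIRIFLAGS`, so running `MIRIFLAGS="-Zmiri-address-reuse-rate=0.0 -Zmiri-address-reuse-cross-thread-rate=0.0" cargo miri test` should disable address reuse entirely (an illustrative invocation, not part of the diff above).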
59 changes: 40 additions & 19 deletions src/alloc_addresses/mod.rs
@@ -14,7 +14,8 @@ use rustc_span::Span;
use rustc_target::abi::{Align, HasDataLayout, Size};

use crate::*;
use reuse_pool::ReusePool;

use self::reuse_pool::ReusePool;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
@@ -77,7 +78,7 @@ impl GlobalStateInner {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
reuse: ReusePool::new(),
reuse: ReusePool::new(config),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
provenance_mode: config.provenance_mode,
@@ -141,7 +142,11 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}

fn addr_from_alloc_id(&self, alloc_id: AllocId, _kind: MemoryKind) -> InterpResult<'tcx, u64> {
fn addr_from_alloc_id(
&self,
alloc_id: AllocId,
memory_kind: MemoryKind,
) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let global_state = &mut *global_state;
@@ -159,9 +164,16 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick or reuse one.
let base_addr = if let Some(reuse_addr) =
global_state.reuse.take_addr(&mut *rng, size, align)
{
let base_addr = if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
&mut *rng,
size,
align,
memory_kind,
ecx.get_active_thread(),
) {
if let Some(data_race) = &ecx.machine.data_race {
data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
}
reuse_addr
} else {
// We have to pick a fresh address.
@@ -329,14 +341,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}

impl GlobalStateInner {
pub fn free_alloc_id(
&mut self,
rng: &mut impl Rng,
dead_id: AllocId,
size: Size,
align: Align,
) {
impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
let global_state = self.alloc_addresses.get_mut();
let rng = self.rng.get_mut();

// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -349,15 +358,27 @@ impl GlobalStateInner {
// returns a dead allocation.
// To avoid a linear scan we first look up the address in `base_addr`, and then find it in
// `int_to_ptr_map`.
let addr = *self.base_addr.get(&dead_id).unwrap();
let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
let removed = self.int_to_ptr_map.remove(pos);
let addr = *global_state.base_addr.get(&dead_id).unwrap();
let pos =
global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
let removed = global_state.int_to_ptr_map.remove(pos);
assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
global_state.exposed.remove(&dead_id);
// Also remember this address for future reuse.
self.reuse.add_addr(rng, addr, size, align)
let thread = self.threads.get_active_thread_id();
global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
let mut clock = concurrency::VClock::default();
if let Some(data_race) = &self.data_race {
data_race.validate_lock_release(
&mut clock,
thread,
self.threads.active_thread_ref().current_span(),
);
}
clock
})
}
}

84 changes: 62 additions & 22 deletions src/alloc_addresses/reuse_pool.rs
@@ -4,73 +4,112 @@ use rand::Rng;

use rustc_target::abi::{Align, Size};

const MAX_POOL_SIZE: usize = 64;
use crate::{concurrency::VClock, MemoryKind, MiriConfig, ThreadId};

// Just use fair coins, until we have evidence that other numbers are better.
const ADDR_REMEMBER_CHANCE: f64 = 0.5;
const ADDR_TAKE_CHANCE: f64 = 0.5;
const MAX_POOL_SIZE: usize = 64;

/// The pool strikes a balance between exploring more possible executions and making it more likely
/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
/// structure. Therefore we only reuse allocations when size and alignment match exactly.
#[derive(Debug)]
pub struct ReusePool {
address_reuse_rate: f64,
address_reuse_cross_thread_rate: f64,
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
/// allocations as address-size pairs, the list must be sorted by the size.
/// allocations as address-size pairs, the list must be sorted by the size and then the thread ID.
///
/// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
/// less than 64 different possible values, that bounds the overall size of the pool.
pool: Vec<Vec<(u64, Size)>>,
///
/// We also store the ID and the data-race clock of the thread that donated this pool element,
/// to ensure synchronization with the thread that picks up this address.
pool: Vec<Vec<(u64, Size, ThreadId, VClock)>>,
}

impl ReusePool {
pub fn new() -> Self {
ReusePool { pool: vec![] }
pub fn new(config: &MiriConfig) -> Self {
ReusePool {
address_reuse_rate: config.address_reuse_rate,
address_reuse_cross_thread_rate: config.address_reuse_cross_thread_rate,
pool: vec![],
}
}

fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, ThreadId, VClock)> {
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
if self.pool.len() <= pool_idx {
self.pool.resize(pool_idx + 1, Vec::new());
}
&mut self.pool[pool_idx]
}

pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
pub fn add_addr(
&mut self,
rng: &mut impl Rng,
addr: u64,
size: Size,
align: Align,
kind: MemoryKind,
thread: ThreadId,
clock: impl FnOnce() -> VClock,
) {
// Let's see if we even want to remember this address.
if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
// We don't remember stack addresses: there's a lot of them (so the perf impact is big),
// and we only want to reuse stack slots within the same thread or else we'll add a lot of
// undesired synchronization.
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return;
}
let clock = clock();
// Determine the pool to add this to, and where in the pool to put it.
let subpool = self.subpool(align);
let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
let pos = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
(*other_size, *other_thread) < (size, thread)
});
// Make sure the pool does not grow too big.
if subpool.len() >= MAX_POOL_SIZE {
// Pool full. Replace existing element, or last one if this would be even bigger.
let clamped_pos = pos.min(subpool.len() - 1);
subpool[clamped_pos] = (addr, size);
subpool[clamped_pos] = (addr, size, thread, clock);
return;
}
// Add address to pool, at the right position.
subpool.insert(pos, (addr, size));
subpool.insert(pos, (addr, size, thread, clock));
}

pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
// Determine whether we'll even attempt a reuse.
if !rng.gen_bool(ADDR_TAKE_CHANCE) {
pub fn take_addr(
&mut self,
rng: &mut impl Rng,
size: Size,
align: Align,
kind: MemoryKind,
thread: ThreadId,
) -> Option<(u64, VClock)> {
// Determine whether we'll even attempt a reuse. As above, we don't do reuse for stack addresses.
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return None;
}
let cross_thread_reuse = rng.gen_bool(self.address_reuse_cross_thread_rate);
// Determine the pool to take this from.
let subpool = self.subpool(align);
// Let's see if we can find something of the right size. We want to find the full range of
// such items, beginning with the first, so we can't use `binary_search_by_key`.
let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
// such items, beginning with the first, so we can't use `binary_search_by_key`. If we do
// *not* want to consider other threads' allocations, we effectively use the lexicographic
// order on `(size, thread)`.
let begin = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
*other_size < size
|| (*other_size == size && !cross_thread_reuse && *other_thread < thread)
});
let mut end = begin;
while let Some((_addr, other_size)) = subpool.get(end) {
while let Some((_addr, other_size, other_thread, _)) = subpool.get(end) {
if *other_size != size {
break;
}
if !cross_thread_reuse && *other_thread != thread {
// We entered the allocations of another thread.
break;
}
end += 1;
}
if end == begin {
@@ -80,8 +119,9 @@ impl ReusePool {
// Pick a random element with the desired size.
let idx = rng.gen_range(begin..end);
// Remove it from the pool and return.
let (chosen_addr, chosen_size) = subpool.remove(idx);
let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx);
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
Some(chosen_addr)
debug_assert!(cross_thread_reuse || chosen_thread == thread);
Some((chosen_addr, clock))
}
}
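For readers skimming the diff, here is a standalone sketch of the search strategy used by `take_addr` above: the subpool is kept sorted by `(size, thread)`, `partition_point` finds the first candidate, and a short walk collects every entry of the right size (restricted to the current thread unless cross-thread reuse was rolled). The helper name `candidate_range` and the simplified tuple type are made up for illustration; this is not code from the commit.

```rust
// Illustrative sketch of the (size, thread)-sorted subpool search.
fn candidate_range(
    subpool: &[(u64, u64, usize)], // (addr, size, thread), sorted by (size, thread)
    size: u64,
    thread: usize,
    cross_thread: bool,
) -> std::ops::Range<usize> {
    // First entry that could match: skip smaller sizes, and (for same-thread
    // reuse) entries of the right size donated by earlier thread IDs.
    let begin = subpool.partition_point(|&(_addr, other_size, other_thread)| {
        other_size < size || (other_size == size && !cross_thread && other_thread < thread)
    });
    // Walk forward while entries still qualify.
    let mut end = begin;
    while let Some(&(_addr, other_size, other_thread)) = subpool.get(end) {
        if other_size != size {
            break;
        }
        if !cross_thread && other_thread != thread {
            // We entered the allocations of another thread.
            break;
        }
        end += 1;
    }
    begin..end
}

fn main() {
    // Entries for threads 0 and 1, with sizes 8 and 16.
    let subpool = [(0x100, 8, 0), (0x200, 8, 1), (0x300, 16, 0), (0x400, 16, 1)];
    // Same-thread-only reuse: only thread 1's size-8 entry qualifies.
    assert_eq!(candidate_range(&subpool, 8, 1, false), 1..2);
    // Cross-thread reuse: both size-8 entries qualify.
    assert_eq!(candidate_range(&subpool, 8, 1, true), 0..2);
}
```

With cross-thread reuse disabled, the predicate effectively searches the lexicographic order on `(size, thread)`; with it enabled, only the size constrains the resulting range.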
