diff --git a/alloc/Cargo.toml b/alloc/Cargo.toml index 3960f71681264..612452a960a37 100644 --- a/alloc/Cargo.toml +++ b/alloc/Cargo.toml @@ -20,6 +20,10 @@ rand_xorshift = "0.3.0" name = "alloctests" path = "tests/lib.rs" +[[test]] +name = "vec_deque_alloc_error" +path = "tests/vec_deque_alloc_error.rs" + [[bench]] name = "allocbenches" path = "benches/lib.rs" @@ -43,9 +47,6 @@ optimize_for_size = ["core/optimize_for_size"] [lints.rust.unexpected_cfgs] level = "warn" -# x.py uses beta cargo, so `check-cfg` entries do not yet take effect -# for rust-lang/rust. But for users of `-Zbuild-std` it does. -# The unused warning is waiting for rust-lang/cargo#13925 to reach beta. check-cfg = [ 'cfg(bootstrap)', 'cfg(no_global_oom_handling)', diff --git a/alloc/src/alloc.rs b/alloc/src/alloc.rs index 6677534eafc6e..1833a7f477f00 100644 --- a/alloc/src/alloc.rs +++ b/alloc/src/alloc.rs @@ -424,29 +424,3 @@ pub mod __alloc_error_handler { } } } - -#[cfg(not(no_global_oom_handling))] -/// Specialize clones into pre-allocated, uninitialized memory. -/// Used by `Box::clone` and `Rc`/`Arc::make_mut`. -pub(crate) trait WriteCloneIntoRaw: Sized { - unsafe fn write_clone_into_raw(&self, target: *mut Self); -} - -#[cfg(not(no_global_oom_handling))] -impl WriteCloneIntoRaw for T { - #[inline] - default unsafe fn write_clone_into_raw(&self, target: *mut Self) { - // Having allocated *first* may allow the optimizer to create - // the cloned value in-place, skipping the local and move. - unsafe { target.write(self.clone()) }; - } -} - -#[cfg(not(no_global_oom_handling))] -impl WriteCloneIntoRaw for T { - #[inline] - unsafe fn write_clone_into_raw(&self, target: *mut Self) { - // We can always copy in-place, without ever involving a local value. - unsafe { target.copy_from_nonoverlapping(self, 1) }; - } -} diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs index 21d0050300170..f299aa0124dbe 100644 --- a/alloc/src/boxed.rs +++ b/alloc/src/boxed.rs @@ -145,8 +145,7 @@ //! to `into_iter()` for boxed slices will defer to the slice implementation on editions before //! 2024: //! -#![cfg_attr(bootstrap, doc = "```rust,edition2021,ignore")] -#![cfg_attr(not(bootstrap), doc = "```rust,edition2021")] +//! ```rust,edition2021 //! // Rust 2015, 2018, and 2021: //! //! # #![allow(boxed_slice_into_iter)] // override our `deny(warnings)` @@ -189,6 +188,8 @@ use core::any::Any; use core::async_iter::AsyncIterator; use core::borrow; +#[cfg(not(no_global_oom_handling))] +use core::clone::CloneToUninit; use core::cmp::Ordering; use core::error::Error; use core::fmt; @@ -208,7 +209,7 @@ use core::slice; use core::task::{Context, Poll}; #[cfg(not(no_global_oom_handling))] -use crate::alloc::{handle_alloc_error, WriteCloneIntoRaw}; +use crate::alloc::handle_alloc_error; use crate::alloc::{AllocError, Allocator, Global, Layout}; #[cfg(not(no_global_oom_handling))] use crate::borrow::Cow; @@ -1212,6 +1213,9 @@ impl Box { /// let static_ref: &'static mut usize = Box::leak(x); /// *static_ref += 1; /// assert_eq!(*static_ref, 42); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. 
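+ /// # // (`Box::leak` gave up ownership of the allocation above, and `Box::from_raw`
+ /// # // on the leaked reference takes it back, so the hidden `drop` that follows
+ /// # // frees exactly what the example leaked.)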
+ /// # drop(unsafe { Box::from_raw(static_ref) }); /// ``` /// /// Unsized data: @@ -1221,6 +1225,9 @@ impl Box { /// let static_ref = Box::leak(x); /// static_ref[0] = 4; /// assert_eq!(*static_ref, [4, 2, 3]); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. + /// # drop(unsafe { Box::from_raw(static_ref) }); /// ``` #[stable(feature = "box_leak", since = "1.26.0")] #[inline] @@ -1347,7 +1354,7 @@ impl Clone for Box { // Pre-allocate memory to allow writing the cloned value directly. let mut boxed = Self::new_uninit_in(self.1.clone()); unsafe { - (**self).write_clone_into_raw(boxed.as_mut_ptr()); + (**self).clone_to_uninit(boxed.as_mut_ptr()); boxed.assume_init() } } @@ -2123,23 +2130,23 @@ impl FromIterator for Box<[I]> { /// This implementation is required to make sure that the `Box<[I]>: IntoIterator` /// implementation doesn't overlap with `IntoIterator for T where T: Iterator` blanket. -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl !Iterator for Box<[I], A> {} /// This implementation is required to make sure that the `&Box<[I]>: IntoIterator` /// implementation doesn't overlap with `IntoIterator for T where T: Iterator` blanket. -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl<'a, I, A: Allocator> !Iterator for &'a Box<[I], A> {} /// This implementation is required to make sure that the `&mut Box<[I]>: IntoIterator` /// implementation doesn't overlap with `IntoIterator for T where T: Iterator` blanket. -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl<'a, I, A: Allocator> !Iterator for &'a mut Box<[I], A> {} // Note: the `#[rustc_skip_during_method_dispatch(boxed_slice)]` on `trait IntoIterator` // hides this implementation from explicit `.into_iter()` calls on editions < 2024, // so those calls will still resolve to the slice implementation, by reference. 
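// Aside: a minimal sketch (not part of the patch) of the edition-dependent dispatch
// the comment above describes. It assumes a Rust 2021 crate on a 1.80+ toolchain;
// the function name is illustrative.
#[allow(boxed_slice_into_iter)]
fn edition_2021_dispatch() {
    let b: Box<[i32]> = vec![1, 2, 3].into_boxed_slice();
    // On editions before 2024, method dispatch skips the by-value impl, so this
    // call resolves to the slice implementation, iterates by reference (`&i32`),
    // and does not consume `b`.
    assert_eq!(b.into_iter().next(), Some(&1));
    // A qualified trait call (or a `for` loop, which desugars to one) uses the
    // new impl and yields owned items.
    let owned: Vec<i32> = IntoIterator::into_iter(b).collect();
    assert_eq!(owned, vec![1, 2, 3]);
}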
-#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl<I, A: Allocator> IntoIterator for Box<[I], A> { type IntoIter = vec::IntoIter<I, A>; type Item = I; @@ -2148,7 +2155,7 @@ } } -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl<'a, I, A: Allocator> IntoIterator for &'a Box<[I], A> { type IntoIter = slice::Iter<'a, I>; type Item = &'a I; @@ -2157,7 +2164,7 @@ } } -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl<'a, I, A: Allocator> IntoIterator for &'a mut Box<[I], A> { type IntoIter = slice::IterMut<'a, I>; type Item = &'a mut I; @@ -2167,7 +2174,7 @@ } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl FromIterator<char> for Box<str> { fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2175,7 +2182,7 @@ } } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl<'a> FromIterator<&'a char> for Box<str> { fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2183,7 +2190,7 @@ } } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl<'a> FromIterator<&'a str> for Box<str> { fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2191,7 +2198,7 @@ } } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl FromIterator<String> for Box<str> { fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2199,7 +2206,7 @@ } } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl<A: Allocator> FromIterator<Box<str, A>> for Box<str> { fn from_iter<T: IntoIterator<Item = Box<str, A>>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2207,7 +2214,7 @@ } } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "boxed_str_from_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_str_from_iter", since = "1.80.0")] impl<'a> FromIterator<Cow<'a, str>> for Box<str> { fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self { String::from_iter(iter).into_boxed_str() @@ -2373,7 +2380,7 @@ impl dyn Error + Send { let err: Box<dyn Error> = self; <dyn Error>::downcast(err).map_err(|s| unsafe { // Reapply the `Send` marker. - Box::from_raw(Box::into_raw(s) as *mut (dyn Error + Send)) + mem::transmute::<Box<dyn Error>, Box<dyn Error + Send>>(s) }) } } @@ -2386,8 +2393,8 @@ impl dyn Error + Send + Sync { pub fn downcast<T: Any>(self: Box<Self>) -> Result<Box<T>, Box<Self>> { let err: Box<dyn Error> = self; <dyn Error>::downcast(err).map_err(|s| unsafe { - // Reapply the `Send + Sync` marker. + // Reapply the `Send + Sync` markers.
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send + Sync>>(s) }) } } diff --git a/alloc/src/boxed/thin.rs b/alloc/src/boxed/thin.rs index 8b145b67bf186..e9bfecba160a0 100644 --- a/alloc/src/boxed/thin.rs +++ b/alloc/src/boxed/thin.rs @@ -1,6 +1,7 @@ -// Based on -// https://github.com/matthieu-m/rfc2580/blob/b58d1d3cba0d4b5e859d3617ea2d0943aaa31329/examples/thin.rs -// by matthieu-m +//! Based on +//! <https://github.com/matthieu-m/rfc2580/blob/b58d1d3cba0d4b5e859d3617ea2d0943aaa31329/examples/thin.rs> +//! by matthieu-m + use crate::alloc::{self, Layout, LayoutError}; use core::error::Error; use core::fmt::{self, Debug, Display, Formatter}; diff --git a/alloc/src/collections/binary_heap/mod.rs b/alloc/src/collections/binary_heap/mod.rs index 846b9a1404d27..af01db19139e3 100644 --- a/alloc/src/collections/binary_heap/mod.rs +++ b/alloc/src/collections/binary_heap/mod.rs @@ -440,7 +440,7 @@ impl<T: Ord> BinaryHeap<T> { /// heap.push(4); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_binary_heap_constructor", issue = "112353")] + #[rustc_const_stable(feature = "const_binary_heap_constructor", since = "1.80.0")] #[must_use] pub const fn new() -> BinaryHeap<T> { BinaryHeap { data: vec![] } } @@ -484,7 +484,7 @@ impl<T: Ord, A: Allocator> BinaryHeap<T, A> { /// heap.push(4); /// ``` #[unstable(feature = "allocator_api", issue = "32838")] - #[rustc_const_unstable(feature = "const_binary_heap_constructor", issue = "112353")] + #[rustc_const_unstable(feature = "const_binary_heap_new_in", issue = "112353")] #[must_use] pub const fn new_in(alloc: A) -> BinaryHeap<T, A> { BinaryHeap { data: Vec::new_in(alloc) } } @@ -1213,7 +1213,6 @@ impl<T, A: Allocator> BinaryHeap<T, A> { /// Basic usage: /// /// ``` - /// #![feature(binary_heap_as_slice)] /// use std::collections::BinaryHeap; /// use std::io::{self, Write}; /// @@ -1222,7 +1221,7 @@ impl<T, A: Allocator> BinaryHeap<T, A> { /// io::sink().write(heap.as_slice()).unwrap(); /// ``` #[must_use] - #[unstable(feature = "binary_heap_as_slice", issue = "83659")] + #[stable(feature = "binary_heap_as_slice", since = "1.80.0")] pub fn as_slice(&self) -> &[T] { self.data.as_slice() } diff --git a/alloc/src/collections/btree/map/tests.rs b/alloc/src/collections/btree/map/tests.rs index 56620cf890db7..ba1f38dcc3e52 100644 --- a/alloc/src/collections/btree/map/tests.rs +++ b/alloc/src/collections/btree/map/tests.rs @@ -1796,18 +1796,18 @@ fn test_ord_absence() { } fn map_debug<K: Debug>(mut map: BTreeMap<K, ()>) { - format!("{map:?}"); - format!("{:?}", map.iter()); - format!("{:?}", map.iter_mut()); - format!("{:?}", map.keys()); - format!("{:?}", map.values()); - format!("{:?}", map.values_mut()); + let _ = format!("{map:?}"); + let _ = format!("{:?}", map.iter()); + let _ = format!("{:?}", map.iter_mut()); + let _ = format!("{:?}", map.keys()); + let _ = format!("{:?}", map.values()); + let _ = format!("{:?}", map.values_mut()); if true { - format!("{:?}", map.into_iter()); + let _ = format!("{:?}", map.into_iter()); } else if true { - format!("{:?}", map.into_keys()); + let _ = format!("{:?}", map.into_keys()); } else { - format!("{:?}", map.into_values()); + let _ = format!("{:?}", map.into_values()); } } diff --git a/alloc/src/collections/btree/set/tests.rs b/alloc/src/collections/btree/set/tests.rs index 688ce57e9da6a..48bf767413835 100644 --- a/alloc/src/collections/btree/set/tests.rs +++ b/alloc/src/collections/btree/set/tests.rs @@ -705,9 +705,9 @@ fn test_ord_absence() { } fn set_debug<K: Debug>(set: BTreeSet<K>) { - format!("{set:?}"); - format!("{:?}", set.iter()); - format!("{:?}", set.into_iter()); + let _ = format!("{set:?}"); + let _ = format!("{:?}", set.iter()); + let _ = format!("{:?}", set.into_iter()); } fn set_clone<K: Clone>(mut set: BTreeSet<K>) {
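// Aside, referring back to the `BinaryHeap` hunks above: a small usage sketch
// (not part of the patch) of the two stabilizations, `new` as a `const fn` and
// `as_slice`. The names `EMPTY` and `binary_heap_demo` are illustrative.
use std::collections::BinaryHeap;

static EMPTY: BinaryHeap<i32> = BinaryHeap::new(); // const-stable as of 1.80

fn binary_heap_demo() {
    assert!(EMPTY.is_empty());
    let heap = BinaryHeap::from([2, 9, 4]);
    // `as_slice` exposes the backing buffer in arbitrary (heap) order, so sort
    // a copy before comparing.
    let mut contents = heap.as_slice().to_vec();
    contents.sort();
    assert_eq!(contents, [2, 4, 9]);
}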
diff --git a/alloc/src/collections/linked_list.rs b/alloc/src/collections/linked_list.rs index 1c90c171a155b..077483a174b10 100644 --- a/alloc/src/collections/linked_list.rs +++ b/alloc/src/collections/linked_list.rs @@ -1495,6 +1495,14 @@ impl<'a, T, A: Allocator> Cursor<'a, T, A> { pub fn back(&self) -> Option<&'a T> { self.list.back() } + + /// Provides a reference to the cursor's parent list. + #[must_use] + #[inline(always)] + #[unstable(feature = "linked_list_cursors", issue = "58533")] + pub fn as_list(&self) -> &'a LinkedList<T, A> { + self.list + } } impl<'a, T, A: Allocator> CursorMut<'a, T, A> { @@ -1605,6 +1613,18 @@ impl<'a, T, A: Allocator> CursorMut<'a, T, A> { pub fn as_cursor(&self) -> Cursor<'_, T, A> { Cursor { list: self.list, current: self.current, index: self.index } } + + /// Provides a read-only reference to the cursor's parent list. + /// + /// The lifetime of the returned reference is bound to that of the + /// `CursorMut`, which means it cannot outlive the `CursorMut` and that the + /// `CursorMut` is frozen for the lifetime of the reference. + #[must_use] + #[inline(always)] + #[unstable(feature = "linked_list_cursors", issue = "58533")] + pub fn as_list(&self) -> &LinkedList<T, A> { + self.list + } } // Now the list editing operations @@ -1705,7 +1725,7 @@ impl<'a, T, A: Allocator> CursorMut<'a, T, A> { unsafe { self.current = unlinked_node.as_ref().next; self.list.unlink_node(unlinked_node); - let unlinked_node = Box::from_raw(unlinked_node.as_ptr()); + let unlinked_node = Box::from_raw_in(unlinked_node.as_ptr(), &self.list.alloc); Some(unlinked_node.element) } } @@ -1946,7 +1966,7 @@ where if (self.pred)(&mut node.as_mut().element) { // `unlink_node` is okay with aliasing `element` references. self.list.unlink_node(node); - return Some(Box::from_raw(node.as_ptr()).element); + return Some(Box::from_raw_in(node.as_ptr(), &self.list.alloc).element); } } } diff --git a/alloc/src/collections/linked_list/tests.rs b/alloc/src/collections/linked_list/tests.rs index 8dcd59d12d927..d3744c5a9d0c9 100644 --- a/alloc/src/collections/linked_list/tests.rs +++ b/alloc/src/collections/linked_list/tests.rs @@ -1164,3 +1164,42 @@ fn test_drop_panic() { assert_eq!(unsafe { DROPS }, 8); } + +#[test] +fn test_allocator() { + use core::alloc::AllocError; + use core::alloc::Allocator; + use core::alloc::Layout; + use core::cell::Cell; + + struct A { + has_allocated: Cell<bool>, + has_deallocated: Cell<bool>, + } + + unsafe impl Allocator for A { + fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> { + assert!(!self.has_allocated.get()); + self.has_allocated.set(true); + + Global.allocate(layout) + } + + unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) { + assert!(!self.has_deallocated.get()); + self.has_deallocated.set(true); + + unsafe { Global.deallocate(ptr, layout) } + } + } + + let alloc = &A { has_allocated: Cell::new(false), has_deallocated: Cell::new(false) }; + { + let mut list = LinkedList::new_in(alloc); + list.push_back(5u32); + list.remove(0); + } + + assert!(alloc.has_allocated.get()); + assert!(alloc.has_deallocated.get()); +} diff --git a/alloc/src/collections/vec_deque/into_iter.rs b/alloc/src/collections/vec_deque/into_iter.rs index 692af7c197a30..4747517393c66 100644 --- a/alloc/src/collections/vec_deque/into_iter.rs +++ b/alloc/src/collections/vec_deque/into_iter.rs @@ -132,7 +132,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { fn next_chunk<const N: usize>( &mut self, ) -> Result<[Self::Item; N], array::IntoIter<Self::Item, N>> { - let mut raw_arr = MaybeUninit::uninit_array(); + let mut raw_arr = [const {
MaybeUninit::uninit() }; N]; let raw_arr_ptr = raw_arr.as_mut_ptr().cast(); let (head, tail) = self.inner.as_slices(); diff --git a/alloc/src/collections/vec_deque/mod.rs b/alloc/src/collections/vec_deque/mod.rs index 4643a6bbe2ecd..a07f250d7d88c 100644 --- a/alloc/src/collections/vec_deque/mod.rs +++ b/alloc/src/collections/vec_deque/mod.rs @@ -164,6 +164,20 @@ impl VecDeque { self.buf.ptr() } + /// Appends an element to the buffer. + /// + /// # Safety + /// + /// May only be called if `deque.len() < deque.capacity()` + #[inline] + unsafe fn push_unchecked(&mut self, element: T) { + // SAFETY: Because of the precondition, it's guaranteed that there is space + // in the logical array after the last element. + unsafe { self.buffer_write(self.to_physical_idx(self.len), element) }; + // This can't overflow because `deque.len() < deque.capacity() <= usize::MAX`. + self.len += 1; + } + /// Moves an element out of the buffer #[inline] unsafe fn buffer_read(&mut self, off: usize) -> T { @@ -982,6 +996,8 @@ impl VecDeque { // `head` and `len` are at most `isize::MAX` and `target_cap < self.capacity()`, so nothing can // overflow. let tail_outside = (target_cap + 1..=self.capacity()).contains(&(self.head + self.len)); + // Used in the drop guard below. + let old_head = self.head; if self.len == 0 { self.head = 0; @@ -1034,12 +1050,74 @@ impl VecDeque { } self.head = new_head; } - self.buf.shrink_to_fit(target_cap); + + struct Guard<'a, T, A: Allocator> { + deque: &'a mut VecDeque, + old_head: usize, + target_cap: usize, + } + + impl Drop for Guard<'_, T, A> { + #[cold] + fn drop(&mut self) { + unsafe { + // SAFETY: This is only called if `buf.shrink_to_fit` unwinds, + // which is the only time it's safe to call `abort_shrink`. + self.deque.abort_shrink(self.old_head, self.target_cap) + } + } + } + + let guard = Guard { deque: self, old_head, target_cap }; + + guard.deque.buf.shrink_to_fit(target_cap); + + // Don't drop the guard if we didn't unwind. + mem::forget(guard); debug_assert!(self.head < self.capacity() || self.capacity() == 0); debug_assert!(self.len <= self.capacity()); } + /// Reverts the deque back into a consistent state in case `shrink_to` failed. + /// This is necessary to prevent UB if the backing allocator returns an error + /// from `shrink` and `handle_alloc_error` subsequently unwinds (see #123369). + /// + /// `old_head` refers to the head index before `shrink_to` was called. `target_cap` + /// is the capacity that it was trying to shrink to. + unsafe fn abort_shrink(&mut self, old_head: usize, target_cap: usize) { + // Moral equivalent of self.head + self.len <= target_cap. Won't overflow + // because `self.len <= target_cap`. + if self.head <= target_cap - self.len { + // The deque's buffer is contiguous, so no need to copy anything around. + return; + } + + // `shrink_to` already copied the head to fit into the new capacity, so this won't overflow. + let head_len = target_cap - self.head; + // `self.head > target_cap - self.len` => `self.len > target_cap - self.head =: head_len` so this must be positive. + let tail_len = self.len - head_len; + + if tail_len <= cmp::min(head_len, self.capacity() - target_cap) { + // There's enough spare capacity to copy the tail to the back (because `tail_len < self.capacity() - target_cap`), + // and copying the tail should be cheaper than copying the head (because `tail_len <= head_len`). + + unsafe { + // The old tail and the new tail can't overlap because the head slice lies between them. 
The + // head slice ends at `target_cap`, so that's where we copy to. + self.copy_nonoverlapping(0, target_cap, tail_len); + } + } else { + // Either there's not enough spare capacity to make the deque contiguous, or the head is shorter than the tail + // (and therefore hopefully cheaper to copy). + unsafe { + // The old and the new head slice can overlap, so we can't use `copy_nonoverlapping` here. + self.copy(self.head, old_head, head_len); + self.head = old_head; + } + } + } + /// Shortens the deque, keeping the first `len` elements and dropping /// the rest. /// @@ -2847,6 +2925,14 @@ impl Extend for VecDeque { fn extend_reserve(&mut self, additional: usize) { self.reserve(additional); } + + #[inline] + unsafe fn extend_one_unchecked(&mut self, item: T) { + // SAFETY: Our preconditions ensure the space has been reserved, and `extend_reserve` is implemented correctly. + unsafe { + self.push_unchecked(item); + } + } } #[stable(feature = "extend_ref", since = "1.2.0")] @@ -2864,6 +2950,14 @@ impl<'a, T: 'a + Copy, A: Allocator> Extend<&'a T> for VecDeque { fn extend_reserve(&mut self, additional: usize) { self.reserve(additional); } + + #[inline] + unsafe fn extend_one_unchecked(&mut self, &item: &'a T) { + // SAFETY: Our preconditions ensure the space has been reserved, and `extend_reserve` is implemented correctly. + unsafe { + self.push_unchecked(item); + } + } } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/alloc/src/collections/vec_deque/spec_extend.rs b/alloc/src/collections/vec_deque/spec_extend.rs index dccf40ccb38aa..6a89abc3ef9b6 100644 --- a/alloc/src/collections/vec_deque/spec_extend.rs +++ b/alloc/src/collections/vec_deque/spec_extend.rs @@ -21,21 +21,12 @@ where // self.push_back(item); // } - // May only be called if `deque.len() < deque.capacity()` - unsafe fn push_unchecked(deque: &mut VecDeque, element: T) { - // SAFETY: Because of the precondition, it's guaranteed that there is space - // in the logical array after the last element. - unsafe { deque.buffer_write(deque.to_physical_idx(deque.len), element) }; - // This can't overflow because `deque.len() < deque.capacity() <= usize::MAX`. - deque.len += 1; - } - while let Some(element) = iter.next() { let (lower, _) = iter.size_hint(); self.reserve(lower.saturating_add(1)); // SAFETY: We just reserved space for at least one element. - unsafe { push_unchecked(self, element) }; + unsafe { self.push_unchecked(element) }; // Inner loop to avoid repeatedly calling `reserve`. while self.len < self.capacity() { @@ -43,7 +34,7 @@ where return; }; // SAFETY: The loop condition guarantees that `self.len() < self.capacity()`. - unsafe { push_unchecked(self, element) }; + unsafe { self.push_unchecked(element) }; } } } diff --git a/alloc/src/ffi/c_str.rs b/alloc/src/ffi/c_str.rs index b13af93d06c57..f1eb195b88462 100644 --- a/alloc/src/ffi/c_str.rs +++ b/alloc/src/ffi/c_str.rs @@ -911,7 +911,7 @@ impl From<&CStr> for Rc { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "more_rc_default_impls", since = "1.80.0")] impl Default for Rc { /// Creates an empty CStr inside an Rc /// diff --git a/alloc/src/fmt.rs b/alloc/src/fmt.rs index ae44cab8131b5..c6bba619ae646 100644 --- a/alloc/src/fmt.rs +++ b/alloc/src/fmt.rs @@ -12,6 +12,7 @@ //! Some examples of the [`format!`] extension are: //! //! ``` +//! # #![allow(unused_must_use)] //! format!("Hello"); // => "Hello" //! format!("Hello, {}!", "world"); // => "Hello, world!" //! 
format!("The number is {}", 1); // => "The number is 1" @@ -50,6 +51,7 @@ //! the iterator advances. This leads to behavior like this: //! //! ``` +//! # #![allow(unused_must_use)] //! format!("{1} {} {0} {}", 1, 2); // => "2 1 1 2" //! ``` //! @@ -77,6 +79,7 @@ //! For example, the following [`format!`] expressions all use named arguments: //! //! ``` +//! # #![allow(unused_must_use)] //! format!("{argument}", argument = "test"); // => "test" //! format!("{name} {}", 1, name = 2); // => "2 1" //! format!("{a} {c} {b}", a="a", b='b', c=3); // => "a 3 b" @@ -86,6 +89,7 @@ //! reference a variable with that name in the current scope. //! //! ``` +//! # #![allow(unused_must_use)] //! let argument = 2 + 2; //! format!("{argument}"); // => "4" //! diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs index 4ac0c9b15be7a..a7715740cbd8f 100644 --- a/alloc/src/lib.rs +++ b/alloc/src/lib.rs @@ -103,6 +103,7 @@ #![feature(assert_matches)] #![feature(async_fn_traits)] #![feature(async_iterator)] +#![feature(clone_to_uninit)] #![feature(coerce_unsized)] #![feature(const_align_of_val)] #![feature(const_box)] @@ -121,13 +122,12 @@ #![feature(deref_pure_trait)] #![feature(dispatch_from_dyn)] #![feature(error_generic_member_access)] -#![feature(error_in_core)] #![feature(exact_size_is_empty)] #![feature(extend_one)] +#![feature(extend_one_unchecked)] #![feature(fmt_internals)] #![feature(fn_traits)] #![feature(hasher_prefixfree_extras)] -#![feature(hint_assert_unchecked)] #![feature(inplace_iteration)] #![feature(iter_advance_by)] #![feature(iter_next_chunk)] @@ -135,7 +135,6 @@ #![feature(layout_for_ptr)] #![feature(local_waker)] #![feature(maybe_uninit_slice)] -#![feature(maybe_uninit_uninit_array)] #![feature(maybe_uninit_uninit_array_transpose)] #![feature(panic_internals)] #![feature(pattern)] @@ -166,18 +165,16 @@ // // Language features: // tidy-alphabetical-start -#![cfg_attr(bootstrap, feature(exclusive_range_pattern))] +#![cfg_attr(bootstrap, feature(c_unwind))] #![cfg_attr(not(test), feature(coroutine_trait))] #![cfg_attr(test, feature(panic_update_hook))] #![cfg_attr(test, feature(test))] #![feature(allocator_internals)] #![feature(allow_internal_unstable)] -#![feature(c_unwind)] #![feature(cfg_sanitize)] #![feature(const_mut_refs)] #![feature(const_precise_live_drops)] #![feature(const_ptr_write)] -#![feature(const_trait_impl)] #![feature(const_try)] #![feature(decl_macro)] #![feature(dropck_eyepatch)] @@ -260,6 +257,7 @@ pub mod vec; #[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] pub mod __export { pub use core::format_args; + pub use core::hint::must_use; } #[cfg(test)] diff --git a/alloc/src/macros.rs b/alloc/src/macros.rs index 0f767df6063a3..8c6a367869ce0 100644 --- a/alloc/src/macros.rs +++ b/alloc/src/macros.rs @@ -41,18 +41,18 @@ #[allow_internal_unstable(rustc_attrs, liballoc_internals)] macro_rules! vec { () => ( - $crate::__rust_force_expr!($crate::vec::Vec::new()) + $crate::vec::Vec::new() ); ($elem:expr; $n:expr) => ( - $crate::__rust_force_expr!($crate::vec::from_elem($elem, $n)) + $crate::vec::from_elem($elem, $n) ); ($($x:expr),+ $(,)?) => ( - $crate::__rust_force_expr!(<[_]>::into_vec( + <[_]>::into_vec( // This rustc_box is not required, but it produces a dramatic improvement in compile // time when constructing arrays with many elements. #[rustc_box] $crate::boxed::Box::new([$($x),+]) - )) + ) ); } @@ -111,6 +111,7 @@ macro_rules! 
vec { /// # Examples /// /// ``` +/// # #![allow(unused_must_use)] /// format!("test"); // => "test" /// format!("hello {}", "world!"); // => "hello world!" /// format!("x = {}, y = {val}", 10, val = 30); // => "x = 10, y = 30" @@ -119,20 +120,13 @@ macro_rules! vec { /// ``` #[macro_export] #[stable(feature = "rust1", since = "1.0.0")] +#[allow_internal_unstable(hint_must_use, liballoc_internals)] #[cfg_attr(not(test), rustc_diagnostic_item = "format_macro")] macro_rules! format { - ($($arg:tt)*) => {{ - let res = $crate::fmt::format($crate::__export::format_args!($($arg)*)); - res - }} -} - -/// Force AST node to an expression to improve diagnostics in pattern position. -#[doc(hidden)] -#[macro_export] -#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] -macro_rules! __rust_force_expr { - ($e:expr) => { - $e - }; + ($($arg:tt)*) => { + $crate::__export::must_use({ + let res = $crate::fmt::format($crate::__export::format_args!($($arg)*)); + res + }) + } } diff --git a/alloc/src/raw_vec.rs b/alloc/src/raw_vec.rs index 1134c7f833e2b..7b7dae5a057f0 100644 --- a/alloc/src/raw_vec.rs +++ b/alloc/src/raw_vec.rs @@ -429,6 +429,7 @@ impl RawVec { /// /// Aborts on OOM. #[cfg(not(no_global_oom_handling))] + #[inline] pub fn shrink_to_fit(&mut self, cap: usize) { if let Err(err) = self.shrink(cap) { handle_error(err); @@ -511,9 +512,25 @@ impl RawVec { } #[cfg(not(no_global_oom_handling))] + #[inline] fn shrink(&mut self, cap: usize) -> Result<(), TryReserveError> { assert!(cap <= self.capacity(), "Tried to shrink to a larger capacity"); + // SAFETY: Just checked this isn't trying to grow + unsafe { self.shrink_unchecked(cap) } + } + /// `shrink`, but without the capacity check. + /// + /// This is split out so that `shrink` can inline the check, since it + /// optimizes out in things like `shrink_to_fit`, without needing to + /// also inline all this code, as doing that ends up failing the + /// `vec-shrink-panic` codegen test when `shrink_to_fit` ends up being too + /// big for LLVM to be willing to inline. 
+ /// + /// # Safety + /// `cap <= self.capacity()` + #[cfg(not(no_global_oom_handling))] + unsafe fn shrink_unchecked(&mut self, cap: usize) -> Result<(), TryReserveError> { let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) }; // See current_memory() why this assert is here const { assert!(mem::size_of::() % mem::align_of::() == 0) }; diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs index 875c24c28e4a9..9982c8ea6dcbe 100644 --- a/alloc/src/rc.rs +++ b/alloc/src/rc.rs @@ -249,6 +249,8 @@ use std::boxed::Box; use core::any::Any; use core::borrow; use core::cell::Cell; +#[cfg(not(no_global_oom_handling))] +use core::clone::CloneToUninit; use core::cmp::Ordering; use core::fmt; use core::hash::{Hash, Hasher}; @@ -257,8 +259,6 @@ use core::intrinsics::abort; #[cfg(not(no_global_oom_handling))] use core::iter; use core::marker::{PhantomData, Unsize}; -#[cfg(not(no_global_oom_handling))] -use core::mem::size_of_val; use core::mem::{self, align_of_val_raw, forget, ManuallyDrop}; use core::ops::{CoerceUnsized, Deref, DerefMut, DerefPure, DispatchFromDyn, Receiver}; use core::panic::{RefUnwindSafe, UnwindSafe}; @@ -270,8 +270,6 @@ use core::slice::from_raw_parts_mut; #[cfg(not(no_global_oom_handling))] use crate::alloc::handle_alloc_error; -#[cfg(not(no_global_oom_handling))] -use crate::alloc::WriteCloneIntoRaw; use crate::alloc::{AllocError, Allocator, Global, Layout}; use crate::borrow::{Cow, ToOwned}; #[cfg(not(no_global_oom_handling))] @@ -667,16 +665,6 @@ impl Rc { } impl Rc { - /// Returns a reference to the underlying allocator. - /// - /// Note: this is an associated function, which means that you have - /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This - /// is so that there is no conflict with a method on the inner type. - #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - pub fn allocator(this: &Self) -> &A { - &this.alloc - } /// Constructs a new `Rc` in the provided allocator. /// /// # Examples @@ -1289,6 +1277,8 @@ impl Rc { /// /// let five = Rc::from_raw(ptr); /// assert_eq!(2, Rc::strong_count(&five)); + /// # // Prevent leaks for Miri. + /// # Rc::decrement_strong_count(ptr); /// } /// ``` #[inline] @@ -1333,6 +1323,17 @@ impl Rc { } impl Rc { + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Rc::allocator(&r)` instead of `r.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(this: &Self) -> &A { + &this.alloc + } + /// Consumes the `Rc`, returning the wrapped pointer. /// /// To avoid a memory leak the pointer must be converted back to an `Rc` using @@ -1346,6 +1347,8 @@ impl Rc { /// let x = Rc::new("hello".to_owned()); /// let x_ptr = Rc::into_raw(x); /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// # // Prevent leaks for Miri. + /// # drop(unsafe { Rc::from_raw(x_ptr) }); /// ``` #[must_use = "losing the pointer will leak memory"] #[stable(feature = "rc_raw", since = "1.17.0")] @@ -1573,6 +1576,8 @@ impl Rc { /// /// let five = Rc::from_raw_in(ptr, System); /// assert_eq!(2, Rc::strong_count(&five)); + /// # // Prevent leaks for Miri. + /// # Rc::decrement_strong_count_in(ptr, System); /// } /// ``` #[inline] @@ -1751,7 +1756,8 @@ impl Rc { } } -impl Rc { +#[cfg(not(no_global_oom_handling))] +impl Rc { /// Makes a mutable reference into the given `Rc`. 
/// /// If there are other `Rc` pointers to the same allocation, then `make_mut` will @@ -1802,31 +1808,52 @@ impl Rc { /// assert!(76 == *data); /// assert!(weak.upgrade().is_none()); /// ``` - #[cfg(not(no_global_oom_handling))] #[inline] #[stable(feature = "rc_unique", since = "1.4.0")] pub fn make_mut(this: &mut Self) -> &mut T { + let size_of_val = size_of_val::(&**this); + if Rc::strong_count(this) != 1 { // Gotta clone the data, there are other Rcs. - // Pre-allocate memory to allow writing the cloned value directly. - let mut rc = Self::new_uninit_in(this.alloc.clone()); - unsafe { - let data = Rc::get_mut_unchecked(&mut rc); - (**this).write_clone_into_raw(data.as_mut_ptr()); - *this = rc.assume_init(); - } + + let this_data_ref: &T = &**this; + // `in_progress` drops the allocation if we panic before finishing initializing it. + let mut in_progress: UniqueRcUninit = + UniqueRcUninit::new(this_data_ref, this.alloc.clone()); + + // Initialize with clone of this. + let initialized_clone = unsafe { + // Clone. If the clone panics, `in_progress` will be dropped and clean up. + this_data_ref.clone_to_uninit(in_progress.data_ptr()); + // Cast type of pointer, now that it is initialized. + in_progress.into_rc() + }; + + // Replace `this` with newly constructed Rc. + *this = initialized_clone; } else if Rc::weak_count(this) != 0 { // Can just steal the data, all that's left is Weaks - let mut rc = Self::new_uninit_in(this.alloc.clone()); + + // We don't need panic-protection like the above branch does, but we might as well + // use the same mechanism. + let mut in_progress: UniqueRcUninit = + UniqueRcUninit::new(&**this, this.alloc.clone()); unsafe { - let data = Rc::get_mut_unchecked(&mut rc); - data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1); + // Initialize `in_progress` with move of **this. + // We have to express this in terms of bytes because `T: ?Sized`; there is no + // operation that just copies a value based on its `size_of_val()`. + ptr::copy_nonoverlapping( + ptr::from_ref(&**this).cast::(), + in_progress.data_ptr().cast::(), + size_of_val, + ); this.inner().dec_strong(); // Remove implicit strong-weak ref (no need to craft a fake // Weak here -- we know other Weaks can clean up for us) this.inner().dec_weak(); - ptr::write(this, rc.assume_init()); + // Replace `this` with newly constructed Rc that has the moved data. + ptr::write(this, in_progress.into_rc()); } } // This unsafety is ok because we're guaranteed that the pointer @@ -2252,7 +2279,7 @@ impl Default for Rc { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "more_rc_default_impls", since = "1.80.0")] impl Default for Rc { /// Creates an empty str inside an Rc /// @@ -2264,7 +2291,7 @@ impl Default for Rc { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "more_rc_default_impls", since = "1.80.0")] impl Default for Rc<[T]> { /// Creates an empty `[T]` inside an Rc /// @@ -2974,6 +3001,13 @@ impl Weak { } impl Weak { + /// Returns a reference to the underlying allocator. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(&self) -> &A { + &self.alloc + } + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. 
The pointer may be dangling, @@ -3518,7 +3552,7 @@ fn data_offset_align(align: usize) -> usize { layout.size() + layout.padding_needed_for(align) } -/// A uniquely owned `Rc` +/// A uniquely owned [`Rc`]. /// /// This represents an `Rc` that is known to be uniquely owned -- that is, have exactly one strong /// reference. Multiple weak pointers can be created, but attempts to upgrade those to strong @@ -3556,13 +3590,24 @@ fn data_offset_align(align: usize) -> usize { /// including fallible or async constructors. #[unstable(feature = "unique_rc_arc", issue = "112566")] #[derive(Debug)] -pub struct UniqueRc { +pub struct UniqueRc< + T: ?Sized, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> { ptr: NonNull>, phantom: PhantomData>, + alloc: A, +} + +#[unstable(feature = "unique_rc_arc", issue = "112566")] +impl, U: ?Sized, A: Allocator> CoerceUnsized> + for UniqueRc +{ } +// Depends on A = Global impl UniqueRc { - /// Creates a new `UniqueRc` + /// Creates a new `UniqueRc`. /// /// Weak references to this `UniqueRc` can be created with [`UniqueRc::downgrade`]. Upgrading /// these weak references will fail before the `UniqueRc` has been converted into an [`Rc`]. @@ -3571,34 +3616,36 @@ impl UniqueRc { #[cfg(not(no_global_oom_handling))] #[unstable(feature = "unique_rc_arc", issue = "112566")] pub fn new(value: T) -> Self { - Self { - ptr: Box::leak(Box::new(RcBox { + Self::new_in(value, Global) + } +} + +impl UniqueRc { + /// Creates a new `UniqueRc` in the provided allocator. + /// + /// Weak references to this `UniqueRc` can be created with [`UniqueRc::downgrade`]. Upgrading + /// these weak references will fail before the `UniqueRc` has been converted into an [`Rc`]. + /// After converting the `UniqueRc` into an [`Rc`], any weak references created beforehand will + /// point to the new [`Rc`]. + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "unique_rc_arc", issue = "112566")] + pub fn new_in(value: T, alloc: A) -> Self { + let (ptr, alloc) = Box::into_unique(Box::new_in( + RcBox { strong: Cell::new(0), // keep one weak reference so if all the weak pointers that are created are dropped // the UniqueRc still stays valid. weak: Cell::new(1), value, - })) - .into(), - phantom: PhantomData, - } - } - - /// Creates a new weak reference to the `UniqueRc` - /// - /// Attempting to upgrade this weak reference will fail before the `UniqueRc` has been converted - /// to a [`Rc`] using [`UniqueRc::into_rc`]. - #[unstable(feature = "unique_rc_arc", issue = "112566")] - pub fn downgrade(this: &Self) -> Weak { - // SAFETY: This pointer was allocated at creation time and we guarantee that we only have - // one strong reference before converting to a regular Rc. - unsafe { - this.ptr.as_ref().inc_weak(); - } - Weak { ptr: this.ptr, alloc: Global } + }, + alloc, + )); + Self { ptr: ptr.into(), phantom: PhantomData, alloc } } +} - /// Converts the `UniqueRc` into a regular [`Rc`] +impl UniqueRc { + /// Converts the `UniqueRc` into a regular [`Rc`]. /// /// This consumes the `UniqueRc` and returns a regular [`Rc`] that contains the `value` that /// is passed to `into_rc`. @@ -3606,19 +3653,41 @@ impl UniqueRc { /// Any weak references created before this method is called can now be upgraded to strong /// references. #[unstable(feature = "unique_rc_arc", issue = "112566")] - pub fn into_rc(this: Self) -> Rc { + pub fn into_rc(this: Self) -> Rc { let mut this = ManuallyDrop::new(this); + + // Move the allocator out. 
+ // SAFETY: `this.alloc` will not be accessed again, nor dropped because it is in + // a `ManuallyDrop`. + let alloc: A = unsafe { ptr::read(&this.alloc) }; + // SAFETY: This pointer was allocated at creation time so we know it is valid. unsafe { // Convert our weak reference into a strong reference this.ptr.as_mut().strong.set(1); - Rc::from_inner(this.ptr) + Rc::from_inner_in(this.ptr, alloc) + } + } +} + +impl<T: ?Sized, A: Allocator + Clone> UniqueRc<T, A> { + /// Creates a new weak reference to the `UniqueRc`. + /// + /// Attempting to upgrade this weak reference will fail before the `UniqueRc` has been converted + /// to a [`Rc`] using [`UniqueRc::into_rc`]. + #[unstable(feature = "unique_rc_arc", issue = "112566")] + pub fn downgrade(this: &Self) -> Weak<T, A> { + // SAFETY: This pointer was allocated at creation time and we guarantee that we only have + // one strong reference before converting to a regular Rc. + unsafe { + this.ptr.as_ref().inc_weak(); } + Weak { ptr: this.ptr, alloc: this.alloc.clone() } } } #[unstable(feature = "unique_rc_arc", issue = "112566")] -impl<T> Deref for UniqueRc<T> { +impl<T: ?Sized, A: Allocator> Deref for UniqueRc<T, A> { type Target = T; fn deref(&self) -> &T { @@ -3628,7 +3697,7 @@ impl<T> Deref for UniqueRc<T> { } #[unstable(feature = "unique_rc_arc", issue = "112566")] -impl<T> DerefMut for UniqueRc<T> { +impl<T: ?Sized, A: Allocator> DerefMut for UniqueRc<T, A> { fn deref_mut(&mut self) -> &mut T { // SAFETY: This pointer was allocated at creation time so we know it is valid. We know we // have unique ownership and therefore it's safe to make a mutable reference because @@ -3638,7 +3707,7 @@ impl<T> DerefMut for UniqueRc<T> { } #[unstable(feature = "unique_rc_arc", issue = "112566")] -unsafe impl<#[may_dangle] T> Drop for UniqueRc<T> { +unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for UniqueRc<T, A> { fn drop(&mut self) { unsafe { // destroy the contained object @@ -3648,8 +3717,72 @@ unsafe impl<#[may_dangle] T> Drop for UniqueRc<T> { self.ptr.as_ref().dec_weak(); if self.ptr.as_ref().weak() == 0 { - Global.deallocate(self.ptr.cast(), Layout::for_value_raw(self.ptr.as_ptr())); + self.alloc.deallocate(self.ptr.cast(), Layout::for_value_raw(self.ptr.as_ptr())); } } } } + +/// A unique owning pointer to a [`RcBox`] **that does not imply the contents are initialized,** +/// but will deallocate it (without dropping the value) when dropped. +/// +/// This is a helper for [`Rc::make_mut()`] to ensure correct cleanup on panic. +/// It is nearly a duplicate of `UniqueRc<MaybeUninit<T>, A>` except that it allows `T: !Sized`, +/// which `MaybeUninit<T>` does not. +#[cfg(not(no_global_oom_handling))] +struct UniqueRcUninit<T: ?Sized, A: Allocator> { + ptr: NonNull<RcBox<T>>, + layout_for_value: Layout, + alloc: Option<A>, +} + +#[cfg(not(no_global_oom_handling))] +impl<T: ?Sized, A: Allocator> UniqueRcUninit<T, A> { + /// Allocate a RcBox with layout suitable to contain `for_value` or a clone of it. + fn new(for_value: &T, alloc: A) -> UniqueRcUninit<T, A> { + let layout = Layout::for_value(for_value); + let ptr = unsafe { + Rc::allocate_for_layout( + layout, + |layout_for_rcbox| alloc.allocate(layout_for_rcbox), + |mem| mem.with_metadata_of(ptr::from_ref(for_value) as *const RcBox<T>), + ) + }; + Self { ptr: NonNull::new(ptr).unwrap(), layout_for_value: layout, alloc: Some(alloc) } + } + + /// Returns the pointer to be written into to initialize the [`Rc`]. + fn data_ptr(&mut self) -> *mut T { + let offset = data_offset_align(self.layout_for_value.align()); + unsafe { self.ptr.as_ptr().byte_add(offset) as *mut T } + } + + /// Upgrade this into a normal [`Rc`]. + /// + /// # Safety + /// + /// The data must have been initialized (by writing to [`Self::data_ptr()`]).
+ unsafe fn into_rc(mut self) -> Rc { + let ptr = self.ptr; + let alloc = self.alloc.take().unwrap(); + mem::forget(self); + // SAFETY: The pointer is valid as per `UniqueRcUninit::new`, and the caller is responsible + // for having initialized the data. + unsafe { Rc::from_ptr_in(ptr.as_ptr(), alloc) } + } +} + +#[cfg(not(no_global_oom_handling))] +impl Drop for UniqueRcUninit { + fn drop(&mut self) { + // SAFETY: + // * new() produced a pointer safe to deallocate. + // * We own the pointer unless into_rc() was called, which forgets us. + unsafe { + self.alloc + .take() + .unwrap() + .deallocate(self.ptr.cast(), rcbox_layout_for_value_layout(self.layout_for_value)); + } + } +} diff --git a/alloc/src/rc/tests.rs b/alloc/src/rc/tests.rs index c8a40603d9db2..5e2e4beb94a2b 100644 --- a/alloc/src/rc/tests.rs +++ b/alloc/src/rc/tests.rs @@ -316,6 +316,24 @@ fn test_cowrc_clone_weak() { assert!(cow1_weak.upgrade().is_none()); } +/// This is similar to the doc-test for `Rc::make_mut()`, but on an unsized type (slice). +#[test] +fn test_cowrc_unsized() { + use std::rc::Rc; + + let mut data: Rc<[i32]> = Rc::new([10, 20, 30]); + + Rc::make_mut(&mut data)[0] += 1; // Won't clone anything + let mut other_data = Rc::clone(&data); // Won't clone inner data + Rc::make_mut(&mut data)[1] += 1; // Clones inner data + Rc::make_mut(&mut data)[2] += 1; // Won't clone anything + Rc::make_mut(&mut other_data)[0] *= 10; // Won't clone anything + + // Now `data` and `other_data` point to different allocations. + assert_eq!(*data, [11, 21, 31]); + assert_eq!(*other_data, [110, 20, 30]); +} + #[test] fn test_show() { let foo = Rc::new(75); @@ -606,6 +624,23 @@ fn test_unique_rc_drops_contents() { assert!(dropped); } +/// Exercise the non-default allocator usage. +#[test] +fn test_unique_rc_with_alloc_drops_contents() { + let mut dropped = false; + struct DropMe<'a>(&'a mut bool); + impl Drop for DropMe<'_> { + fn drop(&mut self) { + *self.0 = true; + } + } + { + let rc = UniqueRc::new_in(DropMe(&mut dropped), std::alloc::System); + drop(rc); + } + assert!(dropped); +} + #[test] fn test_unique_rc_weak_clone_holding_ref() { let mut v = UniqueRc::new(0u8); @@ -614,3 +649,12 @@ fn test_unique_rc_weak_clone_holding_ref() { let _ = w.clone(); // touch weak count *r = 123; } + +#[test] +fn test_unique_rc_unsizing_coercion() { + let mut rc: UniqueRc<[u8]> = UniqueRc::new([0u8; 3]); + assert_eq!(rc.len(), 3); + rc[0] = 123; + let rc: Rc<[u8]> = UniqueRc::into_rc(rc); + assert_eq!(*rc, [123, 0, 0]); +} diff --git a/alloc/src/slice.rs b/alloc/src/slice.rs index ebe6f7e7caa9b..c7960b3fb49c3 100644 --- a/alloc/src/slice.rs +++ b/alloc/src/slice.rs @@ -16,7 +16,7 @@ use core::borrow::{Borrow, BorrowMut}; #[cfg(not(no_global_oom_handling))] use core::cmp::Ordering::{self, Less}; #[cfg(not(no_global_oom_handling))] -use core::mem::{self, SizedTypeProperties}; +use core::mem::{self, MaybeUninit}; #[cfg(not(no_global_oom_handling))] use core::ptr; #[cfg(not(no_global_oom_handling))] @@ -24,7 +24,7 @@ use core::slice::sort; use crate::alloc::Allocator; #[cfg(not(no_global_oom_handling))] -use crate::alloc::{self, Global}; +use crate::alloc::Global; #[cfg(not(no_global_oom_handling))] use crate::borrow::ToOwned; use crate::boxed::Box; @@ -174,23 +174,32 @@ pub(crate) mod hack { #[cfg(not(test))] impl [T] { - /// Sorts the slice. + /// Sorts the slice, preserving initial order of equal elements. /// - /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. 
+ /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) + /// worst-case. + /// + /// If `T: Ord` does not implement a total order the resulting order is unspecified. All + /// original elements will remain in the slice and any possible modifications via interior + /// mutability are observed in the input. Same is true if `T: Ord` panics. /// /// When applicable, unstable sorting is preferred because it is generally faster than stable - /// sorting and it doesn't allocate auxiliary memory. - /// See [`sort_unstable`](slice::sort_unstable). + /// sorting and it doesn't allocate auxiliary memory. See + /// [`sort_unstable`](slice::sort_unstable). The exception are partially sorted slices, which + /// may be better served with `slice::sort`. /// /// # Current implementation /// - /// The current algorithm is an adaptive, iterative merge sort inspired by - /// [timsort](https://en.wikipedia.org/wiki/Timsort). - /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of - /// two or more sorted sequences concatenated one after another. + /// The current implementation is based on [driftsort] by Orson Peters and Lukas Bergdoll, which + /// combines the fast average case of quicksort with the fast worst case and partial run + /// detection of mergesort, achieving linear time on fully sorted and reversed inputs. On inputs + /// with k distinct elements, the expected time to sort the data is *O*(*n* \* log(*k*)). + /// + /// The auxiliary memory allocation behavior depends on the input length. Short slices are + /// handled without allocation, medium sized slices allocate `self.len()` and beyond that it + /// clamps at `self.len() / 2`. /// - /// Also, it allocates temporary storage half the size of `self`, but for short slices a - /// non-allocating insertion sort is used instead. + /// If `T: Ord` does not implement a total order, the implementation may panic. /// /// # Examples /// @@ -200,6 +209,8 @@ impl [T] { /// v.sort(); /// assert!(v == [-5, -3, 1, 2, 4]); /// ``` + /// + /// [driftsort]: https://github.com/Voultapher/driftsort #[cfg(not(no_global_oom_handling))] #[rustc_allow_incoherent_impl] #[stable(feature = "rust1", since = "1.0.0")] @@ -211,13 +222,18 @@ impl [T] { stable_sort(self, T::lt); } - /// Sorts the slice with a comparator function. + /// Sorts the slice with a comparator function, preserving initial order of equal elements. /// - /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) + /// worst-case. /// - /// The comparator function must define a total ordering for the elements in the slice. If - /// the ordering is not total, the order of the elements is unspecified. An order is a - /// total order if it is (for all `a`, `b` and `c`): + /// The comparator function should define a total ordering for the elements in the slice. If the + /// ordering is not total, the order of the elements is unspecified. + /// + /// If the comparator function does not implement a total order the resulting order is + /// unspecified. All original elements will remain in the slice and any possible modifications + /// via interior mutability are observed in the input. Same is true if the comparator function + /// panics. 
A total order (for all `a`, `b` and `c`): /// /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`. @@ -227,23 +243,22 @@ impl [T] { /// /// ``` /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0]; - /// floats.sort_by(|a, b| a.partial_cmp(b).unwrap()); + /// floats.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]); /// ``` /// - /// When applicable, unstable sorting is preferred because it is generally faster than stable - /// sorting and it doesn't allocate auxiliary memory. - /// See [`sort_unstable_by`](slice::sort_unstable_by). - /// /// # Current implementation /// - /// The current algorithm is an adaptive, iterative merge sort inspired by - /// [timsort](https://en.wikipedia.org/wiki/Timsort). - /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of - /// two or more sorted sequences concatenated one after another. + /// The current implementation is based on [driftsort] by Orson Peters and Lukas Bergdoll, which + /// combines the fast average case of quicksort with the fast worst case and partial run + /// detection of mergesort, achieving linear time on fully sorted and reversed inputs. On inputs + /// with k distinct elements, the expected time to sort the data is *O*(*n* \* log(*k*)). + /// + /// The auxiliary memory allocation behavior depends on the input length. Short slices are + /// handled without allocation, medium sized slices allocate `self.len()` and beyond that it + /// clamps at `self.len() / 2`. /// - /// Also, it allocates temporary storage half the size of `self`, but for short slices a - /// non-allocating insertion sort is used instead. + /// If `T: Ord` does not implement a total order, the implementation may panic. /// /// # Examples /// @@ -256,6 +271,8 @@ impl [T] { /// v.sort_by(|a, b| b.cmp(a)); /// assert!(v == [5, 4, 3, 2, 1]); /// ``` + /// + /// [driftsort]: https://github.com/Voultapher/driftsort #[cfg(not(no_global_oom_handling))] #[rustc_allow_incoherent_impl] #[stable(feature = "rust1", since = "1.0.0")] @@ -267,28 +284,27 @@ impl [T] { stable_sort(self, |a, b| compare(a, b) == Less); } - /// Sorts the slice with a key extraction function. + /// Sorts the slice with a key extraction function, preserving initial order of equal elements. /// /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*)) /// worst-case, where the key function is *O*(*m*). /// - /// For expensive key functions (e.g. functions that are not simple property accesses or - /// basic operations), [`sort_by_cached_key`](slice::sort_by_cached_key) is likely to be - /// significantly faster, as it does not recompute element keys. - /// - /// When applicable, unstable sorting is preferred because it is generally faster than stable - /// sorting and it doesn't allocate auxiliary memory. - /// See [`sort_unstable_by_key`](slice::sort_unstable_by_key). + /// If `K: Ord` does not implement a total order the resulting order is unspecified. + /// All original elements will remain in the slice and any possible modifications via interior + /// mutability are observed in the input. Same is true if `K: Ord` panics. /// /// # Current implementation /// - /// The current algorithm is an adaptive, iterative merge sort inspired by - /// [timsort](https://en.wikipedia.org/wiki/Timsort). 
- /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of - /// two or more sorted sequences concatenated one after another. + /// The current implementation is based on [driftsort] by Orson Peters and Lukas Bergdoll, which + /// combines the fast average case of quicksort with the fast worst case and partial run + /// detection of mergesort, achieving linear time on fully sorted and reversed inputs. On inputs + /// with k distinct elements, the expected time to sort the data is *O*(*n* \* log(*k*)). + /// + /// The auxiliary memory allocation behavior depends on the input length. Short slices are + /// handled without allocation, medium sized slices allocate `self.len()` and beyond that it + /// clamps at `self.len() / 2`. /// - /// Also, it allocates temporary storage half the size of `self`, but for short slices a - /// non-allocating insertion sort is used instead. + /// If `K: Ord` does not implement a total order, the implementation may panic. /// /// # Examples /// @@ -298,6 +314,8 @@ impl [T] { /// v.sort_by_key(|k| k.abs()); /// assert!(v == [1, 2, -3, 4, -5]); /// ``` + /// + /// [driftsort]: https://github.com/Voultapher/driftsort #[cfg(not(no_global_oom_handling))] #[rustc_allow_incoherent_impl] #[stable(feature = "slice_sort_by_key", since = "1.7.0")] @@ -310,27 +328,30 @@ impl [T] { stable_sort(self, |a, b| f(a).lt(&f(b))); } - /// Sorts the slice with a key extraction function. + /// Sorts the slice with a key extraction function, preserving initial order of equal elements. /// - /// During sorting, the key function is called at most once per element, by using - /// temporary storage to remember the results of key evaluation. - /// The order of calls to the key function is unspecified and may change in future versions - /// of the standard library. + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* + /// log(*n*)) worst-case, where the key function is *O*(*m*). /// - /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*)) - /// worst-case, where the key function is *O*(*m*). + /// During sorting, the key function is called at most once per element, by using temporary + /// storage to remember the results of key evaluation. The order of calls to the key function is + /// unspecified and may change in future versions of the standard library. /// - /// For simple key functions (e.g., functions that are property accesses or - /// basic operations), [`sort_by_key`](slice::sort_by_key) is likely to be - /// faster. + /// If `K: Ord` does not implement a total order the resulting order is unspecified. + /// All original elements will remain in the slice and any possible modifications via interior + /// mutability are observed in the input. Same is true if `K: Ord` panics. + /// + /// For simple key functions (e.g., functions that are property accesses or basic operations), + /// [`sort_by_key`](slice::sort_by_key) is likely to be faster. /// /// # Current implementation /// - /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, - /// which combines the fast average case of randomized quicksort with the fast worst case of - /// heapsort, while achieving linear time on slices with certain patterns. It uses some - /// randomization to avoid degenerate cases, but with a fixed seed to always provide - /// deterministic behavior. 
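// Aside: a minimal sketch (not part of the patch) of the decorate-sort-undecorate
// scheme `sort_by_cached_key` is documented to use, mirroring the `sort_by_key!`
// macro in the hunk that follows: compute each key once, sort `(key, index)`
// pairs, then apply the permutation by chasing already-performed swaps. The
// function name is hypothetical, and it assumes `v.len()` fits in `u32`, which
// the real code checks before picking `u32` as the index type.
fn cached_key_sketch<T, K: Ord>(v: &mut [T], mut f: impl FnMut(&T) -> K) {
    let mut indices: Vec<(K, u32)> =
        v.iter().map(|x| f(x)).enumerate().map(|(i, k)| (k, i as u32)).collect();
    // The indices make every pair unique, so an unstable sort is still stable
    // with respect to the original slice, and it needs no extra allocation.
    indices.sort_unstable();
    for i in 0..v.len() {
        let mut index = indices[i].1;
        // Follow the chain of swaps already applied to positions before `i`.
        while (index as usize) < i {
            index = indices[index as usize].1;
        }
        indices[i].1 = index;
        v.swap(i, index as usize);
    }
}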
+ /// The current implementation is based on [instruction-parallel-network sort][ipnsort] by Lukas + /// Bergdoll, which combines the fast average case of randomized quicksort with the fast worst + /// case of heapsort, while achieving linear time on fully sorted and reversed inputs. And + /// *O*(*k* \* log(*n*)) where *k* is the number of distinct elements in the input. It leverages + /// superscalar out-of-order execution capabilities commonly found in CPUs, to efficiently + /// perform the operation. /// /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the /// length of the slice. @@ -344,7 +365,7 @@ impl [T] { /// assert!(v == [-3, -5, 2, 32, 4]); /// ``` /// - /// [pdqsort]: https://github.com/orlp/pdqsort + /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort #[cfg(not(no_global_oom_handling))] #[rustc_allow_incoherent_impl] #[stable(feature = "slice_sort_by_cached_key", since = "1.34.0")] @@ -361,7 +382,7 @@ impl [T] { $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect(); // The elements of `indices` are unique, as they are indexed, so any sort will be // stable with respect to the original slice. We use `sort_unstable` here because - // it requires less memory allocation. + // it requires no memory allocation. indices.sort_unstable(); for i in 0..$slice.len() { let mut index = indices[i].1; @@ -374,24 +395,24 @@ impl [T] { }}; } - let sz_u8 = mem::size_of::<(K, u8)>(); - let sz_u16 = mem::size_of::<(K, u16)>(); - let sz_u32 = mem::size_of::<(K, u32)>(); - let sz_usize = mem::size_of::<(K, usize)>(); - let len = self.len(); if len < 2 { return; } - if sz_u8 < sz_u16 && len <= (u8::MAX as usize) { - return sort_by_key!(u8, self, f); - } - if sz_u16 < sz_u32 && len <= (u16::MAX as usize) { - return sort_by_key!(u16, self, f); - } - if sz_u32 < sz_usize && len <= (u32::MAX as usize) { + + // Avoids binary-size usage in cases where the alignment doesn't work out to make this + // beneficial or on 32-bit platforms. + let is_using_u32_as_idx_type_helpful = + const { mem::size_of::<(K, u32)>() < mem::size_of::<(K, usize)>() }; + + // It's possible to instantiate this for u8 and u16 but, doing so is very wasteful in terms + // of compile-times and binary-size, the peak saved heap memory for u16 is (u8 + u16) -> 4 + // bytes * u16::MAX vs (u8 + u32) -> 8 bytes * u16::MAX, the saved heap memory is at peak + // ~262KB. + if is_using_u32_as_idx_type_helpful && len <= (u32::MAX as usize) { return sort_by_key!(u32, self, f); } + sort_by_key!(usize, self, f) } @@ -843,46 +864,17 @@ fn stable_sort(v: &mut [T], mut is_less: F) where F: FnMut(&T, &T) -> bool, { - if T::IS_ZST { - // Sorting has no meaningful behavior on zero-sized types. Do nothing. - return; - } - - let elem_alloc_fn = |len: usize| -> *mut T { - // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len > - // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap - // elements. - unsafe { alloc::alloc(alloc::Layout::array::(len).unwrap_unchecked()) as *mut T } - }; - - let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| { - // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len > - // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same - // len. 
- unsafe { - alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::(len).unwrap_unchecked()); - } - }; - - let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun { - // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an - // obscene length or 0. - unsafe { - alloc::alloc(alloc::Layout::array::(len).unwrap_unchecked()) - as *mut sort::TimSortRun - } - }; + sort::stable::sort::>(v, &mut is_less); +} - let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| { - // SAFETY: The caller must ensure that buf_ptr was created by elem_alloc_fn with the same - // len. - unsafe { - alloc::dealloc( - buf_ptr as *mut u8, - alloc::Layout::array::(len).unwrap_unchecked(), - ); - } - }; +#[cfg(not(no_global_oom_handling))] +#[unstable(issue = "none", feature = "std_internals")] +impl sort::stable::BufGuard for Vec { + fn with_capacity(capacity: usize) -> Self { + Vec::with_capacity(capacity) + } - sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn); + fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit] { + self.spare_capacity_mut() + } } diff --git a/alloc/src/slice/tests.rs b/alloc/src/slice/tests.rs index 54bc4e77b16f0..0b972a13898eb 100644 --- a/alloc/src/slice/tests.rs +++ b/alloc/src/slice/tests.rs @@ -34,7 +34,7 @@ macro_rules! do_test { } let v = $input.to_owned(); - let _ = std::panic::catch_unwind(move || { + let _ = panic::catch_unwind(move || { let mut v = v; let mut panic_countdown = panic_countdown; v.$func(|a, b| { @@ -240,6 +240,7 @@ fn panic_safe() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow +#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] fn test_sort() { let mut rng = test_rng(); @@ -294,15 +295,20 @@ fn test_sort() { } } - // Sort using a completely random comparison function. - // This will reorder the elements *somehow*, but won't panic. - let mut v = [0; 500]; - for i in 0..v.len() { + const ORD_VIOLATION_MAX_LEN: usize = 500; + let mut v = [0; ORD_VIOLATION_MAX_LEN]; + for i in 0..ORD_VIOLATION_MAX_LEN { v[i] = i as i32; } - v.sort_by(|_, _| *[Less, Equal, Greater].choose(&mut rng).unwrap()); + + // Sort using a completely random comparison function. This will reorder the elements *somehow*, + // it may panic but the original elements must still be present. 
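The `BufGuard` impl above works because `Vec::spare_capacity_mut` exposes unused capacity as `&mut [MaybeUninit<T>]`. A minimal sketch of that pattern on its own, using only stable APIs:

```rust
use std::mem::MaybeUninit;

fn main() {
    let mut buf: Vec<u32> = Vec::with_capacity(4);
    // Borrow the unused capacity as uninitialized slots.
    let spare: &mut [MaybeUninit<u32>] = buf.spare_capacity_mut();
    for (i, slot) in spare.iter_mut().take(4).enumerate() {
        slot.write(i as u32);
    }
    // SAFETY: the first 4 slots were initialized in the loop above.
    unsafe { buf.set_len(4) };
    assert_eq!(buf, [0, 1, 2, 3]);
}
```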
+ let _ = panic::catch_unwind(move || { + v.sort_by(|_, _| *[Less, Equal, Greater].choose(&mut rng).unwrap()); + }); + v.sort(); - for i in 0..v.len() { + for i in 0..ORD_VIOLATION_MAX_LEN { assert_eq!(v[i], i as i32); } diff --git a/alloc/src/str.rs b/alloc/src/str.rs index 3e23612d0c13c..3bb808a6c73ab 100644 --- a/alloc/src/str.rs +++ b/alloc/src/str.rs @@ -206,15 +206,16 @@ impl BorrowMut for String { #[stable(feature = "rust1", since = "1.0.0")] impl ToOwned for str { type Owned = String; + #[inline] fn to_owned(&self) -> String { unsafe { String::from_utf8_unchecked(self.as_bytes().to_owned()) } } + #[inline] fn clone_into(&self, target: &mut String) { - let mut b = mem::take(target).into_bytes(); - self.as_bytes().clone_into(&mut b); - *target = unsafe { String::from_utf8_unchecked(b) } + target.clear(); + target.push_str(self); } } diff --git a/alloc/src/string.rs b/alloc/src/string.rs index 36078da7c35a6..07ffd3e151914 100644 --- a/alloc/src/string.rs +++ b/alloc/src/string.rs @@ -1984,6 +1984,9 @@ impl String { /// let x = String::from("bucket"); /// let static_ref: &'static mut str = x.leak(); /// assert_eq!(static_ref, "bucket"); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. + /// # drop(unsafe { Box::from_raw(static_ref) }); /// ``` #[stable(feature = "string_leak", since = "1.72.0")] #[inline] diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index 7dcaa59dcd1c7..a905a1e6b7e62 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ -10,6 +10,8 @@ use core::any::Any; use core::borrow; +#[cfg(not(no_global_oom_handling))] +use core::clone::CloneToUninit; use core::cmp::Ordering; use core::fmt; use core::hash::{Hash, Hasher}; @@ -18,8 +20,6 @@ use core::intrinsics::abort; #[cfg(not(no_global_oom_handling))] use core::iter; use core::marker::{PhantomData, Unsize}; -#[cfg(not(no_global_oom_handling))] -use core::mem::size_of_val; use core::mem::{self, align_of_val_raw}; use core::ops::{CoerceUnsized, Deref, DerefPure, DispatchFromDyn, Receiver}; use core::panic::{RefUnwindSafe, UnwindSafe}; @@ -32,8 +32,6 @@ use core::sync::atomic::Ordering::{Acquire, Relaxed, Release}; #[cfg(not(no_global_oom_handling))] use crate::alloc::handle_alloc_error; -#[cfg(not(no_global_oom_handling))] -use crate::alloc::WriteCloneIntoRaw; use crate::alloc::{AllocError, Allocator, Global, Layout}; use crate::borrow::{Cow, ToOwned}; use crate::boxed::Box; @@ -199,11 +197,7 @@ macro_rules! acquire { /// /// Sharing some immutable data between threads: /// -// Note that we **do not** run these tests here. The windows builders get super -// unhappy if a thread outlives the main thread and then exits at the same time -// (something deadlocks) so we just avoid this entirely by not running these -// tests. -/// ```no_run +/// ``` /// use std::sync::Arc; /// use std::thread; /// @@ -222,7 +216,7 @@ macro_rules! acquire { /// /// [`AtomicUsize`]: core::sync::atomic::AtomicUsize "sync::atomic::AtomicUsize" /// -/// ```no_run +/// ``` /// use std::sync::Arc; /// use std::sync::atomic::{AtomicUsize, Ordering}; /// use std::thread; @@ -683,16 +677,6 @@ impl Arc { } impl Arc { - /// Returns a reference to the underlying allocator. - /// - /// Note: this is an associated function, which means that you have - /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This - /// is so that there is no conflict with a method on the inner type. 
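The rewritten `str::clone_into` above (`clear` plus `push_str`) is an allocation-reuse optimization; the byte-vector round-trip it replaces obscured that no new buffer is needed. A short stable-Rust demonstration of the reuse:

```rust
fn main() {
    let mut target = String::with_capacity(64);
    target.push_str("old contents");
    let cap_before = target.capacity();

    // `ToOwned::clone_into` is in the prelude; it overwrites `target`
    // in place, reusing its existing buffer when it is large enough.
    "new".clone_into(&mut target);

    assert_eq!(target, "new");
    assert_eq!(target.capacity(), cap_before); // no reallocation
}
```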
- #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - pub fn allocator(this: &Self) -> &A { - &this.alloc - } /// Constructs a new `Arc` in the provided allocator. /// /// # Examples @@ -942,15 +926,18 @@ impl Arc { /// This will succeed even if there are outstanding weak references. /// /// It is strongly recommended to use [`Arc::into_inner`] instead if you don't - /// want to keep the `Arc` in the [`Err`] case. - /// Immediately dropping the [`Err`] payload, like in the expression - /// `Arc::try_unwrap(this).ok()`, can still cause the strong count to - /// drop to zero and the inner value of the `Arc` to be dropped: - /// For instance if two threads each execute this expression in parallel, then - /// there is a race condition. The threads could first both check whether they - /// have the last clone of their `Arc` via `Arc::try_unwrap`, and then - /// both drop their `Arc` in the call to [`ok`][`Result::ok`], - /// taking the strong count from two down to zero. + /// keep the `Arc` in the [`Err`] case. + /// Immediately dropping the [`Err`]-value, as the expression + /// `Arc::try_unwrap(this).ok()` does, can cause the strong count to + /// drop to zero and the inner value of the `Arc` to be dropped. + /// For instance, if two threads execute such an expression in parallel, + /// there is a race condition without the possibility of unsafety: + /// The threads could first both check whether they own the last instance + /// in `Arc::try_unwrap`, determine that they both do not, and then both + /// discard and drop their instance in the call to [`ok`][`Result::ok`]. + /// In this scenario, the value inside the `Arc` is safely destroyed + /// by exactly one of the threads, but neither thread will ever be able + /// to use the value. /// /// # Examples /// @@ -1427,6 +1414,8 @@ impl Arc { /// // the `Arc` between threads. /// let five = Arc::from_raw(ptr); /// assert_eq!(2, Arc::strong_count(&five)); + /// # // Prevent leaks for Miri. + /// # Arc::decrement_strong_count(ptr); /// } /// ``` #[inline] @@ -1473,6 +1462,17 @@ impl Arc { } impl Arc { + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Arc::allocator(&a)` instead of `a.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(this: &Self) -> &A { + &this.alloc + } + /// Consumes the `Arc`, returning the wrapped pointer. /// /// To avoid a memory leak the pointer must be converted back to an `Arc` using @@ -1486,6 +1486,8 @@ impl Arc { /// let x = Arc::new("hello".to_owned()); /// let x_ptr = Arc::into_raw(x); /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// # // Prevent leaks for Miri. + /// # drop(unsafe { Arc::from_raw(x_ptr) }); /// ``` #[must_use = "losing the pointer will leak memory"] #[stable(feature = "rc_raw", since = "1.17.0")] @@ -1768,6 +1770,8 @@ impl Arc { /// // the `Arc` between threads. /// let five = Arc::from_raw_in(ptr, System); /// assert_eq!(2, Arc::strong_count(&five)); + /// # // Prevent leaks for Miri. + /// # Arc::decrement_strong_count_in(ptr, System); /// } /// ``` #[inline] @@ -2149,7 +2153,8 @@ unsafe impl DerefPure for Arc {} #[unstable(feature = "receiver_trait", issue = "none")] impl Receiver for Arc {} -impl Arc { +#[cfg(not(no_global_oom_handling))] +impl Arc { /// Makes a mutable reference into the given `Arc`. 
/// /// If there are other `Arc` pointers to the same allocation, then `make_mut` will @@ -2200,10 +2205,11 @@ impl Arc { /// assert!(76 == *data); /// assert!(weak.upgrade().is_none()); /// ``` - #[cfg(not(no_global_oom_handling))] #[inline] #[stable(feature = "arc_unique", since = "1.4.0")] pub fn make_mut(this: &mut Self) -> &mut T { + let size_of_val = mem::size_of_val::(&**this); + // Note that we hold both a strong reference and a weak reference. // Thus, releasing our strong reference only will not, by itself, cause // the memory to be deallocated. @@ -2214,13 +2220,19 @@ impl Arc { // deallocated. if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() { // Another strong pointer exists, so we must clone. - // Pre-allocate memory to allow writing the cloned value directly. - let mut arc = Self::new_uninit_in(this.alloc.clone()); - unsafe { - let data = Arc::get_mut_unchecked(&mut arc); - (**this).write_clone_into_raw(data.as_mut_ptr()); - *this = arc.assume_init(); - } + + let this_data_ref: &T = &**this; + // `in_progress` drops the allocation if we panic before finishing initializing it. + let mut in_progress: UniqueArcUninit = + UniqueArcUninit::new(this_data_ref, this.alloc.clone()); + + let initialized_clone = unsafe { + // Clone. If the clone panics, `in_progress` will be dropped and clean up. + this_data_ref.clone_to_uninit(in_progress.data_ptr()); + // Cast type of pointer, now that it is initialized. + in_progress.into_arc() + }; + *this = initialized_clone; } else if this.inner().weak.load(Relaxed) != 1 { // Relaxed suffices in the above because this is fundamentally an // optimization: we are always racing with weak pointers being @@ -2239,11 +2251,22 @@ impl Arc { let _weak = Weak { ptr: this.ptr, alloc: this.alloc.clone() }; // Can just steal the data, all that's left is Weaks - let mut arc = Self::new_uninit_in(this.alloc.clone()); + // + // We don't need panic-protection like the above branch does, but we might as well + // use the same mechanism. + let mut in_progress: UniqueArcUninit = + UniqueArcUninit::new(&**this, this.alloc.clone()); unsafe { - let data = Arc::get_mut_unchecked(&mut arc); - data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1); - ptr::write(this, arc.assume_init()); + // Initialize `in_progress` with move of **this. + // We have to express this in terms of bytes because `T: ?Sized`; there is no + // operation that just copies a value based on its `size_of_val()`. + ptr::copy_nonoverlapping( + ptr::from_ref(&**this).cast::(), + in_progress.data_ptr().cast::(), + size_of_val, + ); + + ptr::write(this, in_progress.into_arc()); } } else { // We were the sole reference of either kind; bump back up the @@ -2699,6 +2722,13 @@ impl Weak { } impl Weak { + /// Returns a reference to the underlying allocator. + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn allocator(&self) -> &A { + &self.alloc + } + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. 
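For readers following the `make_mut` rework, a compact reminder of the clone-on-write semantics involved; this is plain stable `std` usage, not code from this patch (`Arc::into_inner` being the race-free alternative to the `try_unwrap(..).ok()` pattern warned about above):

```rust
use std::sync::Arc;

fn main() {
    let mut data = Arc::new(5);
    *Arc::make_mut(&mut data) += 1; // sole handle: mutates in place
    let other = Arc::clone(&data);
    *Arc::make_mut(&mut data) += 1; // shared: clones the value first
    assert_eq!(*data, 7);
    assert_eq!(*other, 6);
    // Unlike `Arc::try_unwrap(..).ok()`, `into_inner` takes the value
    // out of a now-unique handle without a destruction race.
    assert_eq!(Arc::into_inner(other), Some(6));
}
```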
The pointer may be dangling,
@@ -3407,7 +3437,7 @@ static STATIC_INNER_SLICE: SliceArcInnerForStatic = SliceArcInnerForStatic {
 };
 #[cfg(not(no_global_oom_handling))]
-#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "more_rc_default_impls", since = "1.80.0")]
 impl Default for Arc<str> {
 /// Creates an empty str inside an Arc
 ///
@@ -3422,7 +3452,7 @@ impl Default for Arc { }
 #[cfg(not(no_global_oom_handling))]
-#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "more_rc_default_impls", since = "1.80.0")]
 impl Default for Arc<core::ffi::CStr> {
 /// Creates an empty CStr inside an Arc
 ///
@@ -3441,7 +3471,7 @@ impl Default for Arc { }
 #[cfg(not(no_global_oom_handling))]
-#[stable(feature = "more_rc_default_impls", since = "CURRENT_RUSTC_VERSION")]
+#[stable(feature = "more_rc_default_impls", since = "1.80.0")]
 impl<T> Default for Arc<[T]> {
 /// Creates an empty `[T]` inside an Arc
 ///
@@ -3808,6 +3838,68 @@ fn data_offset_align(align: usize) -> usize {
 layout.size() + layout.padding_needed_for(align)
 }
+/// A unique owning pointer to an [`ArcInner`] **that does not imply the contents are initialized,**
+/// but will deallocate it (without dropping the value) when dropped.
+///
+/// This is a helper for [`Arc::make_mut()`] to ensure correct cleanup on panic.
+#[cfg(not(no_global_oom_handling))]
+struct UniqueArcUninit<T: ?Sized, A: Allocator> {
+ ptr: NonNull<ArcInner<T>>,
+ layout_for_value: Layout,
+ alloc: Option<A>,
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: ?Sized, A: Allocator> UniqueArcUninit<T, A> {
+ /// Allocate an ArcInner with layout suitable to contain `for_value` or a clone of it.
+ fn new(for_value: &T, alloc: A) -> UniqueArcUninit<T, A> {
+ let layout = Layout::for_value(for_value);
+ let ptr = unsafe {
+ Arc::allocate_for_layout(
+ layout,
+ |layout_for_arcinner| alloc.allocate(layout_for_arcinner),
+ |mem| mem.with_metadata_of(ptr::from_ref(for_value) as *const ArcInner<T>),
+ )
+ };
+ Self { ptr: NonNull::new(ptr).unwrap(), layout_for_value: layout, alloc: Some(alloc) }
+ }
+
+ /// Returns the pointer to be written into to initialize the [`Arc`].
+ fn data_ptr(&mut self) -> *mut T {
+ let offset = data_offset_align(self.layout_for_value.align());
+ unsafe { self.ptr.as_ptr().byte_add(offset) as *mut T }
+ }
+
+ /// Upgrade this into a normal [`Arc`].
+ ///
+ /// # Safety
+ ///
+ /// The data must have been initialized (by writing to [`Self::data_ptr()`]).
+ unsafe fn into_arc(mut self) -> Arc<T, A> {
+ let ptr = self.ptr;
+ let alloc = self.alloc.take().unwrap();
+ mem::forget(self);
+ // SAFETY: The pointer is valid as per `UniqueArcUninit::new`, and the caller is responsible
+ // for having initialized the data.
+ unsafe { Arc::from_ptr_in(ptr.as_ptr(), alloc) }
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: ?Sized, A: Allocator> Drop for UniqueArcUninit<T, A> {
+ fn drop(&mut self) {
+ // SAFETY:
+ // * new() produced a pointer safe to deallocate.
+ // * We own the pointer unless into_arc() was called, which forgets us.
+ unsafe { + self.alloc.take().unwrap().deallocate( + self.ptr.cast(), + arcinner_layout_for_value_layout(self.layout_for_value), + ); + } + } +} + #[stable(feature = "arc_error", since = "1.52.0")] impl core::error::Error for Arc { #[allow(deprecated, deprecated_in_future)] diff --git a/alloc/src/sync/tests.rs b/alloc/src/sync/tests.rs index 49eae718c1690..1b123aa58f205 100644 --- a/alloc/src/sync/tests.rs +++ b/alloc/src/sync/tests.rs @@ -396,7 +396,7 @@ fn show_arc() { // Make sure deriving works with Arc #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Debug, Default)] -struct Foo { +struct _Foo { inner: Arc, } diff --git a/alloc/src/testing/crash_test.rs b/alloc/src/testing/crash_test.rs index bcf5f5f72510e..ff72f99b2cbed 100644 --- a/alloc/src/testing/crash_test.rs +++ b/alloc/src/testing/crash_test.rs @@ -1,5 +1,4 @@ -// We avoid relying on anything else in the crate, apart from the `Debug` trait. -use crate::fmt::Debug; +use crate::fmt::Debug; // the `Debug` trait is the only thing we use from `crate::fmt` use std::cmp::Ordering; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; diff --git a/alloc/src/vec/in_place_collect.rs b/alloc/src/vec/in_place_collect.rs index 22541a2b9d82f..0dc193d82c535 100644 --- a/alloc/src/vec/in_place_collect.rs +++ b/alloc/src/vec/in_place_collect.rs @@ -154,6 +154,7 @@ //! } //! vec.truncate(write_idx); //! ``` + use crate::alloc::{handle_alloc_error, Global}; use core::alloc::Allocator; use core::alloc::Layout; diff --git a/alloc/src/vec/into_iter.rs b/alloc/src/vec/into_iter.rs index c47989337708f..10f62e4bb62d8 100644 --- a/alloc/src/vec/into_iter.rs +++ b/alloc/src/vec/into_iter.rs @@ -120,10 +120,15 @@ impl IntoIter { /// This is roughly equivalent to the following, but more efficient /// /// ``` - /// # let mut into_iter = Vec::::with_capacity(10).into_iter(); + /// # let mut vec = Vec::::with_capacity(10); + /// # let ptr = vec.as_mut_ptr(); + /// # let mut into_iter = vec.into_iter(); /// let mut into_iter = std::mem::replace(&mut into_iter, Vec::new().into_iter()); /// (&mut into_iter).for_each(drop); /// std::mem::forget(into_iter); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. + /// # drop(unsafe { Vec::::from_raw_parts(ptr, 0, 10) }); /// ``` /// /// This method is used by in-place iteration, refer to the vec::in_place_collect @@ -254,7 +259,7 @@ impl Iterator for IntoIter { #[inline] fn next_chunk(&mut self) -> Result<[T; N], core::array::IntoIter> { - let mut raw_ary = MaybeUninit::uninit_array(); + let mut raw_ary = [const { MaybeUninit::uninit() }; N]; let len = self.len(); diff --git a/alloc/src/vec/mod.rs b/alloc/src/vec/mod.rs index b2e22d8715a8b..729d5dd4fe4d2 100644 --- a/alloc/src/vec/mod.rs +++ b/alloc/src/vec/mod.rs @@ -1101,6 +1101,7 @@ impl Vec { /// ``` #[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn shrink_to_fit(&mut self) { // The capacity is never less than the length, and there's nothing to do when // they are equal, so we can avoid the panic case in `RawVec::shrink_to_fit` @@ -1472,6 +1473,9 @@ impl Vec { /// // 2. `0 <= capacity` always holds whatever `capacity` is. /// unsafe { /// vec.set_len(0); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. 
+ /// # vec.set_len(3); /// } /// ``` /// @@ -2390,6 +2394,9 @@ impl Vec { /// let static_ref: &'static mut [usize] = x.leak(); /// static_ref[0] += 1; /// assert_eq!(static_ref, &[2, 2, 3]); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. + /// # drop(unsafe { Box::from_raw(static_ref) }); /// ``` #[stable(feature = "vec_leak", since = "1.47.0")] #[inline] @@ -2643,15 +2650,13 @@ impl Vec<[T; N], A> { /// # Examples /// /// ``` - /// #![feature(slice_flatten)] - /// /// let mut vec = vec![[1, 2, 3], [4, 5, 6], [7, 8, 9]]; /// assert_eq!(vec.pop(), Some([7, 8, 9])); /// /// let mut flattened = vec.into_flattened(); /// assert_eq!(flattened.pop(), Some(6)); /// ``` - #[unstable(feature = "slice_flatten", issue = "95629")] + #[stable(feature = "slice_flatten", since = "1.80.0")] pub fn into_flattened(self) -> Vec { let (ptr, len, cap, alloc) = self.into_raw_parts_with_alloc(); let (new_len, new_cap) = if T::IS_ZST { @@ -3049,6 +3054,16 @@ impl Extend for Vec { fn extend_reserve(&mut self, additional: usize) { self.reserve(additional); } + + #[inline] + unsafe fn extend_one_unchecked(&mut self, item: T) { + // SAFETY: Our preconditions ensure the space has been reserved, and `extend_reserve` is implemented correctly. + unsafe { + let len = self.len(); + ptr::write(self.as_mut_ptr().add(len), item); + self.set_len(len + 1); + } + } } impl Vec { @@ -3245,6 +3260,16 @@ impl<'a, T: Copy + 'a, A: Allocator> Extend<&'a T> for Vec { fn extend_reserve(&mut self, additional: usize) { self.reserve(additional); } + + #[inline] + unsafe fn extend_one_unchecked(&mut self, &item: &'a T) { + // SAFETY: Our preconditions ensure the space has been reserved, and `extend_reserve` is implemented correctly. + unsafe { + let len = self.len(); + ptr::write(self.as_mut_ptr().add(len), item); + self.set_len(len + 1); + } + } } /// Implements comparison of vectors, [lexicographically](Ord#lexicographical-comparison). diff --git a/alloc/tests/arc.rs b/alloc/tests/arc.rs index d564a30b10394..c37a80dca95c8 100644 --- a/alloc/tests/arc.rs +++ b/alloc/tests/arc.rs @@ -209,3 +209,21 @@ fn weak_may_dangle() { // `val` dropped here while still borrowed // borrow might be used here, when `val` is dropped and runs the `Drop` code for type `std::sync::Weak` } + +/// This is similar to the doc-test for `Arc::make_mut()`, but on an unsized type (slice). +#[test] +fn make_mut_unsized() { + use alloc::sync::Arc; + + let mut data: Arc<[i32]> = Arc::new([10, 20, 30]); + + Arc::make_mut(&mut data)[0] += 1; // Won't clone anything + let mut other_data = Arc::clone(&data); // Won't clone inner data + Arc::make_mut(&mut data)[1] += 1; // Clones inner data + Arc::make_mut(&mut data)[2] += 1; // Won't clone anything + Arc::make_mut(&mut other_data)[0] *= 10; // Won't clone anything + + // Now `data` and `other_data` point to different allocations. + assert_eq!(*data, [11, 21, 31]); + assert_eq!(*other_data, [110, 20, 30]); +} diff --git a/alloc/tests/fmt.rs b/alloc/tests/fmt.rs index 379e09ab69a3c..ce24a40f4c051 100644 --- a/alloc/tests/fmt.rs +++ b/alloc/tests/fmt.rs @@ -217,19 +217,19 @@ fn test_format_macro_interface() { // make sure that format! doesn't move out of local variables let a = Box::new(3); - format!("{a}"); - format!("{a}"); + let _ = format!("{a}"); + let _ = format!("{a}"); // make sure that format! 
doesn't cause spurious unused-unsafe warnings when // it's inside of an outer unsafe block unsafe { let a: isize = ::std::mem::transmute(3_usize); - format!("{a}"); + let _ = format!("{a}"); } // test that trailing commas are acceptable - format!("{}", "test",); - format!("{foo}", foo = "test",); + let _ = format!("{}", "test",); + let _ = format!("{foo}", foo = "test",); } // Basic test to make sure that we can invoke the `write!` macro with an diff --git a/alloc/tests/lib.rs b/alloc/tests/lib.rs index 0eae4ca4b8ba3..89538f272f069 100644 --- a/alloc/tests/lib.rs +++ b/alloc/tests/lib.rs @@ -24,7 +24,6 @@ #![feature(binary_heap_into_iter_sorted)] #![feature(binary_heap_drain_sorted)] #![feature(slice_ptr_get)] -#![feature(binary_heap_as_slice)] #![feature(inplace_iteration)] #![feature(iter_advance_by)] #![feature(iter_next_chunk)] @@ -36,7 +35,6 @@ #![feature(const_str_from_utf8)] #![feature(panic_update_hook)] #![feature(pointer_is_aligned_to)] -#![feature(slice_flatten)] #![feature(thin_box)] #![feature(strict_provenance)] #![feature(drain_keep_rest)] diff --git a/alloc/tests/vec_deque_alloc_error.rs b/alloc/tests/vec_deque_alloc_error.rs new file mode 100644 index 0000000000000..8b516ddbc5c55 --- /dev/null +++ b/alloc/tests/vec_deque_alloc_error.rs @@ -0,0 +1,50 @@ +#![feature(alloc_error_hook, allocator_api)] + +use std::{ + alloc::{set_alloc_error_hook, AllocError, Allocator, Layout, System}, + collections::VecDeque, + panic::{catch_unwind, AssertUnwindSafe}, + ptr::NonNull, +}; + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")] +fn test_shrink_to_unwind() { + // This tests that `shrink_to` leaves the deque in a consistent state when + // the call to `RawVec::shrink_to_fit` unwinds. The code is adapted from #123369 + // but changed to hopefully not have any UB even if the test fails. + + struct BadAlloc; + + unsafe impl Allocator for BadAlloc { + fn allocate(&self, l: Layout) -> Result, AllocError> { + // We allocate zeroed here so that the whole buffer of the deque + // is always initialized. That way, even if the deque is left in + // an inconsistent state, no uninitialized memory should be accessed. + System.allocate_zeroed(l) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + unsafe { System.deallocate(ptr, layout) } + } + + unsafe fn shrink( + &self, + _ptr: NonNull, + _old_layout: Layout, + _new_layout: Layout, + ) -> Result, AllocError> { + Err(AllocError) + } + } + + set_alloc_error_hook(|_| panic!("alloc error")); + + let mut v = VecDeque::with_capacity_in(15, BadAlloc); + v.push_back(1); + v.push_front(2); + // This should unwind because it calls `BadAlloc::shrink` and then `handle_alloc_error` which unwinds. + assert!(catch_unwind(AssertUnwindSafe(|| v.shrink_to_fit())).is_err()); + // This should only pass if the deque is left in a consistent state. + assert_eq!(v, [2, 1]); +} diff --git a/backtrace b/backtrace index e15130618237e..72265bea21089 160000 --- a/backtrace +++ b/backtrace @@ -1 +1 @@ -Subproject commit e15130618237eb3e2d4b622549f9647b4c1d9ca3 +Subproject commit 72265bea210891ae47bbe6d4f17b493ef0606619 diff --git a/core/Cargo.toml b/core/Cargo.toml index 0c2642341235b..cace4582b489a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -39,9 +39,6 @@ debug_refcell = [] [lints.rust.unexpected_cfgs] level = "warn" -# x.py uses beta cargo, so `check-cfg` entries do not yet take effect -# for rust-lang/rust. But for users of `-Zbuild-std` it does. 
-# The unused warning is waiting for rust-lang/cargo#13925 to reach beta. check-cfg = [ 'cfg(bootstrap)', 'cfg(no_fp_fmt_parse)', diff --git a/core/src/any.rs b/core/src/any.rs index 37cb8e7d303af..59f3b6841d531 100644 --- a/core/src/any.rs +++ b/core/src/any.rs @@ -602,7 +602,7 @@ impl dyn Any + Send + Sync { /// While `TypeId` implements `Hash`, `PartialOrd`, and `Ord`, it is worth /// noting that the hashes and ordering will vary between Rust releases. Beware /// of relying on them inside of your code! -#[derive(Clone, Copy, Debug, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Eq, PartialOrd, Ord)] #[stable(feature = "rust1", since = "1.0.0")] pub struct TypeId { // We avoid using `u128` because that imposes higher alignment requirements on many platforms. @@ -644,6 +644,10 @@ impl TypeId { let t2 = t as u64; TypeId { t: (t1, t2) } } + + fn as_u128(self) -> u128 { + u128::from(self.t.0) << 64 | u128::from(self.t.1) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -666,6 +670,13 @@ impl hash::Hash for TypeId { } } +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for TypeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + write!(f, "TypeId({:#034x})", self.as_u128()) + } +} + /// Returns the name of a type as a string slice. /// /// # Note diff --git a/core/src/array/iter.rs b/core/src/array/iter.rs index b314d0536a35a..3585bf07b597c 100644 --- a/core/src/array/iter.rs +++ b/core/src/array/iter.rs @@ -101,7 +101,6 @@ impl IntoIter { /// ``` /// #![feature(array_into_iter_constructors)] /// #![feature(maybe_uninit_uninit_array_transpose)] - /// #![feature(maybe_uninit_uninit_array)] /// use std::array::IntoIter; /// use std::mem::MaybeUninit; /// @@ -111,7 +110,7 @@ impl IntoIter { /// fn next_chunk( /// it: &mut impl Iterator, /// ) -> Result<[T; N], IntoIter> { - /// let mut buffer = MaybeUninit::uninit_array(); + /// let mut buffer = [const { MaybeUninit::uninit() }; N]; /// let mut i = 0; /// while i < N { /// match it.next() { @@ -203,7 +202,7 @@ impl IntoIter { #[unstable(feature = "array_into_iter_constructors", issue = "91583")] #[rustc_const_unstable(feature = "const_array_into_iter_constructors", issue = "91583")] pub const fn empty() -> Self { - let buffer = MaybeUninit::uninit_array(); + let buffer = [const { MaybeUninit::uninit() }; N]; let initialized = 0..0; // SAFETY: We're telling it that none of the elements are initialized, @@ -405,7 +404,8 @@ impl Clone for IntoIter { fn clone(&self) -> Self { // Note, we don't really need to match the exact same alive range, so // we can just clone into offset 0 regardless of where `self` is. - let mut new = Self { data: MaybeUninit::uninit_array(), alive: IndexRange::zero_to(0) }; + let mut new = + Self { data: [const { MaybeUninit::uninit() }; N], alive: IndexRange::zero_to(0) }; // Clone all alive elements. 
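The recurring `[const { MaybeUninit::uninit() }; N]` pattern in these hunks replaces the unstable `MaybeUninit::uninit_array()`: an inline-const element may be repeated even for non-`Copy` types. A standalone sketch (inline `const` expressions are stable as of Rust 1.79):

```rust
use std::mem::MaybeUninit;

fn main() {
    // `[expr; N]` normally requires `Copy`, but an inline-const element
    // is evaluated once at compile time and repeated, so this works for
    // any element type.
    let mut buf = [const { MaybeUninit::<String>::uninit() }; 3];
    for (i, slot) in buf.iter_mut().enumerate() {
        slot.write(i.to_string());
    }
    // SAFETY: every element was initialized in the loop above.
    let strings = buf.map(|slot| unsafe { slot.assume_init() });
    assert_eq!(strings, ["0", "1", "2"]);
}
```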
for (src, dst) in iter::zip(self.as_slice(), &mut new.data) {
diff --git a/core/src/array/mod.rs b/core/src/array/mod.rs
index 05874ab6c4cbb..8285c64ed2966 100644
--- a/core/src/array/mod.rs
+++ b/core/src/array/mod.rs
@@ -10,7 +10,7 @@ use crate::convert::Infallible;
 use crate::error::Error;
 use crate::fmt;
 use crate::hash::{self, Hash};
-use crate::iter::UncheckedIterator;
+use crate::iter::{repeat_n, UncheckedIterator};
 use crate::mem::{self, MaybeUninit};
 use crate::ops::{
 ChangeOutputType, ControlFlow, FromResidual, Index, IndexMut, NeverShortCircuit, Residual, Try,
@@ -27,6 +27,33 @@ pub(crate) use drain::drain_array_with;
 #[stable(feature = "array_value_iter", since = "1.51.0")]
 pub use iter::IntoIter;
+/// Creates an array of type `[T; N]` by repeatedly cloning a value.
+///
+/// This is the same as `[val; N]`, but it also works for types that do not
+/// implement [`Copy`].
+///
+/// The provided value will be used as an element of the resulting array and
+/// will be cloned N - 1 times to fill up the rest. If N is zero, the value
+/// will be dropped.
+///
+/// # Example
+///
+/// Creating multiple copies of a `String`:
+/// ```rust
+/// #![feature(array_repeat)]
+///
+/// use std::array;
+///
+/// let string = "Hello there!".to_string();
+/// let strings = array::repeat(string);
+/// assert_eq!(strings, ["Hello there!", "Hello there!"]);
+/// ```
+#[inline]
+#[unstable(feature = "array_repeat", issue = "126695")]
+pub fn repeat<T: Clone, const N: usize>(val: T) -> [T; N] {
+ from_trusted_iterator(repeat_n(val, N))
+}
+
 /// Creates an array of type [T; N], where each element `T` is the returned value from `cb`
 /// using that element's index.
 ///
@@ -100,7 +127,7 @@ where
 R: Try,
 R::Residual: Residual<[R::Output; N]>,
 {
- let mut array = MaybeUninit::uninit_array::<N>();
+ let mut array = [const { MaybeUninit::uninit() }; N];
 match try_from_fn_erased(&mut array, cb) {
 ControlFlow::Break(r) => FromResidual::from_residual(r),
 ControlFlow::Continue(()) => {
@@ -533,11 +560,9 @@ impl [T; N] {
 /// assert_eq!(c, Some(a));
 /// ```
 #[unstable(feature = "array_try_map", issue = "79711")]
- pub fn try_map<F, R>(self, f: F) -> ChangeOutputType<R, [R::Output; N]>
+ pub fn try_map<R>(self, f: impl FnMut(T) -> R) -> ChangeOutputType<R, [R::Output; N]>
 where
- F: FnMut(T) -> R,
- R: Try,
- R::Residual: Residual<[R::Output; N]>,
+ R: Try<Residual: Residual<[R::Output; N]>>,
 {
 drain_array_with(self, |iter| try_from_trusted_iterator(iter.map(f)))
 }
@@ -893,7 +918,7 @@ impl<T> Drop for Guard<'_, T> {
 pub(crate) fn iter_next_chunk<T, const N: usize>(
 iter: &mut impl Iterator<Item = T>,
 ) -> Result<[T; N], IntoIter<T, N>> {
- let mut array = MaybeUninit::uninit_array::<N>();
+ let mut array = [const { MaybeUninit::uninit() }; N];
 let r = iter_next_chunk_erased(&mut array, iter);
 match r {
 Ok(()) => {
diff --git a/core/src/borrow.rs b/core/src/borrow.rs
index bc026d0a44634..ccb1cc4e974d6 100644
--- a/core/src/borrow.rs
+++ b/core/src/borrow.rs
@@ -184,6 +184,7 @@ pub trait Borrow<Borrowed: ?Sized> {
 /// an underlying type by providing a mutable reference. See [`Borrow`]
 /// for more information on borrowing as another type.
 #[stable(feature = "rust1", since = "1.0.0")]
+#[rustc_diagnostic_item = "BorrowMut"]
 pub trait BorrowMut<Borrowed: ?Sized>: Borrow<Borrowed> {
 /// Mutably borrows from an owned value.
 ///
diff --git a/core/src/cell.rs b/core/src/cell.rs
index 4b491ffdafa70..b3189f14f9e47 100644
--- a/core/src/cell.rs
+++ b/core/src/cell.rs
@@ -82,6 +82,20 @@
 //!
 //! The corresponding [`Sync`] version of `OnceCell` is [`OnceLock`].
 //!
+//! ## `LazyCell`
+//!
+//! A common pattern with OnceCell is, for a given OnceCell, to use the same function on every
call to [`OnceCell::get_or_init`] with that cell. This is what is offered by [`LazyCell`], +//! which pairs cells of `T` with functions of `F`, and always calls `F` before it yields `&T`. +//! This happens implicitly by simply attempting to dereference the LazyCell to get its contents, +//! so its use is much more transparent with a place which has been initialized by a constant. +//! +//! More complicated patterns that don't fit this description can be built on `OnceCell` instead. +//! +//! `LazyCell` works by providing an implementation of `impl Deref` that calls the function, +//! so you can just use it by dereference (e.g. `*lazy_cell` or `lazy_cell.deref()`). +//! +//! The corresponding [`Sync`] version of `LazyCell` is [`LazyLock`]. //! //! # When to choose interior mutability //! @@ -230,6 +244,7 @@ //! [`RwLock`]: ../../std/sync/struct.RwLock.html //! [`Mutex`]: ../../std/sync/struct.Mutex.html //! [`OnceLock`]: ../../std/sync/struct.OnceLock.html +//! [`LazyLock`]: ../../std/sync/struct.LazyLock.html //! [`Sync`]: ../../std/marker/trait.Sync.html //! [`atomic`]: crate::sync::atomic @@ -238,14 +253,14 @@ use crate::cmp::Ordering; use crate::fmt::{self, Debug, Display}; use crate::marker::{PhantomData, Unsize}; -use crate::mem::{self, size_of}; +use crate::mem; use crate::ops::{CoerceUnsized, Deref, DerefMut, DerefPure, DispatchFromDyn}; use crate::ptr::{self, NonNull}; mod lazy; mod once; -#[unstable(feature = "lazy_cell", issue = "109736")] +#[stable(feature = "lazy_cell", since = "1.80.0")] pub use lazy::LazyCell; #[stable(feature = "once_cell", since = "1.70.0")] pub use once::OnceCell; diff --git a/core/src/cell/lazy.rs b/core/src/cell/lazy.rs index 1b213f6a2941b..21452d40f9ded 100644 --- a/core/src/cell/lazy.rs +++ b/core/src/cell/lazy.rs @@ -18,8 +18,6 @@ enum State { /// # Examples /// /// ``` -/// #![feature(lazy_cell)] -/// /// use std::cell::LazyCell; /// /// let lazy: LazyCell = LazyCell::new(|| { @@ -36,7 +34,7 @@ enum State { /// // 92 /// // 92 /// ``` -#[unstable(feature = "lazy_cell", issue = "109736")] +#[stable(feature = "lazy_cell", since = "1.80.0")] pub struct LazyCell T> { state: UnsafeCell>, } @@ -47,8 +45,6 @@ impl T> LazyCell { /// # Examples /// /// ``` - /// #![feature(lazy_cell)] - /// /// use std::cell::LazyCell; /// /// let hello = "Hello, World!".to_string(); @@ -58,7 +54,8 @@ impl T> LazyCell { /// assert_eq!(&*lazy, "HELLO, WORLD!"); /// ``` #[inline] - #[unstable(feature = "lazy_cell", issue = "109736")] + #[stable(feature = "lazy_cell", since = "1.80.0")] + #[rustc_const_stable(feature = "lazy_cell", since = "1.80.0")] pub const fn new(f: F) -> LazyCell { LazyCell { state: UnsafeCell::new(State::Uninit(f)) } } @@ -70,8 +67,7 @@ impl T> LazyCell { /// # Examples /// /// ``` - /// #![feature(lazy_cell)] - /// #![feature(lazy_cell_consume)] + /// #![feature(lazy_cell_into_inner)] /// /// use std::cell::LazyCell; /// @@ -82,7 +78,7 @@ impl T> LazyCell { /// assert_eq!(&*lazy, "HELLO, WORLD!"); /// assert_eq!(LazyCell::into_inner(lazy).ok(), Some("HELLO, WORLD!".to_string())); /// ``` - #[unstable(feature = "lazy_cell_consume", issue = "109736")] + #[unstable(feature = "lazy_cell_into_inner", issue = "125623")] pub fn into_inner(this: Self) -> Result { match this.state.into_inner() { State::Init(data) => Ok(data), @@ -99,8 +95,6 @@ impl T> LazyCell { /// # Examples /// /// ``` - /// #![feature(lazy_cell)] - /// /// use std::cell::LazyCell; /// /// let lazy = LazyCell::new(|| 92); @@ -109,7 +103,7 @@ impl T> LazyCell { /// assert_eq!(&*lazy, &92); /// 
``` #[inline] - #[unstable(feature = "lazy_cell", issue = "109736")] + #[stable(feature = "lazy_cell", since = "1.80.0")] pub fn force(this: &LazyCell) -> &T { // SAFETY: // This invalidates any mutable references to the data. The resulting @@ -173,7 +167,7 @@ impl LazyCell { } } -#[unstable(feature = "lazy_cell", issue = "109736")] +#[stable(feature = "lazy_cell", since = "1.80.0")] impl T> Deref for LazyCell { type Target = T; #[inline] @@ -182,7 +176,7 @@ impl T> Deref for LazyCell { } } -#[unstable(feature = "lazy_cell", issue = "109736")] +#[stable(feature = "lazy_cell", since = "1.80.0")] impl Default for LazyCell { /// Creates a new lazy value using `Default` as the initializing function. #[inline] @@ -191,7 +185,7 @@ impl Default for LazyCell { } } -#[unstable(feature = "lazy_cell", issue = "109736")] +#[stable(feature = "lazy_cell", since = "1.80.0")] impl fmt::Debug for LazyCell { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut d = f.debug_tuple("LazyCell"); diff --git a/core/src/cell/once.rs b/core/src/cell/once.rs index a7c3dfc982d12..872b4da4dbfda 100644 --- a/core/src/cell/once.rs +++ b/core/src/cell/once.rs @@ -2,12 +2,12 @@ use crate::cell::UnsafeCell; use crate::fmt; use crate::mem; -/// A cell which can be written to only once. +/// A cell which can nominally be written to only once. /// /// This allows obtaining a shared `&T` reference to its inner value without copying or replacing /// it (unlike [`Cell`]), and without runtime borrow checks (unlike [`RefCell`]). However, /// only immutable references can be obtained unless one has a mutable reference to the cell -/// itself. +/// itself. In the same vein, the cell can only be re-initialized with such a mutable reference. /// /// For a thread-safe version of this struct, see [`std::sync::OnceLock`]. /// diff --git a/core/src/char/methods.rs b/core/src/char/methods.rs index 458be49fb152a..4186565c131ed 100644 --- a/core/src/char/methods.rs +++ b/core/src/char/methods.rs @@ -223,7 +223,10 @@ impl char { /// assert_eq!('❤', c); /// ``` #[stable(feature = "assoc_char_funcs", since = "1.52.0")] - #[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")] + #[rustc_const_stable( + feature = "const_char_from_u32_unchecked", + since = "CURRENT_RUSTC_VERSION" + )] #[must_use] #[inline] pub const unsafe fn from_u32_unchecked(i: u32) -> char { diff --git a/core/src/char/mod.rs b/core/src/char/mod.rs index f3683fe3f9c83..3c641a2e01c93 100644 --- a/core/src/char/mod.rs +++ b/core/src/char/mod.rs @@ -24,7 +24,6 @@ mod convert; mod decode; mod methods; -// stable re-exports #[stable(feature = "try_from", since = "1.34.0")] pub use self::convert::CharTryFromError; #[stable(feature = "char_from_str", since = "1.20.0")] @@ -32,11 +31,10 @@ pub use self::convert::ParseCharError; #[stable(feature = "decode_utf16", since = "1.9.0")] pub use self::decode::{DecodeUtf16, DecodeUtf16Error}; -// perma-unstable re-exports #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] -pub use self::methods::encode_utf16_raw; +pub use self::methods::encode_utf16_raw; // perma-unstable #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] -pub use self::methods::encode_utf8_raw; +pub use self::methods::encode_utf8_raw; // perma-unstable use crate::ascii; use crate::error::Error; @@ -123,7 +121,7 @@ pub const fn from_u32(i: u32) -> Option { /// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]. /// instead. 
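With `from_u32_unchecked` now const-stable per the hunks above, the conversion can run at compile time. A tiny sketch, assuming a toolchain where this stabilization has landed:

```rust
fn main() {
    // SAFETY: 0x2764 is a valid Unicode scalar value ('❤').
    const HEART: char = unsafe { char::from_u32_unchecked(0x2764) };
    assert_eq!(HEART, '❤');
    // The checked form is preferred wherever the input isn't known-valid.
    assert_eq!(char::from_u32(0x2764), Some(HEART));
}
```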
#[stable(feature = "char_from_unchecked", since = "1.5.0")] -#[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")] +#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "CURRENT_RUSTC_VERSION")] #[must_use] #[inline] pub const unsafe fn from_u32_unchecked(i: u32) -> char { diff --git a/core/src/clone.rs b/core/src/clone.rs index d448c5338fc46..939b2be6dfaf1 100644 --- a/core/src/clone.rs +++ b/core/src/clone.rs @@ -36,6 +36,9 @@ #![stable(feature = "rust1", since = "1.0.0")] +use crate::mem::{self, MaybeUninit}; +use crate::ptr; + /// A common trait for the ability to explicitly duplicate an object. /// /// Differs from [`Copy`] in that [`Copy`] is implicit and an inexpensive bit-wise copy, while @@ -204,6 +207,189 @@ pub struct AssertParamIsCopy { _field: crate::marker::PhantomData, } +/// A generalization of [`Clone`] to dynamically-sized types stored in arbitrary containers. +/// +/// This trait is implemented for all types implementing [`Clone`], and also [slices](slice) of all +/// such types. You may also implement this trait to enable cloning trait objects and custom DSTs +/// (structures containing dynamically-sized fields). +/// +/// # Safety +/// +/// Implementations must ensure that when `.clone_to_uninit(dst)` returns normally rather than +/// panicking, it always leaves `*dst` initialized as a valid value of type `Self`. +/// +/// # See also +/// +/// * [`Clone::clone_from`] is a safe function which may be used instead when `Self` is a [`Sized`] +/// and the destination is already initialized; it may be able to reuse allocations owned by +/// the destination. +/// * [`ToOwned`], which allocates a new destination container. +/// +/// [`ToOwned`]: ../../std/borrow/trait.ToOwned.html +#[unstable(feature = "clone_to_uninit", issue = "126799")] +pub unsafe trait CloneToUninit { + /// Performs copy-assignment from `self` to `dst`. + /// + /// This is analogous to `std::ptr::write(dst, self.clone())`, + /// except that `self` may be a dynamically-sized type ([`!Sized`](Sized)). + /// + /// Before this function is called, `dst` may point to uninitialized memory. + /// After this function is called, `dst` will point to initialized memory; it will be + /// sound to create a `&Self` reference from the pointer. + /// + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// * `dst` must be [valid] for writes. + /// * `dst` must be properly aligned. + /// * `dst` must have the same [pointer metadata] (slice length or `dyn` vtable) as `self`. + /// + /// [valid]: ptr#safety + /// [pointer metadata]: crate::ptr::metadata() + /// + /// # Panics + /// + /// This function may panic. (For example, it might panic if memory allocation for a clone + /// of a value owned by `self` fails.) + /// If the call panics, then `*dst` should be treated as uninitialized memory; it must not be + /// read or dropped, because even if it was previously valid, it may have been partially + /// overwritten. + /// + /// The caller may also need to take care to deallocate the allocation pointed to by `dst`, + /// if applicable, to avoid a memory leak, and may need to take other precautions to ensure + /// soundness in the presence of unwinding. + /// + /// Implementors should avoid leaking values by, upon unwinding, dropping all component values + /// that might have already been created. 
(For example, if a `[Foo]` of length 3 is being + /// cloned, and the second of the three calls to `Foo::clone()` unwinds, then the first `Foo` + /// cloned should be dropped.) + unsafe fn clone_to_uninit(&self, dst: *mut Self); +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for T { + default unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: The safety conditions of clone_to_uninit() are a superset of those of + // ptr::write(). + unsafe { + // We hope the optimizer will figure out to create the cloned value in-place, + // skipping ever storing it on the stack and the copy to the destination. + ptr::write(dst, self.clone()); + } + } +} + +// Specialized implementation for types that are [`Copy`], not just [`Clone`], +// and can therefore be copied bitwise. +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for T { + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: The safety conditions of clone_to_uninit() are a superset of those of + // ptr::copy_nonoverlapping(). + unsafe { + ptr::copy_nonoverlapping(self, dst, 1); + } + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for [T] { + #[cfg_attr(debug_assertions, track_caller)] + default unsafe fn clone_to_uninit(&self, dst: *mut Self) { + let len = self.len(); + // This is the most likely mistake to make, so check it as a debug assertion. + debug_assert_eq!( + len, + dst.len(), + "clone_to_uninit() source and destination must have equal lengths", + ); + + // SAFETY: The produced `&mut` is valid because: + // * The caller is obligated to provide a pointer which is valid for writes. + // * All bytes pointed to are in MaybeUninit, so we don't care about the memory's + // initialization status. + let uninit_ref = unsafe { &mut *(dst as *mut [MaybeUninit]) }; + + // Copy the elements + let mut initializing = InitializingSlice::from_fully_uninit(uninit_ref); + for element_ref in self.iter() { + // If the clone() panics, `initializing` will take care of the cleanup. + initializing.push(element_ref.clone()); + } + // If we reach here, then the entire slice is initialized, and we've satisfied our + // responsibilities to the caller. Disarm the cleanup guard by forgetting it. + mem::forget(initializing); + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for [T] { + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + let len = self.len(); + // This is the most likely mistake to make, so check it as a debug assertion. + debug_assert_eq!( + len, + dst.len(), + "clone_to_uninit() source and destination must have equal lengths", + ); + + // SAFETY: The safety conditions of clone_to_uninit() are a superset of those of + // ptr::copy_nonoverlapping(). + unsafe { + ptr::copy_nonoverlapping(self.as_ptr(), dst.as_mut_ptr(), len); + } + } +} + +/// Ownership of a collection of values stored in a non-owned `[MaybeUninit]`, some of which +/// are not yet initialized. This is sort of like a `Vec` that doesn't own its allocation. +/// Its responsibility is to provide cleanup on unwind by dropping the values that *are* +/// initialized, unless disarmed by forgetting. +/// +/// This is a helper for `impl CloneToUninit for [T]`. +struct InitializingSlice<'a, T> { + data: &'a mut [MaybeUninit], + /// Number of elements of `*self.data` that are initialized. 
+ initialized_len: usize, +} + +impl<'a, T> InitializingSlice<'a, T> { + #[inline] + fn from_fully_uninit(data: &'a mut [MaybeUninit]) -> Self { + Self { data, initialized_len: 0 } + } + + /// Push a value onto the end of the initialized part of the slice. + /// + /// # Panics + /// + /// Panics if the slice is already fully initialized. + #[inline] + fn push(&mut self, value: T) { + MaybeUninit::write(&mut self.data[self.initialized_len], value); + self.initialized_len += 1; + } +} + +impl<'a, T> Drop for InitializingSlice<'a, T> { + #[cold] // will only be invoked on unwind + fn drop(&mut self) { + let initialized_slice = ptr::slice_from_raw_parts_mut( + MaybeUninit::slice_as_mut_ptr(self.data), + self.initialized_len, + ); + // SAFETY: + // * the pointer is valid because it was made from a mutable reference + // * `initialized_len` counts the initialized elements as an invariant of this type, + // so each of the pointed-to elements is initialized and may be dropped. + unsafe { + ptr::drop_in_place::<[T]>(initialized_slice); + } + } +} + /// Implementations of `Clone` for primitive types. /// /// Implementations that cannot be described in Rust diff --git a/core/src/cmp.rs b/core/src/cmp.rs index f3f757ce69df7..cff75870790c5 100644 --- a/core/src/cmp.rs +++ b/core/src/cmp.rs @@ -245,7 +245,6 @@ use self::Ordering::*; append_const_msg )] #[rustc_diagnostic_item = "PartialEq"] -#[const_trait] pub trait PartialEq { /// This method tests for `self` and `other` values to be equal, and is used /// by `==`. @@ -1475,8 +1474,7 @@ mod impls { macro_rules! partial_eq_impl { ($($t:ty)*) => ($( #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_cmp", issue = "92391")] - impl const PartialEq for $t { + impl PartialEq for $t { #[inline] fn eq(&self, other: &$t) -> bool { (*self) == (*other) } #[inline] diff --git a/core/src/error.md b/core/src/error.md index a5deb71e6b80a..4b62391cafc37 100644 --- a/core/src/error.md +++ b/core/src/error.md @@ -17,8 +17,8 @@ The following are the primary interfaces of the panic system and the responsibilities they cover: * [`panic!`] and [`panic_any`] (Constructing, Propagated automatically) -* [`PanicInfo`] (Reporting) -* [`set_hook`], [`take_hook`], and [`#[panic_handler]`][panic-handler] (Reporting) +* [`set_hook`], [`take_hook`], and [`PanicHookInfo`] (Reporting) +* [`#[panic_handler]`][panic-handler] and [`PanicInfo`] (Reporting in no_std) * [`catch_unwind`] and [`resume_unwind`] (Discarding, Propagating) The following are the primary interfaces of the error system and the @@ -125,6 +125,7 @@ expect-as-precondition style error messages remember to focus on the word should be available and executable by the current user". 
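The `InitializingSlice` guard completed above follows a general drop-guard idiom: record how much of the buffer is initialized, and drop exactly that prefix on unwind. A self-contained sketch of the same idiom; the names (`Guard`, `fill_with_clones`) are illustrative, not the patch's:

```rust
use std::mem::{forget, MaybeUninit};

struct Guard<'a, T> {
    data: &'a mut [MaybeUninit<T>],
    init: usize, // invariant: data[..init] is initialized
}

impl<T> Drop for Guard<'_, T> {
    fn drop(&mut self) {
        // On unwind, drop only the prefix we actually initialized.
        for slot in &mut self.data[..self.init] {
            // SAFETY: `init` counts initialized elements.
            unsafe { slot.assume_init_drop() };
        }
    }
}

fn fill_with_clones<T: Clone>(dst: &mut [MaybeUninit<T>], value: &T) {
    let mut guard = Guard { data: dst, init: 0 };
    for i in 0..guard.data.len() {
        // If this clone panics, `Guard::drop` cleans up data[..i].
        guard.data[i].write(value.clone());
        guard.init += 1;
    }
    forget(guard); // fully initialized: disarm the cleanup
}

fn main() {
    let mut buf = [const { MaybeUninit::<String>::uninit() }; 2];
    fill_with_clones(&mut buf, &String::from("x"));
    // SAFETY: `fill_with_clones` initialized every element.
    let out = buf.map(|slot| unsafe { slot.assume_init() });
    assert_eq!(out, ["x", "x"]);
}
```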
[`panic_any`]: ../../std/panic/fn.panic_any.html +[`PanicHookInfo`]: ../../std/panic/struct.PanicHookInfo.html [`PanicInfo`]: crate::panic::PanicInfo [`catch_unwind`]: ../../std/panic/fn.catch_unwind.html [`resume_unwind`]: ../../std/panic/fn.resume_unwind.html diff --git a/core/src/error.rs b/core/src/error.rs index a3f2b767054e1..ca8983d4cbcfe 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -1,5 +1,5 @@ #![doc = include_str!("error.md")] -#![unstable(feature = "error_in_core", issue = "103765")] +#![stable(feature = "error_in_core", since = "CURRENT_RUSTC_VERSION")] #[cfg(test)] mod tests; @@ -130,7 +130,6 @@ pub trait Error: Debug + Display { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// use core::fmt; /// use core::error::{request_ref, Request}; /// @@ -361,8 +360,7 @@ impl dyn Error { /// Get a string value from an error. /// /// ```rust -/// # #![feature(error_generic_member_access)] -/// # #![feature(error_in_core)] +/// #![feature(error_generic_member_access)] /// use std::error::Error; /// use core::error::request_value; /// @@ -385,8 +383,7 @@ where /// Get a string reference from an error. /// /// ```rust -/// # #![feature(error_generic_member_access)] -/// # #![feature(error_in_core)] +/// #![feature(error_generic_member_access)] /// use core::error::Error; /// use core::error::request_ref; /// @@ -407,9 +404,9 @@ fn request_by_type_tag<'a, I>(err: &'a (impl Error + ?Sized)) -> Option, { - let mut tagged = TaggedOption::<'a, I>(None); + let mut tagged = Tagged { tag_id: TypeId::of::(), value: TaggedOption::<'a, I>(None) }; err.provide(tagged.as_request()); - tagged.0 + tagged.value.0 } /////////////////////////////////////////////////////////////////////////////// @@ -458,7 +455,6 @@ where /// /// ``` /// #![feature(error_generic_member_access)] -/// #![feature(error_in_core)] /// use core::fmt; /// use core::error::Request; /// use core::error::request_ref; @@ -511,16 +507,9 @@ where /// #[unstable(feature = "error_generic_member_access", issue = "99301")] #[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 -pub struct Request<'a>(dyn Erased<'a> + 'a); +pub struct Request<'a>(Tagged + 'a>); impl<'a> Request<'a> { - /// Create a new `&mut Request` from a `&mut dyn Erased` trait object. - fn new<'b>(erased: &'b mut (dyn Erased<'a> + 'a)) -> &'b mut Request<'a> { - // SAFETY: transmuting `&mut (dyn Erased<'a> + 'a)` to `&mut Request<'a>` is safe since - // `Request` is repr(transparent). - unsafe { &mut *(erased as *mut dyn Erased<'a> as *mut Request<'a>) } - } - /// Provide a value or other type with only static lifetimes. 
/// /// # Examples @@ -529,7 +518,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// @@ -564,7 +552,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// @@ -600,7 +587,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// @@ -633,7 +619,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// @@ -700,7 +685,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// use core::error::request_value; @@ -788,7 +772,6 @@ impl<'a> Request<'a> { /// /// ```rust /// #![feature(error_generic_member_access)] - /// #![feature(error_in_core)] /// /// use core::error::Request; /// use core::error::request_ref; @@ -945,32 +928,33 @@ pub(crate) mod tags { /// An `Option` with a type tag `I`. /// /// Since this struct implements `Erased`, the type can be erased to make a dynamically typed -/// option. The type can be checked dynamically using `Erased::tag_id` and since this is statically +/// option. The type can be checked dynamically using `Tagged::tag_id` and since this is statically /// checked for the concrete type, there is some degree of type safety. #[repr(transparent)] pub(crate) struct TaggedOption<'a, I: tags::Type<'a>>(pub Option); -impl<'a, I: tags::Type<'a>> TaggedOption<'a, I> { +impl<'a, I: tags::Type<'a>> Tagged> { pub(crate) fn as_request(&mut self) -> &mut Request<'a> { - Request::new(self as &mut (dyn Erased<'a> + 'a)) + let erased = self as &mut Tagged + 'a>; + // SAFETY: transmuting `&mut Tagged + 'a>` to `&mut Request<'a>` is safe since + // `Request` is repr(transparent). + unsafe { &mut *(erased as *mut Tagged> as *mut Request<'a>) } } } /// Represents a type-erased but identifiable object. /// /// This trait is exclusively implemented by the `TaggedOption` type. -unsafe trait Erased<'a>: 'a { - /// The `TypeId` of the erased type. - fn tag_id(&self) -> TypeId; -} +unsafe trait Erased<'a>: 'a {} -unsafe impl<'a, I: tags::Type<'a>> Erased<'a> for TaggedOption<'a, I> { - fn tag_id(&self) -> TypeId { - TypeId::of::() - } +unsafe impl<'a, I: tags::Type<'a>> Erased<'a> for TaggedOption<'a, I> {} + +struct Tagged { + tag_id: TypeId, + value: E, } -impl<'a> dyn Erased<'a> + 'a { +impl<'a> Tagged + 'a> { /// Returns some reference to the dynamic value if it is tagged with `I`, /// or `None` otherwise. #[inline] @@ -978,9 +962,9 @@ impl<'a> dyn Erased<'a> + 'a { where I: tags::Type<'a>, { - if self.tag_id() == TypeId::of::() { + if self.tag_id == TypeId::of::() { // SAFETY: Just checked whether we're pointing to an I. - Some(unsafe { &*(self as *const Self).cast::>() }) + Some(&unsafe { &*(self as *const Self).cast::>>() }.value) } else { None } @@ -993,9 +977,12 @@ impl<'a> dyn Erased<'a> + 'a { where I: tags::Type<'a>, { - if self.tag_id() == TypeId::of::() { - // SAFETY: Just checked whether we're pointing to an I. - Some(unsafe { &mut *(self as *mut Self).cast::>() }) + if self.tag_id == TypeId::of::() { + Some( + // SAFETY: Just checked whether we're pointing to an I. 
+ &mut unsafe { &mut *(self as *mut Self).cast::>>() } + .value, + ) } else { None } @@ -1021,8 +1008,15 @@ impl<'a> Iterator for Source<'a> { self.current = self.current.and_then(Error::source); current } + + fn size_hint(&self) -> (usize, Option) { + if self.current.is_some() { (1, None) } else { (0, Some(0)) } + } } +#[unstable(feature = "error_iter", issue = "58520")] +impl<'a> crate::iter::FusedIterator for Source<'a> {} + #[stable(feature = "error_by_ref", since = "1.51.0")] impl<'a, T: Error + ?Sized> Error for &'a T { #[allow(deprecated, deprecated_in_future)] diff --git a/core/src/escape.rs b/core/src/escape.rs index f6ec30b9f793a..b213cc2b9167c 100644 --- a/core/src/escape.rs +++ b/core/src/escape.rs @@ -60,7 +60,7 @@ const fn escape_ascii(byte: u8) -> ([ascii::Char; N], Range) const fn escape_unicode(c: char) -> ([ascii::Char; N], Range) { const { assert!(N >= 10 && N < u8::MAX as usize) }; - let c = u32::from(c); + let c = c as u32; // OR-ing `1` ensures that for `c == 0` the code computes that // one digit should be printed. diff --git a/core/src/ffi/c_str.rs b/core/src/ffi/c_str.rs index 297f52e756bc6..563f0a324e3f1 100644 --- a/core/src/ffi/c_str.rs +++ b/core/src/ffi/c_str.rs @@ -263,8 +263,6 @@ impl CStr { /// ``` /// /// ``` - /// #![feature(const_cstr_from_ptr)] - /// /// use std::ffi::{c_char, CStr}; /// /// const HELLO_PTR: *const c_char = { @@ -280,11 +278,11 @@ impl CStr { #[inline] // inline is necessary for codegen to see strlen. #[must_use] #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] + #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { // SAFETY: The caller has provided a pointer that points to a valid C // string with a NUL terminator less than `isize::MAX` from `ptr`. - let len = unsafe { const_strlen(ptr) }; + let len = unsafe { strlen(ptr) }; // SAFETY: The caller has provided a valid pointer with length less than // `isize::MAX`, so `from_raw_parts` is safe. The content remains valid @@ -515,7 +513,10 @@ impl CStr { #[inline] #[must_use] const fn as_non_null_ptr(&self) -> NonNull { - NonNull::from(&self.inner).as_non_null_ptr() + // FIXME(effects) replace with `NonNull::from` + // SAFETY: a reference is never null + unsafe { NonNull::new_unchecked(&self.inner as *const [c_char] as *mut [c_char]) } + .as_non_null_ptr() } /// Returns the length of `self`. Like C's `strlen`, this does not include the nul terminator. @@ -539,7 +540,7 @@ impl CStr { #[must_use] #[doc(alias("len", "strlen"))] #[stable(feature = "cstr_count_bytes", since = "1.79.0")] - #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] + #[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] pub const fn count_bytes(&self) -> usize { self.inner.len() - 1 } @@ -739,7 +740,10 @@ impl AsRef for CStr { /// The pointer must point to a valid buffer that contains a NUL terminator. The NUL must be /// located within `isize::MAX` from `ptr`. 
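The `Tagged` rework above moves the `TypeId` from a trait method into a plain field, but the downcast discipline is unchanged: compare tags first, only then cast. The safe-Rust counterpart of that discipline is `dyn Any`; a sketch, not the patch's code:

```rust
use std::any::Any;

// `downcast_ref` performs the same check-then-cast internally:
// compare `TypeId`s, and only then reinterpret the pointer.
fn get_ref<T: 'static>(erased: &dyn Any) -> Option<&T> {
    erased.downcast_ref::<T>()
}

fn main() {
    let value: Box<dyn Any> = Box::new(42u32);
    assert_eq!(get_ref::<u32>(&*value), Some(&42));
    assert_eq!(get_ref::<i64>(&*value), None);
}
```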
#[inline] -const unsafe fn const_strlen(ptr: *const c_char) -> usize { +#[unstable(feature = "cstr_internals", issue = "none")] +#[rustc_const_stable(feature = "const_cstr_from_ptr", since = "CURRENT_RUSTC_VERSION")] +#[rustc_allow_const_fn_unstable(const_eval_select)] +const unsafe fn strlen(ptr: *const c_char) -> usize { const fn strlen_ct(s: *const c_char) -> usize { let mut len = 0; @@ -777,8 +781,15 @@ const unsafe fn const_strlen(ptr: *const c_char) -> usize { pub struct Bytes<'a> { // since we know the string is nul-terminated, we only need one pointer ptr: NonNull, - phantom: PhantomData<&'a u8>, + phantom: PhantomData<&'a [c_char]>, } + +#[unstable(feature = "cstr_bytes", issue = "112115")] +unsafe impl Send for Bytes<'_> {} + +#[unstable(feature = "cstr_bytes", issue = "112115")] +unsafe impl Sync for Bytes<'_> {} + impl<'a> Bytes<'a> { #[inline] fn new(s: &'a CStr) -> Self { @@ -811,7 +822,7 @@ impl Iterator for Bytes<'_> { if ret == 0 { None } else { - self.ptr = self.ptr.offset(1); + self.ptr = self.ptr.add(1); Some(ret) } } @@ -821,6 +832,12 @@ impl Iterator for Bytes<'_> { fn size_hint(&self) -> (usize, Option) { if self.is_empty() { (0, Some(0)) } else { (1, None) } } + + #[inline] + fn count(self) -> usize { + // SAFETY: We always hold a valid pointer to a C string + unsafe { strlen(self.ptr.as_ptr().cast()) } + } } #[unstable(feature = "cstr_bytes", issue = "112115")] diff --git a/core/src/ffi/mod.rs b/core/src/ffi/mod.rs index 27dacbb23d958..88adc378477fd 100644 --- a/core/src/ffi/mod.rs +++ b/core/src/ffi/mod.rs @@ -10,8 +10,6 @@ #![allow(non_camel_case_types)] use crate::fmt; -use crate::marker::PhantomData; -use crate::ops::{Deref, DerefMut}; #[doc(no_inline)] #[stable(feature = "core_c_str", since = "1.64.0")] @@ -28,6 +26,20 @@ pub use self::c_str::CStr; #[unstable(feature = "c_str_module", issue = "112134")] pub mod c_str; +#[unstable( + feature = "c_variadic", + issue = "44930", + reason = "the `c_variadic` feature has not been properly tested on all supported platforms" +)] +pub use self::va_list::{VaList, VaListImpl}; + +#[unstable( + feature = "c_variadic", + issue = "44930", + reason = "the `c_variadic` feature has not been properly tested on all supported platforms" +)] +pub mod va_list; + macro_rules! type_alias { { $Docfile:tt, $Alias:ident = $Real:ty; @@ -133,7 +145,8 @@ mod c_char_definition { any(target_arch = "aarch64", target_arch = "riscv64") ), all(target_os = "nto", target_arch = "aarch64"), - target_os = "horizon" + target_os = "horizon", + target_os = "aix", ))] { pub type c_char = u8; } else { @@ -204,403 +217,6 @@ impl fmt::Debug for c_void { } } -/// Basic implementation of a `va_list`. -// The name is WIP, using `VaListImpl` for now. 
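`CStr::from_ptr` and `count_bytes` can be const-stabilized above because the length computation has a constant-evaluable branch: the renamed `strlen` dispatches between a compile-time loop (`strlen_ct`) and the libc call (`strlen_rt`) via `const_eval_select`. A stable-Rust sketch of just the compile-time half, under the same safety contract:

```rust
/// Count bytes up to (not including) the NUL, the way `strlen_ct` does.
///
/// # Safety
///
/// `s` must point to a NUL-terminated buffer no further than `isize::MAX` away.
const unsafe fn strlen_ct(s: *const u8) -> usize {
    let mut len = 0;
    // SAFETY: the caller guarantees a NUL terminator is in bounds.
    unsafe {
        while *s.add(len) != 0 {
            len += 1;
        }
    }
    len
}

// The point of the patch: this is now usable at compile time.
const LEN: usize = unsafe { strlen_ct(b"hello\0".as_ptr()) };

fn main() {
    assert_eq!(LEN, 5);
}
```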
-#[cfg(any( - all( - not(target_arch = "aarch64"), - not(target_arch = "powerpc"), - not(target_arch = "s390x"), - not(target_arch = "x86_64") - ), - all(target_arch = "aarch64", target_vendor = "apple"), - target_family = "wasm", - target_os = "uefi", - windows, -))] -#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -#[lang = "va_list"] -pub struct VaListImpl<'f> { - ptr: *mut c_void, - - // Invariant over `'f`, so each `VaListImpl<'f>` object is tied to - // the region of the function it's defined in - _marker: PhantomData<&'f mut &'f c_void>, -} - -#[cfg(any( - all( - not(target_arch = "aarch64"), - not(target_arch = "powerpc"), - not(target_arch = "s390x"), - not(target_arch = "x86_64") - ), - all(target_arch = "aarch64", target_vendor = "apple"), - target_family = "wasm", - target_os = "uefi", - windows, -))] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> fmt::Debug for VaListImpl<'f> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "va_list* {:p}", self.ptr) - } -} - -/// AArch64 ABI implementation of a `va_list`. See the -/// [AArch64 Procedure Call Standard] for more details. -/// -/// [AArch64 Procedure Call Standard]: -/// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf -#[cfg(all( - target_arch = "aarch64", - not(target_vendor = "apple"), - not(target_os = "uefi"), - not(windows), -))] -#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 -#[derive(Debug)] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -#[lang = "va_list"] -pub struct VaListImpl<'f> { - stack: *mut c_void, - gr_top: *mut c_void, - vr_top: *mut c_void, - gr_offs: i32, - vr_offs: i32, - _marker: PhantomData<&'f mut &'f c_void>, -} - -/// PowerPC ABI implementation of a `va_list`. -#[cfg(all(target_arch = "powerpc", not(target_os = "uefi"), not(windows)))] -#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 -#[derive(Debug)] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -#[lang = "va_list"] -pub struct VaListImpl<'f> { - gpr: u8, - fpr: u8, - reserved: u16, - overflow_arg_area: *mut c_void, - reg_save_area: *mut c_void, - _marker: PhantomData<&'f mut &'f c_void>, -} - -/// s390x ABI implementation of a `va_list`. -#[cfg(target_arch = "s390x")] -#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 -#[derive(Debug)] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -#[lang = "va_list"] -pub struct VaListImpl<'f> { - gpr: i64, - fpr: i64, - overflow_arg_area: *mut c_void, - reg_save_area: *mut c_void, - _marker: PhantomData<&'f mut &'f c_void>, -} - -/// x86_64 ABI implementation of a `va_list`. 
-#[cfg(all(target_arch = "x86_64", not(target_os = "uefi"), not(windows)))] -#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 -#[derive(Debug)] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -#[lang = "va_list"] -pub struct VaListImpl<'f> { - gp_offset: i32, - fp_offset: i32, - overflow_arg_area: *mut c_void, - reg_save_area: *mut c_void, - _marker: PhantomData<&'f mut &'f c_void>, -} - -/// A wrapper for a `va_list` -#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 -#[derive(Debug)] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -pub struct VaList<'a, 'f: 'a> { - #[cfg(any( - all( - not(target_arch = "aarch64"), - not(target_arch = "powerpc"), - not(target_arch = "s390x"), - not(target_arch = "x86_64") - ), - all(target_arch = "aarch64", target_vendor = "apple"), - target_family = "wasm", - target_os = "uefi", - windows, - ))] - inner: VaListImpl<'f>, - - #[cfg(all( - any( - target_arch = "aarch64", - target_arch = "powerpc", - target_arch = "s390x", - target_arch = "x86_64" - ), - any(not(target_arch = "aarch64"), not(target_vendor = "apple")), - not(target_family = "wasm"), - not(target_os = "uefi"), - not(windows), - ))] - inner: &'a mut VaListImpl<'f>, - - _marker: PhantomData<&'a mut VaListImpl<'f>>, -} - -#[cfg(any( - all( - not(target_arch = "aarch64"), - not(target_arch = "powerpc"), - not(target_arch = "s390x"), - not(target_arch = "x86_64") - ), - all(target_arch = "aarch64", target_vendor = "apple"), - target_family = "wasm", - target_os = "uefi", - windows, -))] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> VaListImpl<'f> { - /// Convert a `VaListImpl` into a `VaList` that is binary-compatible with C's `va_list`. - #[inline] - pub fn as_va_list<'a>(&'a mut self) -> VaList<'a, 'f> { - VaList { inner: VaListImpl { ..*self }, _marker: PhantomData } - } -} - -#[cfg(all( - any( - target_arch = "aarch64", - target_arch = "powerpc", - target_arch = "s390x", - target_arch = "x86_64" - ), - any(not(target_arch = "aarch64"), not(target_vendor = "apple")), - not(target_family = "wasm"), - not(target_os = "uefi"), - not(windows), -))] -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> VaListImpl<'f> { - /// Convert a `VaListImpl` into a `VaList` that is binary-compatible with C's `va_list`. 
- #[inline] - pub fn as_va_list<'a>(&'a mut self) -> VaList<'a, 'f> { - VaList { inner: self, _marker: PhantomData } - } -} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'a, 'f: 'a> Deref for VaList<'a, 'f> { - type Target = VaListImpl<'f>; - - #[inline] - fn deref(&self) -> &VaListImpl<'f> { - &self.inner - } -} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'a, 'f: 'a> DerefMut for VaList<'a, 'f> { - #[inline] - fn deref_mut(&mut self) -> &mut VaListImpl<'f> { - &mut self.inner - } -} - -// The VaArgSafe trait needs to be used in public interfaces, however, the trait -// itself must not be allowed to be used outside this module. Allowing users to -// implement the trait for a new type (thereby allowing the va_arg intrinsic to -// be used on a new type) is likely to cause undefined behavior. -// -// FIXME(dlrobertson): In order to use the VaArgSafe trait in a public interface -// but also ensure it cannot be used elsewhere, the trait needs to be public -// within a private module. Once RFC 2145 has been implemented look into -// improving this. -mod sealed_trait { - /// Trait which permits the allowed types to be used with [super::VaListImpl::arg]. - #[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" - )] - pub trait VaArgSafe {} -} - -macro_rules! impl_va_arg_safe { - ($($t:ty),+) => { - $( - #[unstable(feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930")] - impl sealed_trait::VaArgSafe for $t {} - )+ - } -} - -impl_va_arg_safe! {i8, i16, i32, i64, usize} -impl_va_arg_safe! {u8, u16, u32, u64, isize} -impl_va_arg_safe! {f64} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl sealed_trait::VaArgSafe for *mut T {} -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl sealed_trait::VaArgSafe for *const T {} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> VaListImpl<'f> { - /// Advance to the next arg. - #[inline] - pub unsafe fn arg(&mut self) -> T { - // SAFETY: the caller must uphold the safety contract for `va_arg`. - unsafe { va_arg(self) } - } - - /// Copies the `va_list` at the current location. - pub unsafe fn with_copy(&self, f: F) -> R - where - F: for<'copy> FnOnce(VaList<'copy, 'f>) -> R, - { - let mut ap = self.clone(); - let ret = f(ap.as_va_list()); - // SAFETY: the caller must uphold the safety contract for `va_end`. 
- unsafe { - va_end(&mut ap); - } - ret - } -} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> Clone for VaListImpl<'f> { - #[inline] - fn clone(&self) -> Self { - let mut dest = crate::mem::MaybeUninit::uninit(); - // SAFETY: we write to the `MaybeUninit`, thus it is initialized and `assume_init` is legal - unsafe { - va_copy(dest.as_mut_ptr(), self); - dest.assume_init() - } - } -} - -#[unstable( - feature = "c_variadic", - reason = "the `c_variadic` feature has not been properly tested on \ - all supported platforms", - issue = "44930" -)] -impl<'f> Drop for VaListImpl<'f> { - fn drop(&mut self) { - // FIXME: this should call `va_end`, but there's no clean way to - // guarantee that `drop` always gets inlined into its caller, - // so the `va_end` would get directly called from the same function as - // the corresponding `va_copy`. `man va_end` states that C requires this, - // and LLVM basically follows the C semantics, so we need to make sure - // that `va_end` is always called from the same function as `va_copy`. - // For more details, see https://github.com/rust-lang/rust/pull/59625 - // and https://llvm.org/docs/LangRef.html#llvm-va-end-intrinsic. - // - // This works for now, since `va_end` is a no-op on all current LLVM targets. - } -} - -extern "rust-intrinsic" { - /// Destroy the arglist `ap` after initialization with `va_start` or - /// `va_copy`. - #[rustc_nounwind] - fn va_end(ap: &mut VaListImpl<'_>); - - /// Copies the current location of arglist `src` to the arglist `dst`. - #[rustc_nounwind] - fn va_copy<'f>(dest: *mut VaListImpl<'f>, src: &VaListImpl<'f>); - - /// Loads an argument of type `T` from the `va_list` `ap` and increment the - /// argument `ap` points to. - #[rustc_nounwind] - fn va_arg(ap: &mut VaListImpl<'_>) -> T; -} - // Link the MSVC default lib #[cfg(all(windows, target_env = "msvc"))] #[link( diff --git a/core/src/ffi/va_list.rs b/core/src/ffi/va_list.rs new file mode 100644 index 0000000000000..6a2e8b67d0c2a --- /dev/null +++ b/core/src/ffi/va_list.rs @@ -0,0 +1,301 @@ +//! C's "variable arguments" +//! +//! Better known as "varargs". + +use crate::ffi::c_void; + +#[allow(unused_imports)] +use crate::fmt; +use crate::marker::PhantomData; +use crate::ops::{Deref, DerefMut}; + +/// Basic implementation of a `va_list`. +// The name is WIP, using `VaListImpl` for now. 
+#[cfg(any( + all( + not(target_arch = "aarch64"), + not(target_arch = "powerpc"), + not(target_arch = "s390x"), + not(target_arch = "x86_64") + ), + all(target_arch = "aarch64", target_vendor = "apple"), + target_family = "wasm", + target_os = "uefi", + windows, +))] +#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 +#[lang = "va_list"] +pub struct VaListImpl<'f> { + ptr: *mut c_void, + + // Invariant over `'f`, so each `VaListImpl<'f>` object is tied to + // the region of the function it's defined in + _marker: PhantomData<&'f mut &'f c_void>, +} + +#[cfg(any( + all( + not(target_arch = "aarch64"), + not(target_arch = "powerpc"), + not(target_arch = "s390x"), + not(target_arch = "x86_64") + ), + all(target_arch = "aarch64", target_vendor = "apple"), + target_family = "wasm", + target_os = "uefi", + windows, +))] +impl<'f> fmt::Debug for VaListImpl<'f> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "va_list* {:p}", self.ptr) + } +} + +/// AArch64 ABI implementation of a `va_list`. See the +/// [AArch64 Procedure Call Standard] for more details. +/// +/// [AArch64 Procedure Call Standard]: +/// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf +#[cfg(all( + target_arch = "aarch64", + not(target_vendor = "apple"), + not(target_os = "uefi"), + not(windows), +))] +#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 +#[derive(Debug)] +#[lang = "va_list"] +pub struct VaListImpl<'f> { + stack: *mut c_void, + gr_top: *mut c_void, + vr_top: *mut c_void, + gr_offs: i32, + vr_offs: i32, + _marker: PhantomData<&'f mut &'f c_void>, +} + +/// PowerPC ABI implementation of a `va_list`. +#[cfg(all(target_arch = "powerpc", not(target_os = "uefi"), not(windows)))] +#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 +#[derive(Debug)] +#[lang = "va_list"] +pub struct VaListImpl<'f> { + gpr: u8, + fpr: u8, + reserved: u16, + overflow_arg_area: *mut c_void, + reg_save_area: *mut c_void, + _marker: PhantomData<&'f mut &'f c_void>, +} + +/// s390x ABI implementation of a `va_list`. +#[cfg(target_arch = "s390x")] +#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 +#[derive(Debug)] +#[lang = "va_list"] +pub struct VaListImpl<'f> { + gpr: i64, + fpr: i64, + overflow_arg_area: *mut c_void, + reg_save_area: *mut c_void, + _marker: PhantomData<&'f mut &'f c_void>, +} + +/// x86_64 ABI implementation of a `va_list`. 
+#[cfg(all(target_arch = "x86_64", not(target_os = "uefi"), not(windows)))] +#[cfg_attr(not(doc), repr(C))] // work around https://github.com/rust-lang/rust/issues/66401 +#[derive(Debug)] +#[lang = "va_list"] +pub struct VaListImpl<'f> { + gp_offset: i32, + fp_offset: i32, + overflow_arg_area: *mut c_void, + reg_save_area: *mut c_void, + _marker: PhantomData<&'f mut &'f c_void>, +} + +/// A wrapper for a `va_list` +#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435 +#[derive(Debug)] +pub struct VaList<'a, 'f: 'a> { + #[cfg(any( + all( + not(target_arch = "aarch64"), + not(target_arch = "powerpc"), + not(target_arch = "s390x"), + not(target_arch = "x86_64") + ), + all(target_arch = "aarch64", target_vendor = "apple"), + target_family = "wasm", + target_os = "uefi", + windows, + ))] + inner: VaListImpl<'f>, + + #[cfg(all( + any( + target_arch = "aarch64", + target_arch = "powerpc", + target_arch = "s390x", + target_arch = "x86_64" + ), + any(not(target_arch = "aarch64"), not(target_vendor = "apple")), + not(target_family = "wasm"), + not(target_os = "uefi"), + not(windows), + ))] + inner: &'a mut VaListImpl<'f>, + + _marker: PhantomData<&'a mut VaListImpl<'f>>, +} + +#[cfg(any( + all( + not(target_arch = "aarch64"), + not(target_arch = "powerpc"), + not(target_arch = "s390x"), + not(target_arch = "x86_64") + ), + all(target_arch = "aarch64", target_vendor = "apple"), + target_family = "wasm", + target_os = "uefi", + windows, +))] +impl<'f> VaListImpl<'f> { + /// Convert a `VaListImpl` into a `VaList` that is binary-compatible with C's `va_list`. + #[inline] + pub fn as_va_list<'a>(&'a mut self) -> VaList<'a, 'f> { + VaList { inner: VaListImpl { ..*self }, _marker: PhantomData } + } +} + +#[cfg(all( + any( + target_arch = "aarch64", + target_arch = "powerpc", + target_arch = "s390x", + target_arch = "x86_64" + ), + any(not(target_arch = "aarch64"), not(target_vendor = "apple")), + not(target_family = "wasm"), + not(target_os = "uefi"), + not(windows), +))] +impl<'f> VaListImpl<'f> { + /// Convert a `VaListImpl` into a `VaList` that is binary-compatible with C's `va_list`. + #[inline] + pub fn as_va_list<'a>(&'a mut self) -> VaList<'a, 'f> { + VaList { inner: self, _marker: PhantomData } + } +} + +impl<'a, 'f: 'a> Deref for VaList<'a, 'f> { + type Target = VaListImpl<'f>; + + #[inline] + fn deref(&self) -> &VaListImpl<'f> { + &self.inner + } +} + +impl<'a, 'f: 'a> DerefMut for VaList<'a, 'f> { + #[inline] + fn deref_mut(&mut self) -> &mut VaListImpl<'f> { + &mut self.inner + } +} + +// The VaArgSafe trait needs to be used in public interfaces, however, the trait +// itself must not be allowed to be used outside this module. Allowing users to +// implement the trait for a new type (thereby allowing the va_arg intrinsic to +// be used on a new type) is likely to cause undefined behavior. +// +// FIXME(dlrobertson): In order to use the VaArgSafe trait in a public interface +// but also ensure it cannot be used elsewhere, the trait needs to be public +// within a private module. Once RFC 2145 has been implemented look into +// improving this. +mod sealed_trait { + /// Trait which permits the allowed types to be used with [super::VaListImpl::arg]. + pub unsafe trait VaArgSafe {} +} + +macro_rules! impl_va_arg_safe { + ($($t:ty),+) => { + $( + unsafe impl sealed_trait::VaArgSafe for $t {} + )+ + } +} + +impl_va_arg_safe! {i8, i16, i32, i64, usize} +impl_va_arg_safe! {u8, u16, u32, u64, isize} +impl_va_arg_safe! 
{f64} + +unsafe impl sealed_trait::VaArgSafe for *mut T {} +unsafe impl sealed_trait::VaArgSafe for *const T {} + +impl<'f> VaListImpl<'f> { + /// Advance to the next arg. + #[inline] + pub unsafe fn arg(&mut self) -> T { + // SAFETY: the caller must uphold the safety contract for `va_arg`. + unsafe { va_arg(self) } + } + + /// Copies the `va_list` at the current location. + pub unsafe fn with_copy(&self, f: F) -> R + where + F: for<'copy> FnOnce(VaList<'copy, 'f>) -> R, + { + let mut ap = self.clone(); + let ret = f(ap.as_va_list()); + // SAFETY: the caller must uphold the safety contract for `va_end`. + unsafe { + va_end(&mut ap); + } + ret + } +} + +impl<'f> Clone for VaListImpl<'f> { + #[inline] + fn clone(&self) -> Self { + let mut dest = crate::mem::MaybeUninit::uninit(); + // SAFETY: we write to the `MaybeUninit`, thus it is initialized and `assume_init` is legal + unsafe { + va_copy(dest.as_mut_ptr(), self); + dest.assume_init() + } + } +} + +impl<'f> Drop for VaListImpl<'f> { + fn drop(&mut self) { + // FIXME: this should call `va_end`, but there's no clean way to + // guarantee that `drop` always gets inlined into its caller, + // so the `va_end` would get directly called from the same function as + // the corresponding `va_copy`. `man va_end` states that C requires this, + // and LLVM basically follows the C semantics, so we need to make sure + // that `va_end` is always called from the same function as `va_copy`. + // For more details, see https://github.com/rust-lang/rust/pull/59625 + // and https://llvm.org/docs/LangRef.html#llvm-va-end-intrinsic. + // + // This works for now, since `va_end` is a no-op on all current LLVM targets. + } +} + +extern "rust-intrinsic" { + /// Destroy the arglist `ap` after initialization with `va_start` or + /// `va_copy`. + #[rustc_nounwind] + fn va_end(ap: &mut VaListImpl<'_>); + + /// Copies the current location of arglist `src` to the arglist `dst`. + #[rustc_nounwind] + fn va_copy<'f>(dest: *mut VaListImpl<'f>, src: &VaListImpl<'f>); + + /// Loads an argument of type `T` from the `va_list` `ap` and increment the + /// argument `ap` points to. 
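With the move into `core::ffi::va_list`, the public surface is unchanged: `VaListImpl::arg` remains the front end for the `va_arg` intrinsic, gated on the unstable `c_variadic` feature (#44930). A nightly-only sketch of a variadic function consuming its arguments through it (`sum_ints` is an invented example, not library code):

```rust
#![feature(c_variadic)]

/// Sum `n` trailing `i32` arguments, C varargs style.
///
/// # Safety
///
/// The caller must pass at least `n` further `i32` arguments.
pub unsafe extern "C" fn sum_ints(n: usize, mut args: ...) -> i32 {
    let mut total = 0;
    for _ in 0..n {
        // SAFETY: guaranteed by the caller's contract above; `i32` is
        // one of the sealed `VaArgSafe` types.
        total += unsafe { args.arg::<i32>() };
    }
    total
}

fn main() {
    // SAFETY: exactly three `i32`s follow `n = 3`.
    assert_eq!(unsafe { sum_ints(3, 1i32, 2i32, 3i32) }, 6);
}
```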
+ #[rustc_nounwind] + fn va_arg(ap: &mut VaListImpl<'_>) -> T; +} diff --git a/core/src/fmt/float.rs b/core/src/fmt/float.rs index 7f23d3c09567c..80c45fce2f0a0 100644 --- a/core/src/fmt/float.rs +++ b/core/src/fmt/float.rs @@ -35,8 +35,8 @@ fn float_to_decimal_common_exact( where T: flt2dec::DecodableFloat, { - let mut buf: [MaybeUninit; 1024] = MaybeUninit::uninit_array(); // enough for f32 and f64 - let mut parts: [MaybeUninit>; 4] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; 1024] = [MaybeUninit::uninit(); 1024]; // enough for f32 and f64 + let mut parts: [MaybeUninit>; 4] = [MaybeUninit::uninit(); 4]; let formatted = flt2dec::to_exact_fixed_str( flt2dec::strategy::grisu::format_exact, *num, @@ -62,8 +62,9 @@ where T: flt2dec::DecodableFloat, { // enough for f32 and f64 - let mut buf: [MaybeUninit; flt2dec::MAX_SIG_DIGITS] = MaybeUninit::uninit_array(); - let mut parts: [MaybeUninit>; 4] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; flt2dec::MAX_SIG_DIGITS] = + [MaybeUninit::uninit(); flt2dec::MAX_SIG_DIGITS]; + let mut parts: [MaybeUninit>; 4] = [MaybeUninit::uninit(); 4]; let formatted = flt2dec::to_shortest_str( flt2dec::strategy::grisu::format_shortest, *num, @@ -107,8 +108,8 @@ fn float_to_exponential_common_exact( where T: flt2dec::DecodableFloat, { - let mut buf: [MaybeUninit; 1024] = MaybeUninit::uninit_array(); // enough for f32 and f64 - let mut parts: [MaybeUninit>; 6] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; 1024] = [MaybeUninit::uninit(); 1024]; // enough for f32 and f64 + let mut parts: [MaybeUninit>; 6] = [MaybeUninit::uninit(); 6]; let formatted = flt2dec::to_exact_exp_str( flt2dec::strategy::grisu::format_exact, *num, @@ -135,8 +136,9 @@ where T: flt2dec::DecodableFloat, { // enough for f32 and f64 - let mut buf: [MaybeUninit; flt2dec::MAX_SIG_DIGITS] = MaybeUninit::uninit_array(); - let mut parts: [MaybeUninit>; 6] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; flt2dec::MAX_SIG_DIGITS] = + [MaybeUninit::uninit(); flt2dec::MAX_SIG_DIGITS]; + let mut parts: [MaybeUninit>; 6] = [MaybeUninit::uninit(); 6]; let formatted = flt2dec::to_shortest_exp_str( flt2dec::strategy::grisu::format_shortest, *num, diff --git a/core/src/fmt/mod.rs b/core/src/fmt/mod.rs index 1324fb6e056be..25ab5b2db9641 100644 --- a/core/src/fmt/mod.rs +++ b/core/src/fmt/mod.rs @@ -338,23 +338,19 @@ pub struct Arguments<'a> { impl<'a> Arguments<'a> { #[inline] #[rustc_const_unstable(feature = "const_fmt_arguments_new", issue = "none")] - pub const fn new_const(pieces: &'a [&'static str]) -> Self { - if pieces.len() > 1 { - // Since panic!() expands to panic_fmt(format_args!()), using panic! here is both a - // bit silly and also significantly increases the amount of MIR generated by panics. - crate::panicking::panic_nounwind("invalid args"); - } + pub const fn new_const(pieces: &'a [&'static str; N]) -> Self { + const { assert!(N <= 1) }; Arguments { pieces, fmt: None, args: &[] } } /// When using the format_args!() macro, this function is used to generate the /// Arguments structure. #[inline] - pub fn new_v1(pieces: &'a [&'static str], args: &'a [rt::Argument<'a>]) -> Arguments<'a> { - if pieces.len() < args.len() || pieces.len() > args.len() + 1 { - // See Arguments::new_const for why we don't use panic!. 
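The `fmt/float.rs` hunks above replace the unstable `MaybeUninit::uninit_array()` with a plain array repeat expression, which works because `MaybeUninit<T>` can always be constructed by the `Copy`-compatible `uninit()`. The same pattern in isolation (buffer size, element type, and function name here are arbitrary):

```rust
use std::mem::MaybeUninit;

/// Fill up to 16 bytes of an uninitialized buffer, then read back the prefix.
fn collect_up_to_16(iter: impl Iterator<Item = u8>) -> Vec<u8> {
    // The repeat expression is allowed because `MaybeUninit<u8>: Copy`.
    let mut buf: [MaybeUninit<u8>; 16] = [MaybeUninit::uninit(); 16];
    let mut len = 0;
    for b in iter.take(16) {
        buf[len] = MaybeUninit::new(b);
        len += 1;
    }
    // SAFETY: exactly the first `len` elements were initialized above.
    buf[..len].iter().map(|m| unsafe { m.assume_init() }).collect()
}

fn main() {
    assert_eq!(collect_up_to_16(1u8..=4), vec![1, 2, 3, 4]);
}
```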
- crate::panicking::panic_nounwind("invalid args"); - } + pub fn new_v1( + pieces: &'a [&'static str; P], + args: &'a [rt::Argument<'a>; A], + ) -> Arguments<'a> { + const { assert!(P >= A && P <= A + 1, "invalid args") } Arguments { pieces, fmt: None, args } } @@ -463,6 +459,12 @@ impl<'a> Arguments<'a> { } } +// Manually implementing these results in better error messages. +#[stable(feature = "rust1", since = "1.0.0")] +impl !Send for Arguments<'_> {} +#[stable(feature = "rust1", since = "1.0.0")] +impl !Sync for Arguments<'_> {} + #[stable(feature = "rust1", since = "1.0.0")] impl Debug for Arguments<'_> { fn fmt(&self, fmt: &mut Formatter<'_>) -> Result { @@ -515,7 +517,10 @@ impl Display for Arguments<'_> { /// /// let origin = Point { x: 0, y: 0 }; /// -/// assert_eq!(format!("The origin is: {origin:?}"), "The origin is: Point { x: 0, y: 0 }"); +/// assert_eq!( +/// format!("The origin is: {origin:?}"), +/// "The origin is: Point { x: 0, y: 0 }", +/// ); /// ``` /// /// Manually implementing: @@ -539,7 +544,10 @@ impl Display for Arguments<'_> { /// /// let origin = Point { x: 0, y: 0 }; /// -/// assert_eq!(format!("The origin is: {origin:?}"), "The origin is: Point { x: 0, y: 0 }"); +/// assert_eq!( +/// format!("The origin is: {origin:?}"), +/// "The origin is: Point { x: 0, y: 0 }", +/// ); /// ``` /// /// There are a number of helper methods on the [`Formatter`] struct to help you with manual @@ -580,11 +588,11 @@ impl Display for Arguments<'_> { /// /// let origin = Point { x: 0, y: 0 }; /// -/// assert_eq!(format!("The origin is: {origin:#?}"), -/// "The origin is: Point { +/// let expected = "The origin is: Point { /// x: 0, /// y: 0, -/// }"); +/// }"; +/// assert_eq!(format!("The origin is: {origin:#?}"), expected); /// ``` #[stable(feature = "rust1", since = "1.0.0")] @@ -736,8 +744,10 @@ pub trait Display { /// } /// } /// - /// assert_eq!("(1.987, 2.983)", - /// format!("{}", Position { longitude: 1.987, latitude: 2.983, })); + /// assert_eq!( + /// "(1.987, 2.983)", + /// format!("{}", Position { longitude: 1.987, latitude: 2.983, }), + /// ); /// ``` #[stable(feature = "rust1", since = "1.0.0")] fn fmt(&self, f: &mut Formatter<'_>) -> Result; @@ -2482,8 +2492,7 @@ impl Display for char { #[stable(feature = "rust1", since = "1.0.0")] impl Pointer for *const T { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - // Cast is needed here because `.expose_provenance()` requires `T: Sized`. - pointer_fmt_inner((*self as *const ()).expose_provenance(), f) + pointer_fmt_inner(self.expose_provenance(), f) } } diff --git a/core/src/fmt/num.rs b/core/src/fmt/num.rs index ab2158394bf1e..3a5a5af8bf5d3 100644 --- a/core/src/fmt/num.rs +++ b/core/src/fmt/num.rs @@ -212,6 +212,7 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"0001020304050607080910111213141516171819\ macro_rules! impl_Display { ($($t:ident),* as $u:ident via $conv_fn:ident named $name:ident) => { + #[cfg(not(feature = "optimize_for_size"))] fn $name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { // 2^128 is about 3*10^38, so 39 gives an extra byte of space let mut buf = [MaybeUninit::::uninit(); 39]; @@ -277,6 +278,38 @@ macro_rules! 
impl_Display { f.pad_integral(is_nonnegative, "", buf_slice) } + #[cfg(feature = "optimize_for_size")] + fn $name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // 2^128 is about 3*10^38, so 39 gives an extra byte of space + let mut buf = [MaybeUninit::::uninit(); 39]; + let mut curr = buf.len(); + let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + + // SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning + // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at + // each step this is kept the same as `n` is divided. Since `n` is always + // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]` + // is safe to access. + unsafe { + loop { + curr -= 1; + buf_ptr.add(curr).write((n % 10) as u8 + b'0'); + n /= 10; + + if n == 0 { + break; + } + } + } + + // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid UTF-8 + let buf_slice = unsafe { + str::from_utf8_unchecked( + slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr)) + }; + f.pad_integral(is_nonnegative, "", buf_slice) + } + $(#[stable(feature = "rust1", since = "1.0.0")] impl fmt::Display for $t { #[allow(unused_comparisons)] diff --git a/core/src/fmt/rt.rs b/core/src/fmt/rt.rs index 92626feabf3d7..65a4d537cc74d 100644 --- a/core/src/fmt/rt.rs +++ b/core/src/fmt/rt.rs @@ -5,6 +5,7 @@ use super::*; use crate::hint::unreachable_unchecked; +use crate::ptr::NonNull; #[lang = "format_placeholder"] #[derive(Copy, Clone)] @@ -66,7 +67,13 @@ pub(super) enum Flag { #[derive(Copy, Clone)] enum ArgumentType<'a> { - Placeholder { value: &'a Opaque, formatter: fn(&Opaque, &mut Formatter<'_>) -> Result }, + Placeholder { + // INVARIANT: `formatter` has type `fn(&T, _) -> _` for some `T`, and `value` + // was derived from a `&'a T`. + value: NonNull<()>, + formatter: unsafe fn(NonNull<()>, &mut Formatter<'_>) -> Result, + _lifetime: PhantomData<&'a ()>, + }, Count(usize), } @@ -90,21 +97,15 @@ pub struct Argument<'a> { impl<'a> Argument<'a> { #[inline(always)] fn new<'b, T>(x: &'b T, f: fn(&T, &mut Formatter<'_>) -> Result) -> Argument<'b> { - // SAFETY: `mem::transmute(x)` is safe because - // 1. `&'b T` keeps the lifetime it originated with `'b` - // (so as to not have an unbounded lifetime) - // 2. `&'b T` and `&'b Opaque` have the same memory layout - // (when `T` is `Sized`, as it is here) - // `mem::transmute(f)` is safe since `fn(&T, &mut Formatter<'_>) -> Result` - // and `fn(&Opaque, &mut Formatter<'_>) -> Result` have the same ABI - // (as long as `T` is `Sized`) - unsafe { - Argument { - ty: ArgumentType::Placeholder { - formatter: mem::transmute(f), - value: mem::transmute(x), - }, - } + Argument { + // INVARIANT: this creates an `ArgumentType<'b>` from a `&'b T` and + // a `fn(&T, ...)`, so the invariant is maintained. + ty: ArgumentType::Placeholder { + value: NonNull::from(x).cast(), + // SAFETY: function pointers always have the same layout. 
+ formatter: unsafe { mem::transmute(f) }, + _lifetime: PhantomData, + }, } } @@ -162,7 +163,14 @@ impl<'a> Argument<'a> { #[inline(always)] pub(super) unsafe fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self.ty { - ArgumentType::Placeholder { formatter, value } => formatter(value, f), + // SAFETY: + // Because of the invariant that if `formatter` had the type + // `fn(&T, _) -> _` then `value` has type `&'b T` where `'b` is + // the lifetime of the `ArgumentType`, and because references + // and `NonNull` are ABI-compatible, this is completely equivalent + // to calling the original function passed to `new` with the + // original reference, which is sound. + ArgumentType::Placeholder { formatter, value, .. } => unsafe { formatter(value, f) }, // SAFETY: the caller promised this. ArgumentType::Count(_) => unsafe { unreachable_unchecked() }, } @@ -208,7 +216,3 @@ impl UnsafeArg { Self { _private: () } } } - -extern "C" { - type Opaque; -} diff --git a/core/src/future/async_drop.rs b/core/src/future/async_drop.rs index 0eb8d7bb32899..63193bbfb35e8 100644 --- a/core/src/future/async_drop.rs +++ b/core/src/future/async_drop.rs @@ -1,4 +1,4 @@ -#![unstable(feature = "async_drop", issue = "none")] +#![unstable(feature = "async_drop", issue = "126482")] use crate::fmt; use crate::future::{Future, IntoFuture}; @@ -10,27 +10,27 @@ use crate::task::{ready, Context, Poll}; /// Asynchronously drops a value by running `AsyncDrop::async_drop` /// on a value and its fields recursively. -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] pub fn async_drop(value: T) -> AsyncDropOwning { AsyncDropOwning { value: MaybeUninit::new(value), dtor: None, _pinned: PhantomPinned } } /// A future returned by the [`async_drop`]. -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] pub struct AsyncDropOwning { value: MaybeUninit, dtor: Option>, _pinned: PhantomPinned, } -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] impl fmt::Debug for AsyncDropOwning { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("AsyncDropOwning").finish_non_exhaustive() } } -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] impl Future for AsyncDropOwning { type Output = (); @@ -86,24 +86,24 @@ unsafe fn async_drop_in_place_raw( /// returned future stores the `to_drop` pointer and user is required /// to guarantee that dropped value doesn't move. /// -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] pub unsafe fn async_drop_in_place(to_drop: *mut T) -> AsyncDropInPlace { // SAFETY: `async_drop_in_place_raw` has the same safety requirements unsafe { AsyncDropInPlace(async_drop_in_place_raw(to_drop)) } } /// A future returned by the [`async_drop_in_place`]. 
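The `rt::Argument` change above swaps the `extern type Opaque` escape hatch for a `NonNull<()>` plus a `PhantomData` lifetime, keeping the type-correct pairing of pointer and formatter as a struct invariant. A standalone sketch of that pairing, assuming illustrative names and using a generic shim where core transmutes the function pointer:

```rust
use std::fmt::{self, Formatter};
use std::marker::PhantomData;
use std::ptr::NonNull;

/// An erased argument: a thin pointer plus a formatter that knows the real `T`.
struct Arg<'a> {
    value: NonNull<()>,
    formatter: unsafe fn(NonNull<()>, &mut Formatter<'_>) -> fmt::Result,
    _lifetime: PhantomData<&'a ()>,
}

impl<'a> Arg<'a> {
    fn new<T: fmt::Display>(x: &'a T) -> Self {
        unsafe fn call<T: fmt::Display>(p: NonNull<()>, f: &mut Formatter<'_>) -> fmt::Result {
            // SAFETY: `new` only ever pairs this shim with a pointer
            // derived from a live `&'a T`.
            let r: &T = unsafe { p.cast::<T>().as_ref() };
            fmt::Display::fmt(r, f)
        }
        // INVARIANT: `value` and `formatter` are created for the same `T`.
        Arg { value: NonNull::from(x).cast(), formatter: call::<T>, _lifetime: PhantomData }
    }
}

impl fmt::Display for Arg<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // SAFETY: upheld by the pairing invariant established in `new`.
        unsafe { (self.formatter)(self.value, f) }
    }
}

fn main() {
    let x = 42;
    println!("{}", Arg::new(&x));
}
```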
-#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] pub struct AsyncDropInPlace(::AsyncDestructor); -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] impl fmt::Debug for AsyncDropInPlace { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("AsyncDropInPlace").finish_non_exhaustive() } } -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] impl Future for AsyncDropInPlace { type Output = (); @@ -117,18 +117,18 @@ impl Future for AsyncDropInPlace { // FIXME(zetanumbers): Add same restrictions on AsyncDrop impls as // with Drop impls /// Custom code within the asynchronous destructor. -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] #[lang = "async_drop"] pub trait AsyncDrop { /// A future returned by the [`AsyncDrop::async_drop`] to be part /// of the async destructor. - #[unstable(feature = "async_drop", issue = "none")] + #[unstable(feature = "async_drop", issue = "126482")] type Dropper<'a>: Future where Self: 'a; /// Constructs the asynchronous destructor for this type. - #[unstable(feature = "async_drop", issue = "none")] + #[unstable(feature = "async_drop", issue = "126482")] fn async_drop(self: Pin<&mut Self>) -> Self::Dropper<'_>; } @@ -161,6 +161,11 @@ async unsafe fn surface_drop_in_place(ptr: *mut T) { /// wrapped future completes by returning `Poll::Ready(())` on poll. This /// is useful for constructing async destructors to guarantee this /// "fuse" property +// +// FIXME: Consider optimizing combinators to not have to use fuse in majority +// of cases, perhaps by adding `#[(rustc_)idempotent(_future)]` attribute for +// async functions and blocks with the unit return type. However current layout +// optimizations currently encode `None` case into the async block's discriminant. struct Fuse { inner: Option, } @@ -251,6 +256,13 @@ async unsafe fn either, M: IntoFuture, T } } +#[lang = "async_drop_deferred_drop_in_place"] +async unsafe fn deferred_drop_in_place(to_drop: *mut T) { + // SAFETY: same safety requirements as with drop_in_place (implied by + // function's name) + unsafe { crate::ptr::drop_in_place(to_drop) } +} + /// Used for noop async destructors. We don't use [`core::future::Ready`] /// because it panics after its second poll, which could be potentially /// bad if that would happen during the cleanup. diff --git a/core/src/future/future.rs b/core/src/future/future.rs index f965afc8a5937..c80cfdcebf70d 100644 --- a/core/src/future/future.rs +++ b/core/src/future/future.rs @@ -35,7 +35,7 @@ use crate::task::{Context, Poll}; pub trait Future { /// The type of value produced on completion. 
#[stable(feature = "futures_api", since = "1.36.0")] - #[rustc_diagnostic_item = "FutureOutput"] + #[lang = "future_output"] type Output; /// Attempt to resolve the future to a final value, registering diff --git a/core/src/future/mod.rs b/core/src/future/mod.rs index 873cccc7e96fd..3a1451abfa40b 100644 --- a/core/src/future/mod.rs +++ b/core/src/future/mod.rs @@ -37,7 +37,7 @@ pub use ready::{ready, Ready}; #[stable(feature = "future_poll_fn", since = "1.64.0")] pub use poll_fn::{poll_fn, PollFn}; -#[unstable(feature = "async_drop", issue = "none")] +#[unstable(feature = "async_drop", issue = "126482")] pub use async_drop::{async_drop, async_drop_in_place, AsyncDrop, AsyncDropInPlace}; /// This type is needed because: diff --git a/core/src/hash/mod.rs b/core/src/hash/mod.rs index 1c93a7b28fd35..da734466263ab 100644 --- a/core/src/hash/mod.rs +++ b/core/src/hash/mod.rs @@ -334,6 +334,7 @@ pub trait Hasher { /// /// [`write`]: Hasher::write #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] fn finish(&self) -> u64; /// Writes some data into this `Hasher`. diff --git a/core/src/hash/sip.rs b/core/src/hash/sip.rs index 78a232faaf88c..0d1ac64aa56cf 100644 --- a/core/src/hash/sip.rs +++ b/core/src/hash/sip.rs @@ -76,18 +76,19 @@ macro_rules! compress { ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }}; ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{ $v0 = $v0.wrapping_add($v1); + $v2 = $v2.wrapping_add($v3); $v1 = $v1.rotate_left(13); $v1 ^= $v0; - $v0 = $v0.rotate_left(32); - $v2 = $v2.wrapping_add($v3); $v3 = $v3.rotate_left(16); $v3 ^= $v2; - $v0 = $v0.wrapping_add($v3); - $v3 = $v3.rotate_left(21); - $v3 ^= $v0; + $v0 = $v0.rotate_left(32); + $v2 = $v2.wrapping_add($v1); + $v0 = $v0.wrapping_add($v3); $v1 = $v1.rotate_left(17); $v1 ^= $v2; + $v3 = $v3.rotate_left(21); + $v3 ^= $v0; $v2 = $v2.rotate_left(32); }}; } diff --git a/core/src/hint.rs b/core/src/hint.rs index 6e2d88c6b8337..b3e36e6fbc4ac 100644 --- a/core/src/hint.rs +++ b/core/src/hint.rs @@ -111,41 +111,92 @@ pub const unsafe fn unreachable_unchecked() -> ! { /// Makes a *soundness* promise to the compiler that `cond` holds. /// -/// This may allow the optimizer to simplify things, -/// but it might also make the generated code slower. -/// Either way, calling it will most likely make compilation take longer. +/// This may allow the optimizer to simplify things, but it might also make the generated code +/// slower. Either way, calling it will most likely make compilation take longer. /// -/// This is a situational tool for micro-optimization, and is allowed to do nothing. -/// Any use should come with a repeatable benchmark to show the value -/// and allow removing it later should the optimizer get smarter and no longer need it. +/// You may know this from other places as +/// [`llvm.assume`](https://llvm.org/docs/LangRef.html#llvm-assume-intrinsic) or, in C, +/// [`__builtin_assume`](https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume). /// -/// The more complicated the condition the less likely this is to be fruitful. -/// For example, `assert_unchecked(foo.is_sorted())` is a complex enough value -/// that the compiler is unlikely to be able to take advantage of it. +/// This promotes a correctness requirement to a soundness requirement. Don't do that without +/// very good reason. /// -/// There's also no need to `assert_unchecked` basic properties of things. 
For -/// example, the compiler already knows the range of `count_ones`, so there's no -/// benefit to `let n = u32::count_ones(x); assert_unchecked(n <= u32::BITS);`. +/// # Usage /// -/// If ever you're tempted to write `assert_unchecked(false)`, then you're -/// actually looking for [`unreachable_unchecked()`]. +/// This is a situational tool for micro-optimization, and is allowed to do nothing. Any use +/// should come with a repeatable benchmark to show the value, with the expectation to drop it +/// later should the optimizer get smarter and no longer need it. /// -/// You may know this from other places -/// as [`llvm.assume`](https://llvm.org/docs/LangRef.html#llvm-assume-intrinsic) -/// or [`__builtin_assume`](https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume). +/// The more complicated the condition, the less likely this is to be useful. For example, +/// `assert_unchecked(foo.is_sorted())` is a complex enough value that the compiler is unlikely +/// to be able to take advantage of it. /// -/// This promotes a correctness requirement to a soundness requirement. -/// Don't do that without very good reason. +/// There's also no need to `assert_unchecked` basic properties of things. For example, the +/// compiler already knows the range of `count_ones`, so there is no benefit to +/// `let n = u32::count_ones(x); assert_unchecked(n <= u32::BITS);`. +/// +/// `assert_unchecked` is logically equivalent to `if !cond { unreachable_unchecked(); }`. If +/// ever you are tempted to write `assert_unchecked(false)`, you should instead use +/// [`unreachable_unchecked()`] directly. /// /// # Safety /// -/// `cond` must be `true`. It's immediate UB to call this with `false`. +/// `cond` must be `true`. It is immediate UB to call this with `false`. +/// +/// # Example +/// +/// ``` +/// use core::hint; /// +/// /// # Safety +/// /// +/// /// `p` must be nonnull and valid +/// pub unsafe fn next_value(p: *const i32) -> i32 { +/// // SAFETY: caller invariants guarantee that `p` is not null +/// unsafe { hint::assert_unchecked(!p.is_null()) } +/// +/// if p.is_null() { +/// return -1; +/// } else { +/// // SAFETY: caller invariants guarantee that `p` is valid +/// unsafe { *p + 1 } +/// } +/// } +/// ``` +/// +/// Without the `assert_unchecked`, the above function produces the following with optimizations +/// enabled: +/// +/// ```asm +/// next_value: +/// test rdi, rdi +/// je .LBB0_1 +/// mov eax, dword ptr [rdi] +/// inc eax +/// ret +/// .LBB0_1: +/// mov eax, -1 +/// ret +/// ``` +/// +/// Adding the assertion allows the optimizer to remove the extra check: +/// +/// ```asm +/// next_value: +/// mov eax, dword ptr [rdi] +/// inc eax +/// ret +/// ``` +/// +/// This example is quite unlike anything that would be used in the real world: it is redundant +/// to put an assertion right next to code that checks the same thing, and dereferencing a +/// pointer already has the builtin assumption that it is nonnull. However, it illustrates the +/// kind of changes the optimizer can make even when the behavior is less obviously related. 
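Separately from the `hint` docs above, the `compress!` change in `core/src/hash/sip.rs` earlier in this patch only reschedules the SipHash round so the two add/rotate/xor chains interleave; the data flow is unchanged. The same round as a plain function, for comparison against the reference `SIPROUND` (the driver values are arbitrary):

```rust
/// One SipHash round over the four lanes of internal state, in the
/// rescheduled order used by the patched `compress!`.
fn sipround(v: &mut [u64; 4]) {
    v[0] = v[0].wrapping_add(v[1]);
    v[2] = v[2].wrapping_add(v[3]);
    v[1] = v[1].rotate_left(13) ^ v[0];
    v[3] = v[3].rotate_left(16) ^ v[2];
    v[0] = v[0].rotate_left(32);
    v[2] = v[2].wrapping_add(v[1]);
    v[0] = v[0].wrapping_add(v[3]);
    v[1] = v[1].rotate_left(17) ^ v[2];
    v[3] = v[3].rotate_left(21) ^ v[0];
    v[2] = v[2].rotate_left(32);
}

fn main() {
    // SipHash's "somepseudorandomlygeneratedbytes" initialization constants.
    let mut v = [
        0x736f6d6570736575,
        0x646f72616e646f6d,
        0x6c7967656e657261,
        0x7465646279746573,
    ];
    sipround(&mut v);
    // Any nonzero mixing is fine for this sketch; real tests compare
    // against the SipHash reference vectors.
    assert_ne!(v, [0; 4]);
}
```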
+#[track_caller] #[inline(always)] #[doc(alias = "assume")] -#[track_caller] -#[unstable(feature = "hint_assert_unchecked", issue = "119131")] -#[rustc_const_unstable(feature = "const_hint_assert_unchecked", issue = "119131")] +#[stable(feature = "hint_assert_unchecked", since = "CURRENT_RUSTC_VERSION")] +#[rustc_const_stable(feature = "hint_assert_unchecked", since = "CURRENT_RUSTC_VERSION")] pub const unsafe fn assert_unchecked(cond: bool) { // SAFETY: The caller promised `cond` is true. unsafe { @@ -263,7 +314,7 @@ pub fn spin_loop() { /// extent to which it can block optimisations may vary depending upon the platform and code-gen /// backend used. Programs cannot rely on `black_box` for *correctness*, beyond it behaving as the /// identity function. As such, it **must not be relied upon to control critical program behavior.** -/// This _immediately_ precludes any direct use of this function for cryptographic or security +/// This also means that this function does not offer any guarantees for cryptographic or security /// purposes. /// /// [`std::convert::identity`]: crate::convert::identity diff --git a/core/src/internal_macros.rs b/core/src/internal_macros.rs index d3a4d6aff2d8b..bf53b2245ac59 100644 --- a/core/src/internal_macros.rs +++ b/core/src/internal_macros.rs @@ -80,6 +80,47 @@ macro_rules! forward_ref_op_assign { } } +/// Create a zero-size type similar to a closure type, but named. +macro_rules! impl_fn_for_zst { + ($( + $( #[$attr: meta] )* + struct $Name: ident impl$( <$( $lifetime : lifetime ),+> )? Fn = + |$( $arg: ident: $ArgTy: ty ),*| -> $ReturnTy: ty + $body: block; + )+) => { + $( + $( #[$attr] )* + struct $Name; + + impl $( <$( $lifetime ),+> )? Fn<($( $ArgTy, )*)> for $Name { + #[inline] + extern "rust-call" fn call(&self, ($( $arg, )*): ($( $ArgTy, )*)) -> $ReturnTy { + $body + } + } + + impl $( <$( $lifetime ),+> )? FnMut<($( $ArgTy, )*)> for $Name { + #[inline] + extern "rust-call" fn call_mut( + &mut self, + ($( $arg, )*): ($( $ArgTy, )*) + ) -> $ReturnTy { + Fn::call(&*self, ($( $arg, )*)) + } + } + + impl $( <$( $lifetime ),+> )? FnOnce<($( $ArgTy, )*)> for $Name { + type Output = $ReturnTy; + + #[inline] + extern "rust-call" fn call_once(self, ($( $arg, )*): ($( $ArgTy, )*)) -> $ReturnTy { + Fn::call(&self, ($( $arg, )*)) + } + } + )+ + } +} + /// A macro for defining `#[cfg]` if-else statements. /// /// `cfg_if` is similar to the `if/elif` C preprocessor macro by allowing definition of a cascade diff --git a/core/src/intrinsics.rs b/core/src/intrinsics.rs index 5a2a4c5ae6ebe..720da0feecee6 100644 --- a/core/src/intrinsics.rs +++ b/core/src/intrinsics.rs @@ -65,7 +65,6 @@ use crate::marker::DiscriminantKind; use crate::marker::Tuple; -use crate::mem::align_of; use crate::ptr; use crate::ub_checks; @@ -960,7 +959,7 @@ extern "rust-intrinsic" { /// not be used if the invariant can be discovered by the optimizer on its /// own, or if it does not enable any significant optimizations. /// -/// This intrinsic does not have a stable counterpart. +/// The stabilized version of this intrinsic is [`core::hint::assert_unchecked`]. 
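The `impl_fn_for_zst!` macro added to `internal_macros.rs` above generates nameable zero-sized closures, which lets internal iterator adapters spell out their return types. What one expansion looks like written by hand, on nightly with `fn_traits`/`unboxed_closures` (`ToUpper` is an invented stand-in, not a real expansion):

```rust
#![feature(fn_traits, unboxed_closures)]

/// A named zero-sized "closure": callable like a function, but nameable
/// in type signatures, unlike an anonymous closure type.
struct ToUpper;

impl FnOnce<(char,)> for ToUpper {
    type Output = char;
    extern "rust-call" fn call_once(self, (c,): (char,)) -> char {
        c.to_ascii_uppercase()
    }
}

impl FnMut<(char,)> for ToUpper {
    extern "rust-call" fn call_mut(&mut self, (c,): (char,)) -> char {
        c.to_ascii_uppercase()
    }
}

impl Fn<(char,)> for ToUpper {
    extern "rust-call" fn call(&self, (c,): (char,)) -> char {
        c.to_ascii_uppercase()
    }
}

fn main() {
    // The named ZST goes anywhere an `impl FnMut(char) -> char` is expected.
    let s: String = "abc".chars().map(ToUpper).collect();
    assert_eq!(s, "ABC");
}
```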
#[rustc_const_stable(feature = "const_assume", since = "1.77.0")] #[rustc_nounwind] #[unstable(feature = "core_intrinsics", issue = "none")] @@ -987,7 +986,7 @@ pub const unsafe fn assume(b: bool) { #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_intrinsic] #[rustc_nounwind] -#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)] +#[miri::intrinsic_fallback_is_spec] pub const fn likely(b: bool) -> bool { b } @@ -1007,7 +1006,7 @@ pub const fn likely(b: bool) -> bool { #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_intrinsic] #[rustc_nounwind] -#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)] +#[miri::intrinsic_fallback_is_spec] pub const fn unlikely(b: bool) -> bool { b } @@ -2483,7 +2482,7 @@ extern "rust-intrinsic" { #[rustc_nounwind] #[rustc_do_not_const_check] #[inline] -#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)] +#[miri::intrinsic_fallback_is_spec] pub const fn ptr_guaranteed_cmp(ptr: *const T, other: *const T) -> u8 { (ptr == other) as u8 } @@ -2580,7 +2579,7 @@ extern "rust-intrinsic" { /// fn runtime() -> i32 { 1 } /// const fn compiletime() -> i32 { 2 } /// -// // ⚠ This code violates the required equivalence of `compiletime` +/// // ⚠ This code violates the required equivalence of `compiletime` /// // and `runtime`. /// const_eval_select((), compiletime, runtime) /// } @@ -2748,7 +2747,7 @@ pub const fn ub_checks() -> bool { #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_nounwind] #[rustc_intrinsic] -#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)] +#[miri::intrinsic_fallback_is_spec] pub const unsafe fn const_allocate(_size: usize, _align: usize) -> *mut u8 { // const eval overrides this function, but runtime code for now just returns null pointers. // See . @@ -2769,7 +2768,7 @@ pub const unsafe fn const_allocate(_size: usize, _align: usize) -> *mut u8 { #[unstable(feature = "core_intrinsics", issue = "none")] #[rustc_nounwind] #[rustc_intrinsic] -#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)] +#[miri::intrinsic_fallback_is_spec] pub const unsafe fn const_deallocate(_ptr: *mut u8, _size: usize, _align: usize) { // Runtime NOP } @@ -2821,6 +2820,20 @@ impl AggregateRawPtr<*mut T> for *mut P { type Metadata =
<P as ptr::Pointee>
::Metadata; } +/// Lowers in MIR to `Rvalue::UnaryOp` with `UnOp::PtrMetadata`. +/// +/// This is used to implement functions like `ptr::metadata`. +#[rustc_nounwind] +#[unstable(feature = "core_intrinsics", issue = "none")] +#[rustc_const_unstable(feature = "ptr_metadata", issue = "81513")] +#[rustc_intrinsic] +#[rustc_intrinsic_must_be_overridden] +pub const fn ptr_metadata + ?Sized, M>(_ptr: *const P) -> M { + // To implement a fallback we'd have to assume the layout of the pointer, + // but the whole point of this intrinsic is that we shouldn't do that. + unreachable!() +} + // Some functions are defined here because they accidentally got made // available in this module on stable. See . // (`transmute` also falls into this category, but it cannot be wrapped due to the @@ -3030,8 +3043,7 @@ pub const unsafe fn copy(src: *const T, dst: *mut T, count: usize) { unsafe { ub_checks::assert_unsafe_precondition!( check_language_ub, - "ptr::copy_nonoverlapping requires that both pointer arguments are aligned and non-null \ - and the specified memory ranges do not overlap", + "ptr::copy requires that both pointer arguments are aligned and non-null", ( src: *const () = src as *const (), dst: *mut () = dst as *mut (), diff --git a/core/src/intrinsics/mir.rs b/core/src/intrinsics/mir.rs index 02665b2676cc1..1daf1d723fb95 100644 --- a/core/src/intrinsics/mir.rs +++ b/core/src/intrinsics/mir.rs @@ -20,7 +20,7 @@ //! //! #[custom_mir(dialect = "built")] //! pub fn simple(x: i32) -> i32 { -//! mir!( +//! mir! { //! let temp2: i32; //! //! { @@ -33,7 +33,7 @@ //! RET = temp2; //! Return() //! } -//! ) +//! } //! } //! ``` //! @@ -71,7 +71,7 @@ //! //! #[custom_mir(dialect = "built")] //! pub fn choose_load(a: &i32, b: &i32, c: bool) -> i32 { -//! mir!( +//! mir! { //! { //! match c { //! true => t, @@ -93,20 +93,22 @@ //! RET = *temp; //! Return() //! } -//! ) +//! } //! } //! //! #[custom_mir(dialect = "built")] //! fn unwrap_unchecked(opt: Option) -> T { -//! mir!({ -//! RET = Move(Field(Variant(opt, 1), 0)); -//! Return() -//! }) +//! mir! { +//! { +//! RET = Move(Field(Variant(opt, 1), 0)); +//! Return() +//! } +//! } //! } //! //! #[custom_mir(dialect = "runtime", phase = "optimized")] //! fn push_and_pop(v: &mut Vec, value: T) { -//! mir!( +//! mir! { //! let _unused; //! let popped; //! @@ -125,19 +127,19 @@ //! ret = { //! Return() //! } -//! ) +//! } //! } //! //! #[custom_mir(dialect = "runtime", phase = "optimized")] //! fn annotated_return_type() -> (i32, bool) { -//! mir!( +//! mir! { //! type RET = (i32, bool); //! { //! RET.0 = 1; //! RET.1 = true; //! Return() //! } -//! ) +//! } //! } //! ``` //! @@ -152,7 +154,7 @@ //! //! #[custom_mir(dialect = "built")] //! fn borrow_error(should_init: bool) -> i32 { -//! mir!( +//! mir! { //! let temp: i32; //! //! { @@ -171,7 +173,7 @@ //! RET = temp; //! Return() //! } -//! ) +//! } //! } //! ``` //! @@ -179,7 +181,7 @@ //! error[E0381]: used binding is possibly-uninitialized //! --> test.rs:24:13 //! | -//! 8 | / mir!( +//! 8 | / mir! { //! 9 | | let temp: i32; //! 10 | | //! 11 | | { @@ -191,7 +193,7 @@ //! | | ^^^^^^^^^^ value used here but it is possibly-uninitialized //! 25 | | Return() //! 26 | | } -//! 27 | | ) +//! 27 | | } //! | |_____- binding declared here but left uninitialized //! //! 
error: aborting due to 1 previous error @@ -360,6 +362,10 @@ define!("mir_assume", fn Assume(operand: bool)); define!("mir_deinit", fn Deinit(place: T)); define!("mir_checked", fn Checked(binop: T) -> (T, bool)); define!("mir_len", fn Len(place: T) -> usize); +define!( + "mir_ptr_metadata", + fn PtrMetadata(place: *const P) ->
<P as ::core::ptr::Pointee>::Metadata
);
(&mut self, predicate: P) -> Option where P: FnOnce(&mut T) -> bool, diff --git a/core/src/panic.rs b/core/src/panic.rs index 8771f40f9b42b..37c338dd9b778 100644 --- a/core/src/panic.rs +++ b/core/src/panic.rs @@ -12,6 +12,8 @@ use crate::any::Any; pub use self::location::Location; #[stable(feature = "panic_hooks", since = "1.10.0")] pub use self::panic_info::PanicInfo; +#[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] +pub use self::panic_info::PanicMessage; #[stable(feature = "catch_unwind", since = "1.9.0")] pub use self::unwind_safe::{AssertUnwindSafe, RefUnwindSafe, UnwindSafe}; @@ -144,7 +146,7 @@ pub macro unreachable_2021 { /// use. #[unstable(feature = "std_internals", issue = "none")] #[doc(hidden)] -pub unsafe trait PanicPayload { +pub unsafe trait PanicPayload: crate::fmt::Display { /// Take full ownership of the contents. /// The return type is actually `Box`, but we cannot use `Box` in core. /// @@ -157,4 +159,9 @@ pub unsafe trait PanicPayload { /// Just borrow the contents. fn get(&mut self) -> &(dyn Any + Send); + + /// Try to borrow the contents as `&str`, if possible without doing any allocations. + fn as_str(&mut self) -> Option<&str> { + None + } } diff --git a/core/src/panic/location.rs b/core/src/panic/location.rs index eb27da1724ec9..8c04994ac0fc4 100644 --- a/core/src/panic/location.rs +++ b/core/src/panic/location.rs @@ -2,9 +2,10 @@ use crate::fmt; /// A struct containing information about the location of a panic. /// -/// This structure is created by [`PanicInfo::location()`]. +/// This structure is created by [`PanicHookInfo::location()`] and [`PanicInfo::location()`]. /// /// [`PanicInfo::location()`]: crate::panic::PanicInfo::location +/// [`PanicHookInfo::location()`]: ../../std/panic/struct.PanicHookInfo.html#method.location /// /// # Examples /// diff --git a/core/src/panic/panic_info.rs b/core/src/panic/panic_info.rs index 403262212580c..6bbb9c3017110 100644 --- a/core/src/panic/panic_info.rs +++ b/core/src/panic/panic_info.rs @@ -1,99 +1,65 @@ -use crate::any::Any; -use crate::fmt; +use crate::fmt::{self, Display}; use crate::panic::Location; /// A struct providing information about a panic. /// -/// `PanicInfo` structure is passed to a panic hook set by the [`set_hook`] -/// function. +/// A `PanicInfo` structure is passed to the panic handler defined by `#[panic_handler]`. /// -/// [`set_hook`]: ../../std/panic/fn.set_hook.html +/// For the type used by the panic hook mechanism in `std`, see [`std::panic::PanicHookInfo`]. /// -/// # Examples -/// -/// ```should_panic -/// use std::panic; -/// -/// panic::set_hook(Box::new(|panic_info| { -/// println!("panic occurred: {panic_info}"); -/// })); -/// -/// panic!("critical system failure"); -/// ``` +/// [`std::panic::PanicHookInfo`]: ../../std/panic/struct.PanicHookInfo.html #[lang = "panic_info"] #[stable(feature = "panic_hooks", since = "1.10.0")] #[derive(Debug)] pub struct PanicInfo<'a> { - payload: &'a (dyn Any + Send), - message: Option<&'a fmt::Arguments<'a>>, + message: fmt::Arguments<'a>, location: &'a Location<'a>, can_unwind: bool, force_no_backtrace: bool, } +/// A message that was given to the `panic!()` macro. +/// +/// The [`Display`] implementation of this type will format the message with the arguments +/// that were given to the `panic!()` macro. +/// +/// See [`PanicInfo::message`]. 
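The signature fragment at the top of this hunk belongs to `Option::take_if`, which takes the value out only when a predicate on `&mut T` returns `true`. A quick illustration of its semantics (a stable API; values here are arbitrary):

```rust
fn main() {
    let mut slot = Some(41);

    // The predicate sees `&mut T`, so it may mutate before deciding.
    assert_eq!(slot.take_if(|v| { *v += 1; *v == 42 }), Some(42));
    assert_eq!(slot, None);

    // When the predicate fails, the (possibly mutated) value stays put.
    let mut kept = Some(0);
    assert_eq!(kept.take_if(|v| *v == 42), None);
    assert_eq!(kept, Some(0));
}
```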
+#[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] +pub struct PanicMessage<'a> { + message: fmt::Arguments<'a>, +} + impl<'a> PanicInfo<'a> { - #[unstable( - feature = "panic_internals", - reason = "internal details of the implementation of the `panic!` and related macros", - issue = "none" - )] - #[doc(hidden)] #[inline] - pub fn internal_constructor( - message: Option<&'a fmt::Arguments<'a>>, + pub(crate) fn new( + message: fmt::Arguments<'a>, location: &'a Location<'a>, can_unwind: bool, force_no_backtrace: bool, ) -> Self { - struct NoPayload; - PanicInfo { location, message, payload: &NoPayload, can_unwind, force_no_backtrace } + PanicInfo { location, message, can_unwind, force_no_backtrace } } - #[unstable( - feature = "panic_internals", - reason = "internal details of the implementation of the `panic!` and related macros", - issue = "none" - )] - #[doc(hidden)] - #[inline] - pub fn set_payload(&mut self, info: &'a (dyn Any + Send)) { - self.payload = info; - } - - /// Returns the payload associated with the panic. - /// - /// This will commonly, but not always, be a `&'static str` or [`String`]. + /// The message that was given to the `panic!` macro. /// - /// [`String`]: ../../std/string/struct.String.html + /// # Example /// - /// # Examples - /// - /// ```should_panic - /// use std::panic; + /// The type returned by this method implements `Display`, so it can + /// be passed directly to [`write!()`] and similar macros. /// - /// panic::set_hook(Box::new(|panic_info| { - /// if let Some(s) = panic_info.payload().downcast_ref::<&str>() { - /// println!("panic occurred: {s:?}"); - /// } else { - /// println!("panic occurred"); - /// } - /// })); + /// [`write!()`]: core::write /// - /// panic!("Normal panic"); + /// ```ignore (no_std) + /// #[panic_handler] + /// fn panic_handler(panic_info: &PanicInfo<'_>) -> ! { + /// write!(DEBUG_OUTPUT, "panicked: {}", panic_info.message()); + /// loop {} + /// } /// ``` #[must_use] - #[stable(feature = "panic_hooks", since = "1.10.0")] - pub fn payload(&self) -> &(dyn Any + Send) { - self.payload - } - - /// If the `panic!` macro from the `core` crate (not from `std`) - /// was used with a formatting string and some additional arguments, - /// returns that message ready to be used for example with [`fmt::write`] - #[must_use] - #[unstable(feature = "panic_info_message", issue = "66745")] - pub fn message(&self) -> Option<&fmt::Arguments<'_>> { - self.message + #[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] + pub fn message(&self) -> PanicMessage<'_> { + PanicMessage { message: self.message } } /// Returns information about the location from which the panic originated, @@ -128,6 +94,24 @@ impl<'a> PanicInfo<'a> { Some(&self.location) } + /// Returns the payload associated with the panic. + /// + /// On this type, `core::panic::PanicInfo`, this method never returns anything useful. + /// It only exists because of compatibility with [`std::panic::PanicHookInfo`], + /// which used to be the same type. + /// + /// See [`std::panic::PanicHookInfo::payload`]. 
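The deprecation above points std users at `PanicHookInfo::payload`; a runnable sketch of that side, mirroring the hook example this diff removes from `core`:

```rust
use std::panic;

fn main() {
    panic::set_hook(Box::new(|info| {
        // In a hook, the payload is still how you observe the panic value.
        if let Some(s) = info.payload().downcast_ref::<&str>() {
            eprintln!("panic occurred: {s:?}");
        }
    }));
    let _ = panic::catch_unwind(|| panic!("Normal panic"));
}
```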
+ /// + /// [`std::panic::PanicHookInfo`]: ../../std/panic/struct.PanicHookInfo.html + /// [`std::panic::PanicHookInfo::payload`]: ../../std/panic/struct.PanicHookInfo.html#method.payload + #[deprecated(since = "1.81.0", note = "this never returns anything useful")] + #[stable(feature = "panic_hooks", since = "1.10.0")] + #[allow(deprecated, deprecated_in_future)] + pub fn payload(&self) -> &(dyn crate::any::Any + Send) { + struct NoPayload; + &NoPayload + } + /// Returns whether the panic handler is allowed to unwind the stack from /// the point where the panic occurred. /// @@ -157,22 +141,50 @@ impl<'a> PanicInfo<'a> { } #[stable(feature = "panic_hook_display", since = "1.26.0")] -impl fmt::Display for PanicInfo<'_> { +impl Display for PanicInfo<'_> { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("panicked at ")?; self.location.fmt(formatter)?; - formatter.write_str(":")?; - if let Some(message) = self.message { - formatter.write_str("\n")?; - formatter.write_fmt(*message)?; - } else if let Some(payload) = self.payload.downcast_ref::<&'static str>() { - formatter.write_str("\n")?; - formatter.write_str(payload)?; - } - // NOTE: we cannot use downcast_ref::() here - // since String is not available in core! - // The payload is a String when `std::panic!` is called with multiple arguments, - // but in that case the message is also available. + formatter.write_str(":\n")?; + formatter.write_fmt(self.message)?; Ok(()) } } + +impl<'a> PanicMessage<'a> { + /// Get the formatted message, if it has no arguments to be formatted at runtime. + /// + /// This can be used to avoid allocations in some cases. + /// + /// # Guarantees + /// + /// For `panic!("just a literal")`, this function is guaranteed to + /// return `Some("just a literal")`. + /// + /// For most cases with placeholders, this function will return `None`. + /// + /// See [`fmt::Arguments::as_str`] for details. + #[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_unstable(feature = "const_arguments_as_str", issue = "103900")] + #[must_use] + #[inline] + pub const fn as_str(&self) -> Option<&'static str> { + self.message.as_str() + } +} + +#[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] +impl Display for PanicMessage<'_> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_fmt(self.message) + } +} + +#[stable(feature = "panic_info_message", since = "CURRENT_RUSTC_VERSION")] +impl fmt::Debug for PanicMessage<'_> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_fmt(self.message) + } +} diff --git a/core/src/panicking.rs b/core/src/panicking.rs index ca06e059b75ac..97fb1d6b7323f 100644 --- a/core/src/panicking.rs +++ b/core/src/panicking.rs @@ -1,7 +1,14 @@ //! Panic support for core //! -//! The core library cannot define panicking, but it does *declare* panicking. This -//! means that the functions inside of core are allowed to panic, but to be +//! In core, panicking is always done with a message, resulting in a `core::panic::PanicInfo` +//! containing a `fmt::Arguments`. In std, however, panicking can be done with panic_any, which +//! throws a `Box` containing any type of value. Because of this, +//! `std::panic::PanicHookInfo` is a different type, which contains a `&dyn Any` instead of a +//! `fmt::Arguments`. std's panic handler will convert the `fmt::Arguments` to a `&dyn Any` +//! 
containing either a `&'static str` or `String` containing the formatted message. +//! +//! The core library cannot define any panic handler, but it can invoke it. +//! This means that the functions inside of core are allowed to panic, but to be //! useful an upstream crate must define panicking for core to use. The current //! interface for panicking is: //! @@ -10,11 +17,6 @@ //! # { loop {} } //! ``` //! -//! This definition allows for panicking with any general message, but it does not -//! allow for failing with a `Box` value. (`PanicInfo` just contains a `&(dyn Any + Send)`, -//! for which we fill in a dummy value in `PanicInfo::internal_constructor`.) -//! The reason for this is that core is not allowed to allocate. -//! //! This module contains a few other panicking functions, but these are just the //! necessary lang items for the compiler. All panics are funneled through this //! one function. The actual symbol is declared through the `#[panic_handler]` attribute. @@ -61,8 +63,8 @@ pub const fn panic_fmt(fmt: fmt::Arguments<'_>) -> ! { fn panic_impl(pi: &PanicInfo<'_>) -> !; } - let pi = PanicInfo::internal_constructor( - Some(&fmt), + let pi = PanicInfo::new( + fmt, Location::caller(), /* can_unwind */ true, /* force_no_backtrace */ false, @@ -99,8 +101,8 @@ pub const fn panic_nounwind_fmt(fmt: fmt::Arguments<'_>, force_no_backtrace: boo } // PanicInfo with the `can_unwind` flag set to false forces an abort. - let pi = PanicInfo::internal_constructor( - Some(&fmt), + let pi = PanicInfo::new( + fmt, Location::caller(), /* can_unwind */ false, force_no_backtrace, diff --git a/core/src/pin.rs b/core/src/pin.rs index d8fc3b7177f38..0d2aa3070a19f 100644 --- a/core/src/pin.rs +++ b/core/src/pin.rs @@ -184,7 +184,7 @@ //! requires at least a level of pointer indirection each time a new object is added to the mix //! (and, practically, a heap allocation). //! -//! Although there were other reason as well, this issue of expensive composition is the key thing +//! Although there were other reasons as well, this issue of expensive composition is the key thing //! that drove Rust towards adopting a different model. It is particularly a problem //! when one considers, for example, the implications of composing together the [`Future`]s which //! will eventually make up an asynchronous task (including address-sensitive `async fn` state diff --git a/core/src/prelude/common.rs b/core/src/prelude/common.rs index afc6817aa1d24..e38ef1e147c76 100644 --- a/core/src/prelude/common.rs +++ b/core/src/prelude/common.rs @@ -2,6 +2,9 @@ //! //! See the [module-level documentation](super) for more. +// No formatting: this file is nothing but re-exports, and their order is worth preserving. 
+#![cfg_attr(rustfmt, rustfmt::skip)] + // Re-exported core operators #[stable(feature = "core_prelude", since = "1.4.0")] #[doc(no_inline)] @@ -14,6 +17,9 @@ pub use crate::ops::{Drop, Fn, FnMut, FnOnce}; #[stable(feature = "core_prelude", since = "1.4.0")] #[doc(no_inline)] pub use crate::mem::drop; +#[stable(feature = "size_of_prelude", since = "1.80.0")] +#[doc(no_inline)] +pub use crate::mem::{align_of, align_of_val, size_of, size_of_val}; // Re-exported types and traits #[stable(feature = "core_prelude", since = "1.4.0")] @@ -30,10 +36,7 @@ pub use crate::convert::{AsMut, AsRef, From, Into}; pub use crate::default::Default; #[stable(feature = "core_prelude", since = "1.4.0")] #[doc(no_inline)] -pub use crate::iter::{DoubleEndedIterator, ExactSizeIterator}; -#[stable(feature = "core_prelude", since = "1.4.0")] -#[doc(no_inline)] -pub use crate::iter::{Extend, IntoIterator, Iterator}; +pub use crate::iter::{DoubleEndedIterator, ExactSizeIterator, Extend, IntoIterator, Iterator}; #[stable(feature = "core_prelude", since = "1.4.0")] #[doc(no_inline)] pub use crate::option::Option::{self, None, Some}; diff --git a/core/src/prelude/mod.rs b/core/src/prelude/mod.rs index ca33ef160e88b..496b78439ea6c 100644 --- a/core/src/prelude/mod.rs +++ b/core/src/prelude/mod.rs @@ -4,6 +4,9 @@ //! This module is imported by default when `#![no_std]` is used in the same //! manner as the standard library's prelude. +// No formatting: this file is nothing but re-exports, and their order is worth preserving. +#![cfg_attr(rustfmt, rustfmt::skip)] + #![stable(feature = "core_prelude", since = "1.4.0")] mod common; diff --git a/core/src/ptr/const_ptr.rs b/core/src/ptr/const_ptr.rs index c8065b2e70906..3e7933e9eec86 100644 --- a/core/src/ptr/const_ptr.rs +++ b/core/src/ptr/const_ptr.rs @@ -112,71 +112,6 @@ impl *const T { self as _ } - /// Casts a pointer to its raw bits. - /// - /// This is equivalent to `as usize`, but is more specific to enhance readability. - /// The inverse method is [`from_bits`](#method.from_bits). - /// - /// In particular, `*p as usize` and `p as usize` will both compile for - /// pointers to numeric types but do very different things, so using this - /// helps emphasize that reading the bits was intentional. - /// - /// # Examples - /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// # #[cfg(not(miri))] { // doctest does not work with strict provenance - /// let array = [13, 42]; - /// let p0: *const i32 = &array[0]; - /// assert_eq!(<*const _>::from_bits(p0.to_bits()), p0); - /// let p1: *const i32 = &array[1]; - /// assert_eq!(p1.to_bits() - p0.to_bits(), 4); - /// # } - /// ``` - #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - #[deprecated( - since = "1.67.0", - note = "replaced by the `expose_provenance` method, or update your code \ - to follow the strict provenance rules using its APIs" - )] - #[inline(always)] - pub fn to_bits(self) -> usize - where - T: Sized, - { - self as usize - } - - /// Creates a pointer from its raw bits. - /// - /// This is equivalent to `as *const T`, but is more specific to enhance readability. - /// The inverse method is [`to_bits`](#method.to_bits). 
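With the prelude additions in the hunk above, `size_of` and friends need no explicit import on a 1.80+ toolchain; a runnable sketch:

```rust
fn main() {
    // No `use std::mem::{size_of, align_of, size_of_val};` needed on 1.80+.
    assert_eq!(size_of::<u64>(), 8);
    assert_eq!(align_of::<u32>(), 4);
    let xs = [0u16; 4];
    assert_eq!(size_of_val(&xs), 8);
}
```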
- /// - /// # Examples - /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// # #[cfg(not(miri))] { // doctest does not work with strict provenance - /// use std::ptr::NonNull; - /// let dangling: *const u8 = NonNull::dangling().as_ptr(); - /// assert_eq!(<*const u8>::from_bits(1), dangling); - /// # } - /// ``` - #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - #[deprecated( - since = "1.67.0", - note = "replaced by the `ptr::with_exposed_provenance` function, or update \ - your code to follow the strict provenance rules using its APIs" - )] - #[allow(fuzzy_provenance_casts)] // this is an unstable and semi-deprecated cast function - #[inline(always)] - pub fn from_bits(bits: usize) -> Self - where - T: Sized, - { - bits as Self - } - /// Gets the "address" portion of the pointer. /// /// This is similar to `self as usize`, which semantically discards *provenance* and @@ -330,7 +265,7 @@ impl *const T { /// /// unsafe { /// if let Some(val_back) = ptr.as_ref() { - /// println!("We got back the value: {val_back}!"); + /// assert_eq!(val_back, &10); /// } /// } /// ``` @@ -346,7 +281,7 @@ impl *const T { /// /// unsafe { /// let val_back = &*ptr; - /// println!("We got back the value: {val_back}!"); + /// assert_eq!(val_back, &10); /// } /// ``` #[stable(feature = "ptr_as_ref", since = "1.9.0")] @@ -393,7 +328,7 @@ impl *const T { /// let ptr: *const u8 = &10u8 as *const u8; /// /// unsafe { - /// println!("We got back the value: {}!", ptr.as_ref_unchecked()); + /// assert_eq!(ptr.as_ref_unchecked(), &10); /// } /// ``` // FIXME: mention it in the docs for `as_ref` and `as_uninit_ref` once stabilized. @@ -439,7 +374,7 @@ impl *const T { /// /// unsafe { /// if let Some(val_back) = ptr.as_uninit_ref() { - /// println!("We got back the value: {}!", val_back.assume_init()); + /// assert_eq!(val_back.assume_init(), 10); /// } /// } /// ``` @@ -455,37 +390,26 @@ impl *const T { if self.is_null() { None } else { Some(unsafe { &*(self as *const MaybeUninit) }) } } - /// Calculates the offset from a pointer. + /// Adds an offset to a pointer. /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// * The computed offset, **in bytes**, cannot overflow an `isize`. + /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum, **in bytes** must fit in a usize. - /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. 
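As the deprecation notes for the removed `to_bits`/`from_bits` pair suggest, the replacement is the exposed-provenance API. A sketch; on older nightlies these calls may still sit behind `#![feature(exposed_provenance)]`:

```rust
fn main() {
    let x = 42u8;
    let p: *const u8 = &x;

    // Old: `p.to_bits()`. New: explicitly expose the provenance.
    let addr: usize = p.expose_provenance();

    // Old: `<*const u8>::from_bits(addr)`. New: pick the exposed provenance back up.
    let q: *const u8 = std::ptr::with_exposed_provenance(addr);
    assert_eq!(unsafe { *q }, 42);
}
```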
For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len())` is always safe. - /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_offset`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -501,8 +425,8 @@ impl *const T { /// let ptr: *const u8 = s.as_ptr(); /// /// unsafe { - /// println!("{}", *ptr.offset(1) as char); - /// println!("{}", *ptr.offset(2) as char); + /// assert_eq!(*ptr.offset(1) as char, '2'); + /// assert_eq!(*ptr.offset(2) as char, '3'); /// } /// ``` #[stable(feature = "rust1", since = "1.0.0")] @@ -573,19 +497,21 @@ impl *const T { /// # Examples /// /// ``` + /// # use std::fmt::Write; /// // Iterate using a raw pointer in increments of two elements /// let data = [1u8, 2, 3, 4, 5]; /// let mut ptr: *const u8 = data.as_ptr(); /// let step = 2; /// let end_rounded_up = ptr.wrapping_offset(6); /// - /// // This loop prints "1, 3, 5, " + /// let mut out = String::new(); /// while ptr != end_rounded_up { /// unsafe { - /// print!("{}, ", *ptr); + /// write!(&mut out, "{}, ", *ptr).unwrap(); /// } /// ptr = ptr.wrapping_offset(step); /// } + /// assert_eq!(out.as_str(), "1, 3, 5, "); /// ``` #[stable(feature = "ptr_wrapping_offset", since = "1.16.0")] #[must_use = "returns a new pointer rather than modifying its argument"] @@ -674,38 +600,21 @@ impl *const T { /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// /// * `self` and `origin` must either /// + /// * point to the same address, or /// * both be *derived from* a pointer to the same [allocated object], and the memory range between - /// the two pointers must be either empty or in bounds of that object. (See below for an example.) - /// * or both be derived from an integer literal/constant, and point to the same address. + /// the two pointers must be in bounds of that object. (See below for an example.) /// /// * The distance between the pointers, in bytes, must be an exact multiple /// of the size of `T`. /// - /// * The distance between the pointers, **in bytes**, cannot overflow an `isize`. - /// - /// * The distance being in bounds cannot rely on "wrapping around" the address space. - /// - /// Rust types are never larger than `isize::MAX` and Rust allocations never wrap around the - /// address space, so two pointers within some value of any Rust type `T` will always satisfy - /// the last two conditions. The standard library also generally ensures that allocations - /// never reach a size where an offset is a concern. 
For instance, `Vec` and `Box` ensure they - /// never allocate more than `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` - /// always satisfies the last two conditions. - /// - /// Most platforms fundamentally can't even construct such a large allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. - /// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on - /// such large allocations either.) + /// As a consequence, the absolute distance between the pointers, in bytes, computed on + /// mathematical integers (without "wrapping around"), cannot overflow an `isize`. This is + /// implied by the in-bounds requirement, and the fact that no allocated object can be larger + /// than `isize::MAX` bytes. /// /// The requirement for pointers to be derived from the same allocated object is primarily /// needed for `const`-compatibility: the distance between pointers into *different* allocated @@ -744,14 +653,14 @@ impl *const T { /// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8; /// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8; /// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize); - /// // Make ptr2_other an "alias" of ptr2, but derived from ptr1. - /// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff); + /// // Make ptr2_other an "alias" of ptr2.add(1), but derived from ptr1. + /// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff).wrapping_offset(1); /// assert_eq!(ptr2 as usize, ptr2_other as usize); /// // Since ptr2_other and ptr2 are derived from pointers to different objects, /// // computing their offset is undefined behavior, even though - /// // they point to the same address! + /// // they point to addresses that are in-bounds of the same object! /// unsafe { - /// let zero = ptr2_other.offset_from(ptr2); // Undefined Behavior + /// let one = ptr2_other.offset_from(ptr2); // Undefined Behavior! ⚠️ /// } /// ``` #[stable(feature = "ptr_offset_from", since = "1.47.0")] @@ -942,37 +851,26 @@ impl *const T { } } - /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`). + /// Adds an offset to a pointer (convenience for `.offset(count as isize)`). /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// * The computed offset, **in bytes**, cannot overflow an `isize`. 
+ /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum must fit in a `usize`. - /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len())` is always safe. - /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_add`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -988,8 +886,8 @@ impl *const T { /// let ptr: *const u8 = s.as_ptr(); /// /// unsafe { - /// println!("{}", *ptr.add(1) as char); - /// println!("{}", *ptr.add(2) as char); + /// assert_eq!(*ptr.add(1), b'2'); + /// assert_eq!(*ptr.add(2), b'3'); /// } /// ``` #[stable(feature = "pointer_methods", since = "1.26.0")] @@ -1026,7 +924,7 @@ impl *const T { unsafe { self.cast::().add(count).with_metadata_of(self) } } - /// Calculates the offset from a pointer (convenience for + /// Subtracts an offset from a pointer (convenience for /// `.offset((count as isize).wrapping_neg())`). /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer @@ -1034,30 +932,19 @@ impl *const T { /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: - /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) - /// - /// * The computed offset cannot exceed `isize::MAX` **bytes**. + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum must fit in a usize. + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. 
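The rewritten `offset_from` contract above is easiest to read through a positive example: both pointers derived from the same allocation (one-past-the-end counts as in bounds), with a distance that is an exact multiple of the element size:

```rust
fn main() {
    let a = [0i32; 5];
    let base = a.as_ptr();
    // One-past-the-end is still in bounds for these rules.
    let end = unsafe { base.add(5) };
    assert_eq!(unsafe { end.offset_from(base) }, 5);
    assert_eq!(unsafe { base.offset_from(end) }, -5);
}
```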
+ /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_sub`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -1073,13 +960,14 @@ impl *const T { /// /// unsafe { /// let end: *const u8 = s.as_ptr().add(3); - /// println!("{}", *end.sub(1) as char); - /// println!("{}", *end.sub(2) as char); + /// assert_eq!(*end.sub(1), b'3'); + /// assert_eq!(*end.sub(2), b'2'); /// } /// ``` #[stable(feature = "pointer_methods", since = "1.26.0")] #[must_use = "returns a new pointer rather than modifying its argument"] #[rustc_const_stable(feature = "const_ptr_offset", since = "1.61.0")] + #[rustc_allow_const_fn_unstable(unchecked_neg)] #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn sub(self, count: usize) -> Self @@ -1093,7 +981,7 @@ impl *const T { // SAFETY: the caller must uphold the safety contract for `offset`. // Because the pointee is *not* a ZST, that means that `count` is // at most `isize::MAX`, and thus the negation cannot overflow. 
- unsafe { self.offset(intrinsics::unchecked_sub(0, count as isize)) } + unsafe { self.offset((count as isize).unchecked_neg()) } } } @@ -1154,19 +1042,21 @@ impl *const T { /// # Examples /// /// ``` + /// # use std::fmt::Write; /// // Iterate using a raw pointer in increments of two elements /// let data = [1u8, 2, 3, 4, 5]; /// let mut ptr: *const u8 = data.as_ptr(); /// let step = 2; /// let end_rounded_up = ptr.wrapping_add(6); /// - /// // This loop prints "1, 3, 5, " + /// let mut out = String::new(); /// while ptr != end_rounded_up { /// unsafe { - /// print!("{}, ", *ptr); + /// write!(&mut out, "{}, ", *ptr).unwrap(); /// } /// ptr = ptr.wrapping_add(step); /// } + /// assert_eq!(out, "1, 3, 5, "); /// ``` #[stable(feature = "pointer_methods", since = "1.26.0")] #[must_use = "returns a new pointer rather than modifying its argument"] @@ -1233,19 +1123,21 @@ impl *const T { /// # Examples /// /// ``` + /// # use std::fmt::Write; /// // Iterate using a raw pointer in increments of two elements (backwards) /// let data = [1u8, 2, 3, 4, 5]; /// let mut ptr: *const u8 = data.as_ptr(); /// let start_rounded_down = ptr.wrapping_sub(2); /// ptr = ptr.wrapping_add(4); /// let step = 2; - /// // This loop prints "5, 3, 1, " + /// let mut out = String::new(); /// while ptr != start_rounded_down { /// unsafe { - /// print!("{}, ", *ptr); + /// write!(&mut out, "{}, ", *ptr).unwrap(); /// } /// ptr = ptr.wrapping_sub(step); /// } + /// assert_eq!(out, "5, 3, 1, "); /// ``` #[stable(feature = "pointer_methods", since = "1.26.0")] #[must_use = "returns a new pointer rather than modifying its argument"] diff --git a/core/src/ptr/metadata.rs b/core/src/ptr/metadata.rs index e501970b580de..eb86bf6620652 100644 --- a/core/src/ptr/metadata.rs +++ b/core/src/ptr/metadata.rs @@ -3,6 +3,7 @@ use crate::fmt; use crate::hash::{Hash, Hasher}; use crate::intrinsics::aggregate_raw_ptr; +use crate::intrinsics::ptr_metadata; use crate::marker::Freeze; /// Provides the pointer metadata type of any pointed-to type. @@ -94,10 +95,7 @@ pub trait Thin = Pointee; #[rustc_const_unstable(feature = "ptr_metadata", issue = "81513")] #[inline] pub const fn metadata(ptr: *const T) -> ::Metadata { - // SAFETY: Accessing the value from the `PtrRepr` union is safe since *const T - // and PtrComponents have the same memory layouts. Only std can make this - // guarantee. - unsafe { PtrRepr { const_ptr: ptr }.components.metadata } + ptr_metadata(ptr) } /// Forms a (possibly-wide) raw pointer from a data pointer and metadata. @@ -111,7 +109,7 @@ pub const fn metadata(ptr: *const T) -> ::Metadata { #[rustc_const_unstable(feature = "ptr_metadata", issue = "81513")] #[inline] pub const fn from_raw_parts( - data_pointer: *const (), + data_pointer: *const impl Thin, metadata: ::Metadata, ) -> *const T { aggregate_raw_ptr(data_pointer, metadata) @@ -125,35 +123,12 @@ pub const fn from_raw_parts( #[rustc_const_unstable(feature = "ptr_metadata", issue = "81513")] #[inline] pub const fn from_raw_parts_mut( - data_pointer: *mut (), + data_pointer: *mut impl Thin, metadata: ::Metadata, ) -> *mut T { aggregate_raw_ptr(data_pointer, metadata) } -#[repr(C)] -union PtrRepr { - const_ptr: *const T, - mut_ptr: *mut T, - components: PtrComponents, -} - -#[repr(C)] -struct PtrComponents { - data_pointer: *const (), - metadata: ::Metadata, -} - -// Manual impl needed to avoid `T: Copy` bound. -impl Copy for PtrComponents {} - -// Manual impl needed to avoid `T: Clone` bound. 
-impl Clone for PtrComponents { - fn clone(&self) -> Self { - *self - } -} - /// The metadata for a `Dyn = dyn SomeTrait` trait object type. /// /// It is a pointer to a vtable (virtual call table) @@ -209,18 +184,14 @@ impl DynMetadata { // Consider a reference like `&(i32, dyn Send)`: the vtable will only store the size of the // `Send` part! // SAFETY: DynMetadata always contains a valid vtable pointer - return unsafe { - crate::intrinsics::vtable_size(self.vtable_ptr() as *const ()) - }; + return unsafe { crate::intrinsics::vtable_size(self.vtable_ptr() as *const ()) }; } /// Returns the alignment of the type associated with this vtable. #[inline] pub fn align_of(self) -> usize { // SAFETY: DynMetadata always contains a valid vtable pointer - return unsafe { - crate::intrinsics::vtable_align(self.vtable_ptr() as *const ()) - }; + return unsafe { crate::intrinsics::vtable_align(self.vtable_ptr() as *const ()) }; } /// Returns the size and alignment together as a `Layout` diff --git a/core/src/ptr/mod.rs b/core/src/ptr/mod.rs index d2bbdc84d4dd1..f2247e83ec5c5 100644 --- a/core/src/ptr/mod.rs +++ b/core/src/ptr/mod.rs @@ -237,7 +237,7 @@ //! pointer. For code which *does* cast a usize to a pointer, the scope of the change depends //! on exactly what you're doing. //! -//! In general you just need to make sure that if you want to convert a usize address to a +//! In general, you just need to make sure that if you want to convert a usize address to a //! pointer and then use that pointer to read/write memory, you need to keep around a pointer //! that has sufficient provenance to perform that read/write itself. In this way all of your //! casts from an address to a pointer are essentially just applying offsets/indexing. @@ -309,7 +309,7 @@ //! i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies //! for actual forgery (integers cast to pointers). If you borrow some struct's field //! that *happens* to be zero-sized, the resulting pointer will have provenance tied to -//! that allocation and it will still get invalidated if the allocation gets deallocated. +//! that allocation, and it will still get invalidated if the allocation gets deallocated. //! In the future we may introduce an API to make such a forged allocation explicit. //! //! * [`wrapping_offset`][] a pointer outside its provenance. This includes pointers @@ -415,7 +415,7 @@ use crate::intrinsics; use crate::marker::FnPtr; use crate::ub_checks; -use crate::mem::{self, align_of, size_of, MaybeUninit}; +use crate::mem::{self, MaybeUninit}; mod alignment; #[unstable(feature = "ptr_alignment_type", issue = "102070")] @@ -450,8 +450,13 @@ mod mut_ptr; /// Executes the destructor (if any) of the pointed-to value. /// -/// This is semantically equivalent to calling [`ptr::read`] and discarding +/// This is almost the same as calling [`ptr::read`] and discarding /// the result, but has the following advantages: +// FIXME: say something more useful than "almost the same"? +// There are open questions here: `read` requires the value to be fully valid, e.g. if `T` is a +// `bool` it must be 0 or 1, if it is a reference then it must be dereferenceable. `drop_in_place` +// only requires that `*to_drop` be "valid for dropping" and we have not defined what that means. In +// Miri it currently (May 2024) requires nothing at all for types without drop glue. 
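A nightly sketch of the `DynMetadata` accessors reformatted above; `size_of`/`align_of` are the library surface of the `vtable_size`/`vtable_align` intrinsics, and `#![feature(ptr_metadata)]` is assumed:

```rust
#![feature(ptr_metadata)]

use core::fmt::Debug;

fn main() {
    let value: &dyn Debug = &1u16;
    let meta = core::ptr::metadata(value as *const dyn Debug);
    // The vtable records the size and alignment of the erased type (`u16` here).
    assert_eq!(meta.size_of(), 2);
    assert_eq!(meta.align_of(), 2);
}
```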
/// /// * It is *required* to use `drop_in_place` to drop unsized types like /// trait objects, because they can't be read out onto the stack and @@ -565,7 +570,7 @@ pub unsafe fn drop_in_place(to_drop: *mut T) { #[rustc_allow_const_fn_unstable(ptr_metadata)] #[rustc_diagnostic_item = "ptr_null"] pub const fn null() -> *const T { - from_raw_parts(without_provenance(0), ()) + from_raw_parts(without_provenance::<()>(0), ()) } /// Creates a null mutable raw pointer. @@ -591,7 +596,7 @@ pub const fn null() -> *const T { #[rustc_allow_const_fn_unstable(ptr_metadata)] #[rustc_diagnostic_item = "ptr_null_mut"] pub const fn null_mut() -> *mut T { - from_raw_parts_mut(without_provenance_mut(0), ()) + from_raw_parts_mut(without_provenance_mut::<()>(0), ()) } /// Creates a pointer with the given address and no provenance. @@ -693,7 +698,7 @@ pub const fn dangling_mut() -> *mut T { /// /// If there is no 'exposed' provenance that justifies the way this pointer will be used, /// the program has undefined behavior. In particular, the aliasing rules still apply: pointers -/// and references that have been invalidated due to aliasing accesses cannot be used any more, +/// and references that have been invalidated due to aliasing accesses cannot be used anymore, /// even if they have been exposed! /// /// Note that there is no algorithm that decides which provenance will be used. You can think of this @@ -835,7 +840,7 @@ pub const fn from_mut(r: &mut T) -> *mut T { #[rustc_allow_const_fn_unstable(ptr_metadata)] #[rustc_diagnostic_item = "ptr_slice_from_raw_parts"] pub const fn slice_from_raw_parts(data: *const T, len: usize) -> *const [T] { - intrinsics::aggregate_raw_ptr(data, len) + from_raw_parts(data, len) } /// Forms a raw mutable slice from a pointer and a length. @@ -881,7 +886,7 @@ pub const fn slice_from_raw_parts(data: *const T, len: usize) -> *const [T] { #[rustc_const_unstable(feature = "const_slice_from_raw_parts_mut", issue = "67456")] #[rustc_diagnostic_item = "ptr_slice_from_raw_parts_mut"] pub const fn slice_from_raw_parts_mut(data: *mut T, len: usize) -> *mut [T] { - intrinsics::aggregate_raw_ptr(data, len) + from_raw_parts_mut(data, len) } /// Swaps the values at two mutable locations of the same type, without @@ -1092,7 +1097,7 @@ const unsafe fn swap_nonoverlapping_simple_untyped(x: *mut T, y: *mut T, coun // If we end up here, it's because we're using a simple type -- like // a small power-of-two-sized thing -- or a special type with particularly // large alignment, particularly SIMD types. - // Thus we're fine just reading-and-writing it, as either it's small + // Thus, we're fine just reading-and-writing it, as either it's small // and that works well anyway or it's special and the type's author // presumably wanted things to be done in the larger chunk. @@ -1285,7 +1290,7 @@ pub const unsafe fn read(src: *const T) -> T { // provides enough information to know that this is a typed operation. // However, as of March 2023 the compiler was not capable of taking advantage - // of that information. Thus the implementation here switched to an intrinsic, + // of that information. Thus, the implementation here switched to an intrinsic, // which lowers to `_0 = *src` in MIR, to address a few issues: // // - Using `MaybeUninit::assume_init` after a `copy_nonoverlapping` was not @@ -1565,7 +1570,7 @@ pub const unsafe fn write(dst: *mut T, src: T) { /// As a result, using `&packed.unaligned as *const FieldType` causes immediate /// *undefined behavior* in your program. 
/// -/// Instead you must use the [`ptr::addr_of_mut!`](addr_of_mut) +/// Instead, you must use the [`ptr::addr_of_mut!`](addr_of_mut) /// macro to create the pointer. You may use that returned pointer together with /// this function. /// @@ -1804,10 +1809,9 @@ pub(crate) const unsafe fn align_offset(p: *const T, a: usize) -> usiz // FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <= // 1, where the method versions of these operations are not inlined. use intrinsics::{ - assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_sub, - wrapping_add, wrapping_mul, wrapping_sub, + assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl, + unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub, }; - use intrinsics::{unchecked_shl, unchecked_shr}; /// Calculate multiplicative modular inverse of `x` modulo `m`. /// diff --git a/core/src/ptr/mut_ptr.rs b/core/src/ptr/mut_ptr.rs index c53953400addd..904d6c62dcf1e 100644 --- a/core/src/ptr/mut_ptr.rs +++ b/core/src/ptr/mut_ptr.rs @@ -117,72 +117,6 @@ impl *mut T { self as _ } - /// Casts a pointer to its raw bits. - /// - /// This is equivalent to `as usize`, but is more specific to enhance readability. - /// The inverse method is [`from_bits`](pointer#method.from_bits-1). - /// - /// In particular, `*p as usize` and `p as usize` will both compile for - /// pointers to numeric types but do very different things, so using this - /// helps emphasize that reading the bits was intentional. - /// - /// # Examples - /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// # #[cfg(not(miri))] { // doctest does not work with strict provenance - /// let mut array = [13, 42]; - /// let mut it = array.iter_mut(); - /// let p0: *mut i32 = it.next().unwrap(); - /// assert_eq!(<*mut _>::from_bits(p0.to_bits()), p0); - /// let p1: *mut i32 = it.next().unwrap(); - /// assert_eq!(p1.to_bits() - p0.to_bits(), 4); - /// } - /// ``` - #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - #[deprecated( - since = "1.67.0", - note = "replaced by the `expose_provenance` method, or update your code \ - to follow the strict provenance rules using its APIs" - )] - #[inline(always)] - pub fn to_bits(self) -> usize - where - T: Sized, - { - self as usize - } - - /// Creates a pointer from its raw bits. - /// - /// This is equivalent to `as *mut T`, but is more specific to enhance readability. - /// The inverse method is [`to_bits`](pointer#method.to_bits-1). - /// - /// # Examples - /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// # #[cfg(not(miri))] { // doctest does not work with strict provenance - /// use std::ptr::NonNull; - /// let dangling: *mut u8 = NonNull::dangling().as_ptr(); - /// assert_eq!(<*mut u8>::from_bits(1), dangling); - /// } - /// ``` - #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - #[deprecated( - since = "1.67.0", - note = "replaced by the `ptr::with_exposed_provenance_mut` function, or \ - update your code to follow the strict provenance rules using its APIs" - )] - #[allow(fuzzy_provenance_casts)] // this is an unstable and semi-deprecated cast function - #[inline(always)] - pub fn from_bits(bits: usize) -> Self - where - T: Sized, - { - bits as Self - } - /// Gets the "address" portion of the pointer. 
/// /// This is similar to `self as usize`, which semantically discards *provenance* and @@ -470,37 +404,26 @@ impl *mut T { if self.is_null() { None } else { Some(unsafe { &*(self as *const MaybeUninit) }) } } - /// Calculates the offset from a pointer. + /// Adds an offset to a pointer. /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// * The computed offset, **in bytes**, cannot overflow an `isize`. + /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum, **in bytes** must fit in a usize. - /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len())` is always safe. - /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_offset`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -902,38 +825,21 @@ impl *mut T { /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// /// * `self` and `origin` must either /// + /// * point to the same address, or /// * both be *derived from* a pointer to the same [allocated object], and the memory range between - /// the two pointers must be either empty or in bounds of that object. (See below for an example.) - /// * or both be derived from an integer literal/constant, and point to the same address. + /// the two pointers must be in bounds of that object. (See below for an example.) 
/// /// * The distance between the pointers, in bytes, must be an exact multiple /// of the size of `T`. /// - /// * The distance between the pointers, **in bytes**, cannot overflow an `isize`. - /// - /// * The distance being in bounds cannot rely on "wrapping around" the address space. - /// - /// Rust types are never larger than `isize::MAX` and Rust allocations never wrap around the - /// address space, so two pointers within some value of any Rust type `T` will always satisfy - /// the last two conditions. The standard library also generally ensures that allocations - /// never reach a size where an offset is a concern. For instance, `Vec` and `Box` ensure they - /// never allocate more than `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` - /// always satisfies the last two conditions. - /// - /// Most platforms fundamentally can't even construct such a large allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. - /// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on - /// such large allocations either.) + /// As a consequence, the absolute distance between the pointers, in bytes, computed on + /// mathematical integers (without "wrapping around"), cannot overflow an `isize`. This is + /// implied by the in-bounds requirement, and the fact that no allocated object can be larger + /// than `isize::MAX` bytes. /// /// The requirement for pointers to be derived from the same allocated object is primarily /// needed for `const`-compatibility: the distance between pointers into *different* allocated @@ -972,14 +878,14 @@ impl *mut T { /// let ptr1 = Box::into_raw(Box::new(0u8)); /// let ptr2 = Box::into_raw(Box::new(1u8)); /// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize); - /// // Make ptr2_other an "alias" of ptr2, but derived from ptr1. - /// let ptr2_other = (ptr1 as *mut u8).wrapping_offset(diff); + /// // Make ptr2_other an "alias" of ptr2.add(1), but derived from ptr1. + /// let ptr2_other = (ptr1 as *mut u8).wrapping_offset(diff).wrapping_offset(1); /// assert_eq!(ptr2 as usize, ptr2_other as usize); /// // Since ptr2_other and ptr2 are derived from pointers to different objects, /// // computing their offset is undefined behavior, even though - /// // they point to the same address! + /// // they point to addresses that are in-bounds of the same object! /// unsafe { - /// let zero = ptr2_other.offset_from(ptr2); // Undefined Behavior + /// let one = ptr2_other.offset_from(ptr2); // Undefined Behavior! ⚠️ /// } /// ``` #[stable(feature = "ptr_offset_from", since = "1.47.0")] @@ -1086,37 +992,26 @@ impl *mut T { unsafe { (self as *const T).sub_ptr(origin) } } - /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`). + /// Adds an offset to a pointer (convenience for `.offset(count as isize)`). /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. 
/// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// * The computed offset, **in bytes**, cannot overflow an `isize`. + /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum must fit in a `usize`. - /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len())` is always safe. - /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_add`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -1170,7 +1065,7 @@ impl *mut T { unsafe { self.cast::().add(count).with_metadata_of(self) } } - /// Calculates the offset from a pointer (convenience for + /// Subtracts an offset from a pointer (convenience for /// `.offset((count as isize).wrapping_neg())`). /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer @@ -1178,30 +1073,19 @@ impl *mut T { /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: - /// - /// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting - /// pointer must be either in bounds or at the end of the same [allocated object]. - /// (If it is zero, then the function is always well-defined.) - /// - /// * The computed offset cannot exceed `isize::MAX` **bytes**. + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum must fit in a usize. + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. 
/// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. + /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// Consider using [`wrapping_sub`] instead if these constraints are /// difficult to satisfy. The only advantage of this method is that it @@ -1224,6 +1108,7 @@ impl *mut T { #[stable(feature = "pointer_methods", since = "1.26.0")] #[must_use = "returns a new pointer rather than modifying its argument"] #[rustc_const_stable(feature = "const_ptr_offset", since = "1.61.0")] + #[rustc_allow_const_fn_unstable(unchecked_neg)] #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn sub(self, count: usize) -> Self @@ -1237,7 +1122,7 @@ impl *mut T { // SAFETY: the caller must uphold the safety contract for `offset`. // Because the pointee is *not* a ZST, that means that `count` is // at most `isize::MAX`, and thus the negation cannot overflow. - unsafe { self.offset(intrinsics::unchecked_sub(0, count as isize)) } + unsafe { self.offset((count as isize).unchecked_neg()) } } } diff --git a/core/src/ptr/non_null.rs b/core/src/ptr/non_null.rs index 617890cf083b1..796c85d0cacc7 100644 --- a/core/src/ptr/non_null.rs +++ b/core/src/ptr/non_null.rs @@ -476,36 +476,26 @@ impl NonNull { unsafe { NonNull { pointer: self.as_ptr() as *mut U } } } - /// Calculates the offset from a pointer. + /// Adds an offset to a pointer. /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: + /// If any of the following conditions are violated, the result is Undefined Behavior: /// - /// * Both the starting and resulting pointer must be either in bounds or one - /// byte past the end of the same [allocated object]. + /// * The computed offset, `count * size_of::()` bytes, must not overflow `isize`. /// - /// * The computed offset, **in bytes**, cannot overflow an `isize`. 
+ /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some + /// [allocated object], and the entire memory range between `self` and the result must be in + /// bounds of that allocated object. In particular, this range must not "wrap around" the edge + /// of the address space. /// - /// * The offset being in bounds cannot rely on "wrapping around" the address - /// space. That is, the infinite-precision sum, **in bytes** must fit in a usize. - /// - /// The compiler and standard library generally tries to ensure allocations - /// never reach a size where an offset is a concern. For instance, `Vec` - /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so - /// `vec.as_ptr().add(vec.len())` is always safe. - /// - /// Most platforms fundamentally can't even construct such an allocation. - /// For instance, no known 64-bit platform can ever serve a request - /// for 263 bytes due to page-table limitations or splitting the address space. - /// However, some 32-bit and 16-bit platforms may successfully serve a request for - /// more than `isize::MAX` bytes with things like Physical Address - /// Extension. As such, memory acquired directly from allocators or memory - /// mapped files *may* be too large to handle with this function. + /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset + /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement. + /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec`) is always + /// safe. /// /// [allocated object]: crate::ptr#allocated-object /// @@ -525,8 +515,8 @@ impl NonNull { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces #[must_use = "returns a new pointer rather than modifying its argument"] - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] + #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")] pub const unsafe fn offset(self, count: isize) -> Self where T: Sized, @@ -551,8 +541,8 @@ impl NonNull { #[must_use] #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] + #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")] pub const unsafe fn byte_offset(self, count: isize) -> Self { // SAFETY: the caller must uphold the safety contract for `offset` and `byte_offset` has // the same safety contract. @@ -562,36 +552,26 @@ impl NonNull { unsafe { NonNull { pointer: self.pointer.byte_offset(count) } } } - /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`). + /// Adds an offset to a pointer (convenience for `.offset(count as isize)`). /// /// `count` is in units of T; e.g., a `count` of 3 represents a pointer /// offset of `3 * size_of::()` bytes. /// /// # Safety /// - /// If any of the following conditions are violated, the result is Undefined - /// Behavior: - /// - /// * Both the starting and resulting pointer must be either in bounds or one - /// byte past the end of the same [allocated object]. 
+ /// If any of the following conditions are violated, the result is Undefined Behavior:
 ///
- /// * The computed offset, **in bytes**, cannot overflow an `isize`.
+ /// * The computed offset, `count * size_of::<T>()` bytes, must not overflow `isize`.
 ///
- /// * The offset being in bounds cannot rely on "wrapping around" the address
- /// space. That is, the infinite-precision sum must fit in a `usize`.
+ /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some
+ /// [allocated object], and the entire memory range between `self` and the result must be in
+ /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
+ /// of the address space.
 ///
- /// The compiler and standard library generally tries to ensure allocations
- /// never reach a size where an offset is a concern. For instance, `Vec`
- /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so
- /// `vec.as_ptr().add(vec.len())` is always safe.
- ///
- /// Most platforms fundamentally can't even construct such an allocation.
- /// For instance, no known 64-bit platform can ever serve a request
- /// for 2<sup>63</sup> bytes due to page-table limitations or splitting the address space.
- /// However, some 32-bit and 16-bit platforms may successfully serve a request for
- /// more than `isize::MAX` bytes with things like Physical Address
- /// Extension. As such, memory acquired directly from allocators or memory
- /// mapped files *may* be too large to handle with this function.
+ /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset
+ /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement.
+ /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec<T>`) is always
+ /// safe.
 ///
 /// [allocated object]: crate::ptr#allocated-object
 ///
@@ -611,8 +591,8 @@ impl<T: ?Sized> NonNull<T> {
 #[inline(always)]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
 #[must_use = "returns a new pointer rather than modifying its argument"]
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
 pub const unsafe fn add(self, count: usize) -> Self
 where
 T: Sized,
@@ -638,8 +618,8 @@ impl<T: ?Sized> NonNull<T> {
 #[inline(always)]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
 #[rustc_allow_const_fn_unstable(set_ptr_value)]
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
 pub const unsafe fn byte_add(self, count: usize) -> Self {
 // SAFETY: the caller must uphold the safety contract for `add` and `byte_add` has the same
 // safety contract.
@@ -649,7 +629,7 @@ impl<T: ?Sized> NonNull<T> {
 unsafe { NonNull { pointer: self.pointer.byte_add(count) } }
 }
- /// Calculates the offset from a pointer (convenience for
+ /// Subtracts an offset from a pointer (convenience for
 /// `.offset((count as isize).wrapping_neg())`).
 ///
 /// `count` is in units of T; e.g., a `count` of 3 represents a pointer
@@ -657,29 +637,19 @@ impl<T: ?Sized> NonNull<T> {
 ///
 /// # Safety
 ///
- /// If any of the following conditions are violated, the result is Undefined
- /// Behavior:
- ///
- /// * Both the starting and resulting pointer must be either in bounds or one
- /// byte past the end of the same [allocated object].
+ /// If any of the following conditions are violated, the result is Undefined Behavior:
 ///
- /// * The computed offset cannot exceed `isize::MAX` **bytes**.
+ /// * The computed offset, `count * size_of::<T>()` bytes, must not overflow `isize`.
 ///
- /// * The offset being in bounds cannot rely on "wrapping around" the address
- /// space. That is, the infinite-precision sum must fit in a usize.
+ /// * If the computed offset is non-zero, then `self` must be derived from a pointer to some
+ /// [allocated object], and the entire memory range between `self` and the result must be in
+ /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
+ /// of the address space.
 ///
- /// The compiler and standard library generally tries to ensure allocations
- /// never reach a size where an offset is a concern. For instance, `Vec`
- /// and `Box` ensure they never allocate more than `isize::MAX` bytes, so
- /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe.
- ///
- /// Most platforms fundamentally can't even construct such an allocation.
- /// For instance, no known 64-bit platform can ever serve a request
- /// for 2<sup>63</sup> bytes due to page-table limitations or splitting the address space.
- /// However, some 32-bit and 16-bit platforms may successfully serve a request for
- /// more than `isize::MAX` bytes with things like Physical Address
- /// Extension. As such, memory acquired directly from allocators or memory
- /// mapped files *may* be too large to handle with this function.
+ /// Allocated objects can never be larger than `isize::MAX` bytes, so if the computed offset
+ /// stays in bounds of the allocated object, it is guaranteed to satisfy the first requirement.
+ /// This implies, for instance, that `vec.as_ptr().add(vec.len())` (for `vec: Vec<T>`) is always
+ /// safe.
 ///
 /// [allocated object]: crate::ptr#allocated-object
 ///
@@ -699,8 +669,9 @@ impl<T: ?Sized> NonNull<T> {
 #[inline(always)]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
 #[must_use = "returns a new pointer rather than modifying its argument"]
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_allow_const_fn_unstable(unchecked_neg)]
 pub const unsafe fn sub(self, count: usize) -> Self
 where
 T: Sized,
@@ -712,7 +683,7 @@ impl<T: ?Sized> NonNull<T> {
 // SAFETY: the caller must uphold the safety contract for `offset`.
 // Because the pointee is *not* a ZST, that means that `count` is
 // at most `isize::MAX`, and thus the negation cannot overflow.
- unsafe { self.offset(intrinsics::unchecked_sub(0, count as isize)) }
+ unsafe { self.offset((count as isize).unchecked_neg()) }
 }
 }
@@ -731,8 +702,8 @@ impl<T: ?Sized> NonNull<T> {
 #[inline(always)]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
 #[rustc_allow_const_fn_unstable(set_ptr_value)]
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
 pub const unsafe fn byte_sub(self, count: usize) -> Self {
 // SAFETY: the caller must uphold the safety contract for `sub` and `byte_sub` has the same
 // safety contract.
@@ -760,38 +731,21 @@ impl<T: ?Sized> NonNull<T> {
 ///
 /// # Safety
 ///
- /// If any of the following conditions are violated, the result is Undefined
- /// Behavior:
+ /// If any of the following conditions are violated, the result is Undefined Behavior:
 ///
- /// * Both `self` and `origin` must be either in bounds or one
- /// byte past the end of the same [allocated object].
+ /// * `self` and `origin` must either
 ///
- /// * Both pointers must be *derived from* a pointer to the same object.
- /// (See below for an example.)
+ /// * point to the same address, or
+ /// * both be *derived from* a pointer to the same [allocated object], and the memory range between
+ /// the two pointers must be in bounds of that object. (See below for an example.)
 ///
 /// * The distance between the pointers, in bytes, must be an exact multiple
 /// of the size of `T`.
 ///
- /// * The distance between the pointers, **in bytes**, cannot overflow an `isize`.
- ///
- /// * The distance being in bounds cannot rely on "wrapping around" the address space.
- ///
- /// Rust types are never larger than `isize::MAX` and Rust allocations never wrap around the
- /// address space, so two pointers within some value of any Rust type `T` will always satisfy
- /// the last two conditions. The standard library also generally ensures that allocations
- /// never reach a size where an offset is a concern. For instance, `Vec` and `Box` ensure they
- /// never allocate more than `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())`
- /// always satisfies the last two conditions.
- ///
- /// Most platforms fundamentally can't even construct such a large allocation.
- /// For instance, no known 64-bit platform can ever serve a request
- /// for 2<sup>63</sup> bytes due to page-table limitations or splitting the address space.
- /// However, some 32-bit and 16-bit platforms may successfully serve a request for
- /// more than `isize::MAX` bytes with things like Physical Address
- /// Extension. As such, memory acquired directly from allocators or memory
- /// mapped files *may* be too large to handle with this function.
- /// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on
- /// such large allocations either.)
+ /// As a consequence, the absolute distance between the pointers, in bytes, computed on
+ /// mathematical integers (without "wrapping around"), cannot overflow an `isize`. This is
+ /// implied by the in-bounds requirement, and the fact that no allocated object can be larger
+ /// than `isize::MAX` bytes.
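
To make the rewritten contract concrete, here is a minimal sketch (not part of the patch) that satisfies every bullet above: both pointers derive from the same `Vec` allocation, the whole range between them is in bounds, and the distance is an exact multiple of the element size. The same rules apply to the `NonNull::offset_from` stabilized in this diff; the sketch uses the long-stable raw-pointer equivalent so it runs on stable Rust.

```rust
fn main() {
    let v = vec![1u32, 2, 3, 4];
    let base = v.as_ptr();
    // SAFETY: `base` and `base.add(3)` are both derived from `v`'s
    // allocation, the entire range between them is in bounds of that
    // allocation, and the 12-byte distance is an exact multiple of
    // `size_of::<u32>()`.
    let distance = unsafe { base.add(3).offset_from(base) };
    assert_eq!(distance, 3);
}
```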
 ///
 /// The requirement for pointers to be derived from the same allocated object is primarily
 /// needed for `const`-compatibility: the distance between pointers into *different* allocated
@@ -835,19 +789,20 @@ impl<T: ?Sized> NonNull<T> {
 /// let ptr1 = NonNull::new(Box::into_raw(Box::new(0u8))).unwrap();
 /// let ptr2 = NonNull::new(Box::into_raw(Box::new(1u8))).unwrap();
 /// let diff = (ptr2.addr().get() as isize).wrapping_sub(ptr1.addr().get() as isize);
- /// // Make ptr2_other an "alias" of ptr2, but derived from ptr1.
- /// let ptr2_other = NonNull::new(ptr1.as_ptr().wrapping_byte_offset(diff)).unwrap();
+ /// // Make ptr2_other an "alias" of ptr2.add(1), but derived from ptr1.
+ /// let diff_plus_1 = diff.wrapping_add(1);
+ /// let ptr2_other = NonNull::new(ptr1.as_ptr().wrapping_byte_offset(diff_plus_1)).unwrap();
 /// assert_eq!(ptr2.addr(), ptr2_other.addr());
 /// // Since ptr2_other and ptr2 are derived from pointers to different objects,
 /// // computing their offset is undefined behavior, even though
- /// // they point to the same address!
+ /// // they point to addresses that are in-bounds of the same object!
 ///
- /// let zero = unsafe { ptr2_other.offset_from(ptr2) }; // Undefined Behavior
+ /// let one = unsafe { ptr2_other.offset_from(ptr2) }; // Undefined Behavior! ⚠️
 /// ```
 #[inline]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
 pub const unsafe fn offset_from(self, origin: NonNull<T>) -> isize
 where
 T: Sized,
@@ -867,8 +822,8 @@ impl<T: ?Sized> NonNull<T> {
 /// ignoring the metadata.
 #[inline(always)]
 #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
- #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
- #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")]
+ #[stable(feature = "non_null_convenience", since = "1.80.0")]
+ #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")]
 pub const unsafe fn byte_offset_from<U: ?Sized>(self, origin: NonNull<U>) -> isize {
 // SAFETY: the caller must uphold the safety contract for `byte_offset_from`.
unsafe { self.pointer.byte_offset_from(origin.pointer) } @@ -957,8 +912,8 @@ impl NonNull { /// [`ptr::read`]: crate::ptr::read() #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] + #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")] pub const unsafe fn read(self) -> T where T: Sized, @@ -979,7 +934,7 @@ impl NonNull { /// [`ptr::read_volatile`]: crate::ptr::read_volatile() #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] pub unsafe fn read_volatile(self) -> T where T: Sized, @@ -998,8 +953,8 @@ impl NonNull { /// [`ptr::read_unaligned`]: crate::ptr::read_unaligned() #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] + #[rustc_const_stable(feature = "non_null_convenience", since = "1.80.0")] pub const unsafe fn read_unaligned(self) -> T where T: Sized, @@ -1018,7 +973,7 @@ impl NonNull { /// [`ptr::copy`]: crate::ptr::copy() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_intrinsic_copy", issue = "80697")] pub const unsafe fn copy_to(self, dest: NonNull, count: usize) where @@ -1038,7 +993,7 @@ impl NonNull { /// [`ptr::copy_nonoverlapping`]: crate::ptr::copy_nonoverlapping() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_intrinsic_copy", issue = "80697")] pub const unsafe fn copy_to_nonoverlapping(self, dest: NonNull, count: usize) where @@ -1058,7 +1013,7 @@ impl NonNull { /// [`ptr::copy`]: crate::ptr::copy() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_intrinsic_copy", issue = "80697")] pub const unsafe fn copy_from(self, src: NonNull, count: usize) where @@ -1078,7 +1033,7 @@ impl NonNull { /// [`ptr::copy_nonoverlapping`]: crate::ptr::copy_nonoverlapping() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_intrinsic_copy", issue = "80697")] pub const unsafe fn copy_from_nonoverlapping(self, src: NonNull, count: usize) where @@ -1094,7 +1049,7 @@ impl NonNull { /// /// 
[`ptr::drop_in_place`]: crate::ptr::drop_in_place() #[inline(always)] - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] pub unsafe fn drop_in_place(self) { // SAFETY: the caller must uphold the safety contract for `drop_in_place`. unsafe { ptr::drop_in_place(self.as_ptr()) } @@ -1108,7 +1063,7 @@ impl NonNull { /// [`ptr::write`]: crate::ptr::write() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_ptr_write", issue = "86302")] pub const unsafe fn write(self, val: T) where @@ -1127,7 +1082,7 @@ impl NonNull { #[inline(always)] #[doc(alias = "memset")] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_ptr_write", issue = "86302")] pub const unsafe fn write_bytes(self, val: u8, count: usize) where @@ -1149,7 +1104,7 @@ impl NonNull { /// [`ptr::write_volatile`]: crate::ptr::write_volatile() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] pub unsafe fn write_volatile(self, val: T) where T: Sized, @@ -1168,7 +1123,7 @@ impl NonNull { /// [`ptr::write_unaligned`]: crate::ptr::write_unaligned() #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_ptr_write", issue = "86302")] pub const unsafe fn write_unaligned(self, val: T) where @@ -1185,7 +1140,7 @@ impl NonNull { /// /// [`ptr::replace`]: crate::ptr::replace() #[inline(always)] - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] pub unsafe fn replace(self, src: T) -> T where T: Sized, @@ -1202,7 +1157,7 @@ impl NonNull { /// /// [`ptr::swap`]: crate::ptr::swap() #[inline(always)] - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_swap", issue = "83163")] pub const unsafe fn swap(self, with: NonNull) where @@ -1254,7 +1209,7 @@ impl NonNull { /// ``` #[inline] #[must_use] - #[stable(feature = "non_null_convenience", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "non_null_convenience", since = "1.80.0")] #[rustc_const_unstable(feature = "const_align_offset", issue = "90962")] pub const fn align_offset(self, align: usize) -> usize where @@ -1709,6 +1664,8 @@ impl NonNull<[T]> { /// // Note that calling `memory.as_mut()` is not allowed here as the content may be uninitialized. /// # #[allow(unused_variables)] /// let slice: &mut [MaybeUninit] = unsafe { memory.as_uninit_slice_mut() }; + /// # // Prevent leaks for Miri. 
+ /// # unsafe { Global.deallocate(memory.cast(), Layout::new::<[u8; 32]>()); }
 /// # Ok::<_, std::alloc::AllocError>(())
 /// ```
 #[inline]
diff --git a/core/src/range.rs b/core/src/range.rs
new file mode 100644
index 0000000000000..bfbbf123b1ca5
--- /dev/null
+++ b/core/src/range.rs
@@ -0,0 +1,494 @@
+//! # Experimental replacement range types
+//!
+//! The types within this module are meant to replace the existing
+//! `Range`, `RangeInclusive`, and `RangeFrom` types in a future edition.
+//!
+//! ```
+//! #![feature(new_range_api)]
+//! use core::range::{Range, RangeFrom, RangeInclusive};
+//!
+//! let arr = [0, 1, 2, 3, 4];
+//! assert_eq!(arr[ .. ], [0, 1, 2, 3, 4]);
+//! assert_eq!(arr[ .. 3 ], [0, 1, 2 ]);
+//! assert_eq!(arr[ ..=3 ], [0, 1, 2, 3 ]);
+//! assert_eq!(arr[ RangeFrom::from(1.. )], [ 1, 2, 3, 4]);
+//! assert_eq!(arr[ Range::from(1..3 )], [ 1, 2 ]);
+//! assert_eq!(arr[RangeInclusive::from(1..=3)], [ 1, 2, 3 ]);
+//! ```
+
+use crate::fmt;
+use crate::hash::Hash;
+
+mod iter;
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+pub mod legacy;
+
+#[doc(inline)]
+pub use crate::ops::{Bound, OneSidedRange, RangeBounds, RangeFull, RangeTo, RangeToInclusive};
+
+use Bound::{Excluded, Included, Unbounded};
+
+#[doc(inline)]
+pub use crate::iter::Step;
+
+#[doc(inline)]
+pub use iter::{IterRange, IterRangeFrom, IterRangeInclusive};
+
+/// A (half-open) range bounded inclusively below and exclusively above
+/// (`start..end` in a future edition).
+///
+/// The range `start..end` contains all values with `start <= x < end`.
+/// It is empty if `start >= end`.
+///
+/// # Examples
+///
+/// ```
+/// #![feature(new_range_api)]
+/// use core::range::Range;
+///
+/// assert_eq!(Range::from(3..5), Range { start: 3, end: 5 });
+/// assert_eq!(3 + 4 + 5, Range::from(3..6).into_iter().sum());
+/// ```
+#[derive(Clone, Copy, Default, PartialEq, Eq, Hash)]
+#[unstable(feature = "new_range_api", issue = "125687")]
+pub struct Range<Idx> {
+ /// The lower bound of the range (inclusive).
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub start: Idx,
+ /// The upper bound of the range (exclusive).
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub end: Idx,
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<Idx: fmt::Debug> fmt::Debug for Range<Idx> {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.start.fmt(fmt)?;
+ write!(fmt, "..")?;
+ self.end.fmt(fmt)?;
+ Ok(())
+ }
+}
+
+impl<Idx: Step> Range<Idx> {
+ /// Create an iterator over the elements within this range.
+ ///
+ /// Shorthand for `.clone().into_iter()`
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::Range;
+ ///
+ /// let mut i = Range::from(3..9).iter().map(|n| n*n);
+ /// assert_eq!(i.next(), Some(9));
+ /// assert_eq!(i.next(), Some(16));
+ /// assert_eq!(i.next(), Some(25));
+ /// ```
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ #[inline]
+ pub fn iter(&self) -> IterRange<Idx> {
+ self.clone().into_iter()
+ }
+}
+
+impl<Idx: PartialOrd<Idx>> Range<Idx> {
+ /// Returns `true` if `item` is contained in the range.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::Range;
+ ///
+ /// assert!(!Range::from(3..5).contains(&2));
+ /// assert!( Range::from(3..5).contains(&3));
+ /// assert!( Range::from(3..5).contains(&4));
+ /// assert!(!Range::from(3..5).contains(&5));
+ ///
+ /// assert!(!Range::from(3..3).contains(&3));
+ /// assert!(!Range::from(3..2).contains(&3));
+ ///
+ /// assert!( Range::from(0.0..1.0).contains(&0.5));
+ /// assert!(!Range::from(0.0..1.0).contains(&f32::NAN));
+ /// assert!(!Range::from(0.0..f32::NAN).contains(&0.5));
+ /// assert!(!Range::from(f32::NAN..1.0).contains(&0.5));
+ /// ```
+ #[inline]
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub fn contains<U>(&self, item: &U) -> bool
+ where
+ Idx: PartialOrd<U>,
+ U: ?Sized + PartialOrd<Idx>,
+ {
+ <Self as RangeBounds<Idx>>::contains(self, item)
+ }
+
+ /// Returns `true` if the range contains no items.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::Range;
+ ///
+ /// assert!(!Range::from(3..5).is_empty());
+ /// assert!( Range::from(3..3).is_empty());
+ /// assert!( Range::from(3..2).is_empty());
+ /// ```
+ ///
+ /// The range is empty if either side is incomparable:
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::Range;
+ ///
+ /// assert!(!Range::from(3.0..5.0).is_empty());
+ /// assert!( Range::from(3.0..f32::NAN).is_empty());
+ /// assert!( Range::from(f32::NAN..5.0).is_empty());
+ /// ```
+ #[inline]
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub fn is_empty(&self) -> bool {
+ !(self.start < self.end)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for Range<T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(&self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Excluded(&self.end)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for Range<&T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Excluded(self.end)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<Range<T>> for legacy::Range<T> {
+ #[inline]
+ fn from(value: Range<T>) -> Self {
+ Self { start: value.start, end: value.end }
+ }
+}
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<legacy::Range<T>> for Range<T> {
+ #[inline]
+ fn from(value: legacy::Range<T>) -> Self {
+ Self { start: value.start, end: value.end }
+ }
+}
+
+/// A range bounded inclusively below and above (`start..=end`).
+///
+/// The `RangeInclusive` `start..=end` contains all values with `x >= start`
+/// and `x <= end`. It is empty unless `start <= end`.
+///
+/// # Examples
+///
+/// The `start..=end` syntax is a `RangeInclusive`:
+///
+/// ```
+/// #![feature(new_range_api)]
+/// use core::range::RangeInclusive;
+///
+/// assert_eq!(RangeInclusive::from(3..=5), RangeInclusive { start: 3, end: 5 });
+/// assert_eq!(3 + 4 + 5, RangeInclusive::from(3..=5).into_iter().sum());
+/// ```
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+#[unstable(feature = "new_range_api", issue = "125687")]
+pub struct RangeInclusive<Idx> {
+ /// The lower bound of the range (inclusive).
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub start: Idx,
+ /// The upper bound of the range (inclusive).
+ #[unstable(feature = "new_range_api", issue = "125687")] + pub end: Idx, +} + +#[unstable(feature = "new_range_api", issue = "125687")] +impl fmt::Debug for RangeInclusive { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + self.start.fmt(fmt)?; + write!(fmt, "..=")?; + self.end.fmt(fmt)?; + Ok(()) + } +} + +impl> RangeInclusive { + /// Returns `true` if `item` is contained in the range. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_range_api)] + /// use core::range::RangeInclusive; + /// + /// assert!(!RangeInclusive::from(3..=5).contains(&2)); + /// assert!( RangeInclusive::from(3..=5).contains(&3)); + /// assert!( RangeInclusive::from(3..=5).contains(&4)); + /// assert!( RangeInclusive::from(3..=5).contains(&5)); + /// assert!(!RangeInclusive::from(3..=5).contains(&6)); + /// + /// assert!( RangeInclusive::from(3..=3).contains(&3)); + /// assert!(!RangeInclusive::from(3..=2).contains(&3)); + /// + /// assert!( RangeInclusive::from(0.0..=1.0).contains(&1.0)); + /// assert!(!RangeInclusive::from(0.0..=1.0).contains(&f32::NAN)); + /// assert!(!RangeInclusive::from(0.0..=f32::NAN).contains(&0.0)); + /// assert!(!RangeInclusive::from(f32::NAN..=1.0).contains(&1.0)); + /// ``` + #[inline] + #[unstable(feature = "new_range_api", issue = "125687")] + pub fn contains(&self, item: &U) -> bool + where + Idx: PartialOrd, + U: ?Sized + PartialOrd, + { + >::contains(self, item) + } + + /// Returns `true` if the range contains no items. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_range_api)] + /// use core::range::RangeInclusive; + /// + /// assert!(!RangeInclusive::from(3..=5).is_empty()); + /// assert!(!RangeInclusive::from(3..=3).is_empty()); + /// assert!( RangeInclusive::from(3..=2).is_empty()); + /// ``` + /// + /// The range is empty if either side is incomparable: + /// + /// ``` + /// #![feature(new_range_api)] + /// use core::range::RangeInclusive; + /// + /// assert!(!RangeInclusive::from(3.0..=5.0).is_empty()); + /// assert!( RangeInclusive::from(3.0..=f32::NAN).is_empty()); + /// assert!( RangeInclusive::from(f32::NAN..=5.0).is_empty()); + /// ``` + #[unstable(feature = "new_range_api", issue = "125687")] + #[inline] + pub fn is_empty(&self) -> bool { + !(self.start <= self.end) + } +} + +impl RangeInclusive { + /// Create an iterator over the elements within this range. + /// + /// Shorthand for `.clone().into_iter()` + /// + /// # Examples + /// + /// ``` + /// #![feature(new_range_api)] + /// use core::range::RangeInclusive; + /// + /// let mut i = RangeInclusive::from(3..=8).iter().map(|n| n*n); + /// assert_eq!(i.next(), Some(9)); + /// assert_eq!(i.next(), Some(16)); + /// assert_eq!(i.next(), Some(25)); + /// ``` + #[unstable(feature = "new_range_api", issue = "125687")] + #[inline] + pub fn iter(&self) -> IterRangeInclusive { + self.clone().into_iter() + } +} + +impl RangeInclusive { + /// Converts to an exclusive `Range` for `SliceIndex` implementations. + /// The caller is responsible for dealing with `end == usize::MAX`. 
+ #[inline]
+ pub(crate) const fn into_slice_range(self) -> Range<usize> {
+ Range { start: self.start, end: self.end + 1 }
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for RangeInclusive<T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(&self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Included(&self.end)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for RangeInclusive<&T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Included(self.end)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<RangeInclusive<T>> for legacy::RangeInclusive<T> {
+ #[inline]
+ fn from(value: RangeInclusive<T>) -> Self {
+ Self::new(value.start, value.end)
+ }
+}
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<legacy::RangeInclusive<T>> for RangeInclusive<T> {
+ #[inline]
+ fn from(value: legacy::RangeInclusive<T>) -> Self {
+ assert!(
+ !value.exhausted,
+ "attempted to convert from an exhausted `legacy::RangeInclusive` (unspecified behavior)"
+ );
+
+ let (start, end) = value.into_inner();
+ RangeInclusive { start, end }
+ }
+}
+
+/// A range only bounded inclusively below (`start..`).
+///
+/// The `RangeFrom` `start..` contains all values with `x >= start`.
+///
+/// *Note*: Overflow in the [`Iterator`] implementation (when the contained
+/// data type reaches its numerical limit) is allowed to panic, wrap, or
+/// saturate. This behavior is defined by the implementation of the [`Step`]
+/// trait. For primitive integers, this follows the normal rules, and respects
+/// the overflow checks profile (panic in debug, wrap in release). Note also
+/// that overflow happens earlier than you might assume: the overflow happens
+/// in the call to `next` that yields the maximum value, as the range must be
+/// set to a state to yield the next value.
+///
+/// [`Step`]: crate::iter::Step
+///
+/// # Examples
+///
+/// The `start..` syntax is a `RangeFrom`:
+///
+/// ```
+/// #![feature(new_range_api)]
+/// use core::range::RangeFrom;
+///
+/// assert_eq!(RangeFrom::from(2..), core::range::RangeFrom { start: 2 });
+/// assert_eq!(2 + 3 + 4, RangeFrom::from(2..).into_iter().take(3).sum());
+/// ```
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+#[unstable(feature = "new_range_api", issue = "125687")]
+pub struct RangeFrom<Idx> {
+ /// The lower bound of the range (inclusive).
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub start: Idx,
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<Idx: fmt::Debug> fmt::Debug for RangeFrom<Idx> {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.start.fmt(fmt)?;
+ write!(fmt, "..")?;
+ Ok(())
+ }
+}
+
+impl<Idx: Step> RangeFrom<Idx> {
+ /// Create an iterator over the elements within this range.
+ ///
+ /// Shorthand for `.clone().into_iter()`
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::RangeFrom;
+ ///
+ /// let mut i = RangeFrom::from(3..).iter().map(|n| n*n);
+ /// assert_eq!(i.next(), Some(9));
+ /// assert_eq!(i.next(), Some(16));
+ /// assert_eq!(i.next(), Some(25));
+ /// ```
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ #[inline]
+ pub fn iter(&self) -> IterRangeFrom<Idx> {
+ self.clone().into_iter()
+ }
+}
+
+impl<Idx: PartialOrd<Idx>> RangeFrom<Idx> {
+ /// Returns `true` if `item` is contained in the range.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(new_range_api)]
+ /// use core::range::RangeFrom;
+ ///
+ /// assert!(!RangeFrom::from(3..).contains(&2));
+ /// assert!( RangeFrom::from(3..).contains(&3));
+ /// assert!( RangeFrom::from(3..).contains(&1_000_000_000));
+ ///
+ /// assert!( RangeFrom::from(0.0..).contains(&0.5));
+ /// assert!(!RangeFrom::from(0.0..).contains(&f32::NAN));
+ /// assert!(!RangeFrom::from(f32::NAN..).contains(&0.5));
+ /// ```
+ #[inline]
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ pub fn contains<U>(&self, item: &U) -> bool
+ where
+ Idx: PartialOrd<U>,
+ U: ?Sized + PartialOrd<Idx>,
+ {
+ <Self as RangeBounds<Idx>>::contains(self, item)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for RangeFrom<T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(&self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Unbounded
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> RangeBounds<T> for RangeFrom<&T> {
+ fn start_bound(&self) -> Bound<&T> {
+ Included(self.start)
+ }
+ fn end_bound(&self) -> Bound<&T> {
+ Unbounded
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<RangeFrom<T>> for legacy::RangeFrom<T> {
+ #[inline]
+ fn from(value: RangeFrom<T>) -> Self {
+ Self { start: value.start }
+ }
+}
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<T> From<legacy::RangeFrom<T>> for RangeFrom<T> {
+ #[inline]
+ fn from(value: legacy::RangeFrom<T>) -> Self {
+ Self { start: value.start }
+ }
+}
diff --git a/core/src/range/iter.rs b/core/src/range/iter.rs
new file mode 100644
index 0000000000000..2b7db475ffb2c
--- /dev/null
+++ b/core/src/range/iter.rs
@@ -0,0 +1,340 @@
+use crate::num::NonZero;
+use crate::range::{legacy, Range, RangeFrom, RangeInclusive};
+
+use crate::iter::{
+ FusedIterator, Step, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, TrustedStep,
+};
+
+/// By-value [`Range`] iterator.
+#[unstable(feature = "new_range_api", issue = "125687")]
+#[derive(Debug, Clone)]
+pub struct IterRange<A>(legacy::Range<A>);
+
+impl<A> IterRange<A> {
+ /// Returns the remainder of the range being iterated over.
+ pub fn remainder(self) -> Range<A> {
+ Range { start: self.0.start, end: self.0.end }
+ }
+}
+
+/// Safety: This macro must only be used on types that are `Copy` and result in ranges
+/// which have an exact `size_hint()` where the upper bound must not be `None`.
+macro_rules! unsafe_range_trusted_random_access_impl {
+ ($($t:ty)*) => ($(
+ #[doc(hidden)]
+ #[unstable(feature = "trusted_random_access", issue = "none")]
+ unsafe impl TrustedRandomAccess for IterRange<$t> {}
+
+ #[doc(hidden)]
+ #[unstable(feature = "trusted_random_access", issue = "none")]
+ unsafe impl TrustedRandomAccessNoCoerce for IterRange<$t> {
+ const MAY_HAVE_SIDE_EFFECT: bool = false;
+ }
+ )*)
+}
+
+unsafe_range_trusted_random_access_impl! {
+ usize u8 u16
+ isize i8 i16
+}
+
+#[cfg(target_pointer_width = "32")]
+unsafe_range_trusted_random_access_impl! {
+ u32 i32
+}
+
+#[cfg(target_pointer_width = "64")]
+unsafe_range_trusted_random_access_impl!
{
+ u32 i32
+ u64 i64
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> Iterator for IterRange<A> {
+ type Item = A;
+
+ #[inline]
+ fn next(&mut self) -> Option<A> {
+ self.0.next()
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.0.size_hint()
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ self.0.count()
+ }
+
+ #[inline]
+ fn nth(&mut self, n: usize) -> Option<A> {
+ self.0.nth(n)
+ }
+
+ #[inline]
+ fn last(self) -> Option<A> {
+ self.0.last()
+ }
+
+ #[inline]
+ fn min(self) -> Option<A>
+ where
+ A: Ord,
+ {
+ self.0.min()
+ }
+
+ #[inline]
+ fn max(self) -> Option<A>
+ where
+ A: Ord,
+ {
+ self.0.max()
+ }
+
+ #[inline]
+ fn is_sorted(self) -> bool {
+ true
+ }
+
+ #[inline]
+ fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+ self.0.advance_by(n)
+ }
+
+ #[inline]
+ unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item
+ where
+ Self: TrustedRandomAccessNoCoerce,
+ {
+ // SAFETY: The TrustedRandomAccess contract requires that callers only pass an index
+ // that is in bounds.
+ // Additionally Self: TrustedRandomAccess is only implemented for Copy types
+ // which means even repeated reads of the same index would be safe.
+ unsafe { Step::forward_unchecked(self.0.start.clone(), idx) }
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> DoubleEndedIterator for IterRange<A> {
+ #[inline]
+ fn next_back(&mut self) -> Option<A> {
+ self.0.next_back()
+ }
+
+ #[inline]
+ fn nth_back(&mut self, n: usize) -> Option<A> {
+ self.0.nth_back(n)
+ }
+
+ #[inline]
+ fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+ self.0.advance_back_by(n)
+ }
+}
+
+#[unstable(feature = "trusted_len", issue = "37572")]
+unsafe impl<A: TrustedStep> TrustedLen for IterRange<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> FusedIterator for IterRange<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> IntoIterator for Range<A> {
+ type Item = A;
+ type IntoIter = IterRange<A>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ IterRange(self.into())
+ }
+}
+
+/// By-value [`RangeInclusive`] iterator.
+#[unstable(feature = "new_range_api", issue = "125687")]
+#[derive(Debug, Clone)]
+pub struct IterRangeInclusive<A>(legacy::RangeInclusive<A>);
+
+impl<A: Step> IterRangeInclusive<A> {
+ /// Returns the remainder of the range being iterated over.
+ ///
+ /// If the iterator is exhausted or empty, returns `None`.
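
The `Option` return type matters here: unlike the half-open case, an inclusive iterator cannot represent its exhausted state as an empty `start..=end` range. A small sketch (nightly-only, under `new_range_api`) of the accessor defined just below:

```rust
#![feature(new_range_api)]
use core::range::RangeInclusive;

fn main() {
    let mut it = RangeInclusive::from(1..=3).into_iter();
    assert_eq!(it.next(), Some(1));
    // 2 and 3 are still pending, so a remainder is reported.
    assert_eq!(it.remainder(), Some(RangeInclusive { start: 2, end: 3 }));

    let mut done = RangeInclusive::from(1..=1).into_iter();
    assert_eq!(done.next(), Some(1));
    // Exhausted: there is no meaningful `start..=end` left to return.
    assert_eq!(done.remainder(), None);
}
```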
+ pub fn remainder(self) -> Option<RangeInclusive<A>> {
+ if self.0.is_empty() {
+ return None;
+ }
+
+ Some(RangeInclusive { start: self.0.start, end: self.0.end })
+ }
+}
+
+#[unstable(feature = "trusted_random_access", issue = "none")]
+impl<A: Step> Iterator for IterRangeInclusive<A> {
+ type Item = A;
+
+ #[inline]
+ fn next(&mut self) -> Option<A> {
+ self.0.next()
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.0.size_hint()
+ }
+
+ #[inline]
+ fn count(self) -> usize {
+ self.0.count()
+ }
+
+ #[inline]
+ fn nth(&mut self, n: usize) -> Option<A> {
+ self.0.nth(n)
+ }
+
+ #[inline]
+ fn last(self) -> Option<A> {
+ self.0.last()
+ }
+
+ #[inline]
+ fn min(self) -> Option<A>
+ where
+ A: Ord,
+ {
+ self.0.min()
+ }
+
+ #[inline]
+ fn max(self) -> Option<A>
+ where
+ A: Ord,
+ {
+ self.0.max()
+ }
+
+ #[inline]
+ fn is_sorted(self) -> bool {
+ true
+ }
+
+ #[inline]
+ fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+ self.0.advance_by(n)
+ }
+}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> DoubleEndedIterator for IterRangeInclusive<A> {
+ #[inline]
+ fn next_back(&mut self) -> Option<A> {
+ self.0.next_back()
+ }
+
+ #[inline]
+ fn nth_back(&mut self, n: usize) -> Option<A> {
+ self.0.nth_back(n)
+ }
+
+ #[inline]
+ fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+ self.0.advance_back_by(n)
+ }
+}
+
+#[unstable(feature = "trusted_len", issue = "37572")]
+unsafe impl<A: TrustedStep> TrustedLen for IterRangeInclusive<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> FusedIterator for IterRangeInclusive<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> IntoIterator for RangeInclusive<A> {
+ type Item = A;
+ type IntoIter = IterRangeInclusive<A>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ IterRangeInclusive(self.into())
+ }
+}
+
+// These macros generate `ExactSizeIterator` impls for various range types.
+//
+// * `ExactSizeIterator::len` is required to always return an exact `usize`,
+// so no range can be longer than `usize::MAX`.
+// * For integer types in `Range<_>` this is the case for types narrower than or as wide as `usize`.
+// For integer types in `RangeInclusive<_>`
+// this is the case for types *strictly narrower* than `usize`
+// since e.g. `(0..=u64::MAX).len()` would be `u64::MAX + 1`.
+macro_rules! range_exact_iter_impl {
+ ($($t:ty)*) => ($(
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ impl ExactSizeIterator for IterRange<$t> { }
+ )*)
+}
+
+macro_rules! range_incl_exact_iter_impl {
+ ($($t:ty)*) => ($(
+ #[unstable(feature = "new_range_api", issue = "125687")]
+ impl ExactSizeIterator for IterRangeInclusive<$t> { }
+ )*)
+}
+
+range_exact_iter_impl! {
+ usize u8 u16
+ isize i8 i16
+}
+
+range_incl_exact_iter_impl! {
+ u8
+ i8
+}
+
+/// By-value [`RangeFrom`] iterator.
+#[unstable(feature = "new_range_api", issue = "125687")]
+#[derive(Debug, Clone)]
+pub struct IterRangeFrom<A>(legacy::RangeFrom<A>);
+
+impl<A> IterRangeFrom<A> {
+ /// Returns the remainder of the range being iterated over.
+ pub fn remainder(self) -> RangeFrom<A> {
+ RangeFrom { start: self.0.start }
+ }
+}
+
+#[unstable(feature = "trusted_random_access", issue = "none")]
+impl<A: Step> Iterator for IterRangeFrom<A> {
+ type Item = A;
+
+ #[inline]
+ fn next(&mut self) -> Option<A> {
+ self.0.next()
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.0.size_hint()
+ }
+
+ #[inline]
+ fn nth(&mut self, n: usize) -> Option<A> {
+ self.0.nth(n)
+ }
+}
+
+#[unstable(feature = "trusted_len", issue = "37572")]
+unsafe impl<A: TrustedStep> TrustedLen for IterRangeFrom<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> FusedIterator for IterRangeFrom<A> {}
+
+#[unstable(feature = "new_range_api", issue = "125687")]
+impl<A: Step> IntoIterator for RangeFrom<A> {
+ type Item = A;
+ type IntoIter = IterRangeFrom<A>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ IterRangeFrom(self.into())
+ }
+}
diff --git a/core/src/range/legacy.rs b/core/src/range/legacy.rs
new file mode 100644
index 0000000000000..6723c4903f756
--- /dev/null
+++ b/core/src/range/legacy.rs
@@ -0,0 +1,10 @@
+//! # Legacy range types
+//!
+//! The types within this module will be replaced by the types
+//! [`Range`], [`RangeInclusive`], and [`RangeFrom`] in the parent
+//! module, [`core::range`].
+//!
+//! The types here are equivalent to those in [`core::ops`].
+
+#[doc(inline)]
+pub use crate::ops::{Range, RangeFrom, RangeInclusive};
diff --git a/core/src/result.rs b/core/src/result.rs
index 4c6dc4bba4377..f8cdcc000c50e 100644
--- a/core/src/result.rs
+++ b/core/src/result.rs
@@ -228,6 +228,27 @@
 //! [`Err(E)`]: Err
 //! [io::Error]: ../../std/io/struct.Error.html "io::Error"
 //!
+//! # Representation
+//!
+//! In some cases, [`Result<T, E>`] will gain the same size, alignment, and ABI
+//! guarantees as [`Option<U>`] has. One of either the `T` or `E` type must be a
+//! type that qualifies for the `Option` [representation guarantees][opt-rep],
+//! and the *other* type must meet all of the following conditions:
+//! * Is a zero-sized type with alignment 1 (a "1-ZST").
+//! * Has no fields.
+//! * Does not have the `#[non_exhaustive]` attribute.
+//!
+//! For example, `NonZeroI32` qualifies for the `Option` representation
+//! guarantees, and `()` is a zero-sized type with alignment 1, no fields, and
+//! it isn't `non_exhaustive`. This means that both `Result<NonZeroI32, ()>` and
+//! `Result<(), NonZeroI32>` have the same size, alignment, and ABI guarantees
+//! as `Option<NonZeroI32>`. The only difference is the implied semantics:
+//! * `Option<NonZeroI32>` is "a non-zero i32 might be present"
+//! * `Result<NonZeroI32, ()>` is "a non-zero i32 success result, if any"
+//! * `Result<(), NonZeroI32>` is "a non-zero i32 error result, if any"
+//!
+//! [opt-rep]: ../option/index.html#representation "Option Representation"
+//!
 //! # Method overview
 //!
 //! In addition to working with pattern matching, [`Result`] provides a
diff --git a/core/src/slice/ascii.rs b/core/src/slice/ascii.rs
index 19c91ba2eb988..bf444d2f68af8 100644
--- a/core/src/slice/ascii.rs
+++ b/core/src/slice/ascii.rs
@@ -108,7 +108,7 @@ impl [u8] {
 without modifying the original"]
 #[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
 pub fn escape_ascii(&self) -> EscapeAscii<'_> {
- EscapeAscii { inner: self.iter().flat_map(|byte| byte.escape_ascii()) }
+ EscapeAscii { inner: self.iter().flat_map(EscapeByte) }
 }
 /// Returns a byte slice with leading ASCII whitespace bytes removed.
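
The new `# Representation` section in `core/src/result.rs` above is directly observable with `size_of`. A quick illustration (not part of the patch) of the documented guarantee:

```rust
use std::mem::size_of;
use std::num::NonZeroI32;

fn main() {
    // A 1-ZST payload on either side keeps the niche optimization:
    // `Result` stays exactly as large as `Option<NonZeroI32>`.
    assert_eq!(size_of::<Result<NonZeroI32, ()>>(), size_of::<Option<NonZeroI32>>());
    assert_eq!(size_of::<Result<(), NonZeroI32>>(), size_of::<Option<NonZeroI32>>());
    // ...which itself is as large as a plain i32, thanks to the niche.
    assert_eq!(size_of::<Option<NonZeroI32>>(), size_of::<i32>());
}
```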
@@ -123,8 +123,8 @@ impl [u8] { /// assert_eq!(b" ".trim_ascii_start(), b""); /// assert_eq!(b"".trim_ascii_start(), b""); /// ``` - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii_start(&self) -> &[u8] { let mut bytes = self; @@ -152,8 +152,8 @@ impl [u8] { /// assert_eq!(b" ".trim_ascii_end(), b""); /// assert_eq!(b"".trim_ascii_end(), b""); /// ``` - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii_end(&self) -> &[u8] { let mut bytes = self; @@ -182,15 +182,20 @@ impl [u8] { /// assert_eq!(b" ".trim_ascii(), b""); /// assert_eq!(b"".trim_ascii(), b""); /// ``` - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii(&self) -> &[u8] { self.trim_ascii_start().trim_ascii_end() } } -type EscapeByte = impl (Fn(&u8) -> ascii::EscapeDefault) + Copy; +impl_fn_for_zst! { + #[derive(Clone)] + struct EscapeByte impl Fn = |byte: &u8| -> ascii::EscapeDefault { + ascii::escape_default(*byte) + }; +} /// An iterator over the escaped version of a byte slice. /// diff --git a/core/src/slice/index.rs b/core/src/slice/index.rs index 8d7b6165510a8..2624a44bb4bcb 100644 --- a/core/src/slice/index.rs +++ b/core/src/slice/index.rs @@ -1,9 +1,8 @@ //! Indexing implementations for `[T]`. use crate::intrinsics::const_eval_select; -use crate::intrinsics::unchecked_sub; use crate::ops; -use crate::ptr; +use crate::range; use crate::ub_checks::assert_unsafe_precondition; #[stable(feature = "rust1", since = "1.0.0")] @@ -107,8 +106,50 @@ const fn slice_end_index_overflow_fail() -> ! { panic!("attempted to index slice up to maximum usize"); } +// The UbChecks are great for catching bugs in the unsafe methods, but including +// them in safe indexing is unnecessary and hurts inlining and debug runtime perf. +// Both the safe and unsafe public methods share these helpers, +// which use intrinsics directly to get *no* extra checks. 
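
The comment above describes the split that the helpers below implement: safe callers get a plain bounds check, while the `unsafe` entry points keep their precondition checks for debug builds. A minimal sketch (not part of the patch) of the two call paths from a user's perspective:

```rust
fn main() {
    let v = [10, 20, 30];
    // Safe path: a single bounds check, panicking (or returning None)
    // when out of range.
    assert_eq!(v[1], 20);
    assert_eq!(v.get(3), None);
    // Unsafe path: the caller vouches that the index is in bounds, so
    // release builds emit no check at all.
    // SAFETY: 1 < v.len().
    assert_eq!(unsafe { *v.get_unchecked(1) }, 20);
}
```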
+ +#[inline(always)] +const unsafe fn get_noubcheck(ptr: *const [T], index: usize) -> *const T { + let ptr = ptr as *const T; + // SAFETY: The caller already checked these preconditions + unsafe { crate::intrinsics::offset(ptr, index) } +} + +#[inline(always)] +const unsafe fn get_mut_noubcheck(ptr: *mut [T], index: usize) -> *mut T { + let ptr = ptr as *mut T; + // SAFETY: The caller already checked these preconditions + unsafe { crate::intrinsics::offset(ptr, index) } +} + +#[inline(always)] +const unsafe fn get_offset_len_noubcheck( + ptr: *const [T], + offset: usize, + len: usize, +) -> *const [T] { + // SAFETY: The caller already checked these preconditions + let ptr = unsafe { get_noubcheck(ptr, offset) }; + crate::intrinsics::aggregate_raw_ptr(ptr, len) +} + +#[inline(always)] +const unsafe fn get_offset_len_mut_noubcheck( + ptr: *mut [T], + offset: usize, + len: usize, +) -> *mut [T] { + // SAFETY: The caller already checked these preconditions + let ptr = unsafe { get_mut_noubcheck(ptr, offset) }; + crate::intrinsics::aggregate_raw_ptr(ptr, len) +} + mod private_slice_index { - use super::ops; + use super::{ops, range}; + #[stable(feature = "slice_get_slice", since = "1.28.0")] pub trait Sealed {} @@ -129,6 +170,13 @@ mod private_slice_index { #[stable(feature = "slice_index_with_ops_bound_pair", since = "1.53.0")] impl Sealed for (ops::Bound, ops::Bound) {} + #[unstable(feature = "new_range_api", issue = "125687")] + impl Sealed for range::Range {} + #[unstable(feature = "new_range_api", issue = "125687")] + impl Sealed for range::RangeInclusive {} + #[unstable(feature = "new_range_api", issue = "125687")] + impl Sealed for range::RangeFrom {} + impl Sealed for ops::IndexRange {} } @@ -204,13 +252,17 @@ unsafe impl SliceIndex<[T]> for usize { #[inline] fn get(self, slice: &[T]) -> Option<&T> { // SAFETY: `self` is checked to be in bounds. - if self < slice.len() { unsafe { Some(&*self.get_unchecked(slice)) } } else { None } + if self < slice.len() { unsafe { Some(&*get_noubcheck(slice, self)) } } else { None } } #[inline] fn get_mut(self, slice: &mut [T]) -> Option<&mut T> { - // SAFETY: `self` is checked to be in bounds. - if self < slice.len() { unsafe { Some(&mut *self.get_unchecked_mut(slice)) } } else { None } + if self < slice.len() { + // SAFETY: `self` is checked to be in bounds. + unsafe { Some(&mut *get_mut_noubcheck(slice, self)) } + } else { + None + } } #[inline] @@ -228,7 +280,7 @@ unsafe impl SliceIndex<[T]> for usize { // Use intrinsics::assume instead of hint::assert_unchecked so that we don't check the // precondition of this function twice. crate::intrinsics::assume(self < slice.len()); - slice.as_ptr().add(self) + get_noubcheck(slice, self) } } @@ -240,7 +292,7 @@ unsafe impl SliceIndex<[T]> for usize { (this: usize = self, len: usize = slice.len()) => this < len ); // SAFETY: see comments for `get_unchecked` above. - unsafe { slice.as_mut_ptr().add(self) } + unsafe { get_mut_noubcheck(slice, self) } } #[inline] @@ -266,7 +318,7 @@ unsafe impl SliceIndex<[T]> for ops::IndexRange { fn get(self, slice: &[T]) -> Option<&[T]> { if self.end() <= slice.len() { // SAFETY: `self` is checked to be valid and in bounds above. 
- unsafe { Some(&*self.get_unchecked(slice)) } + unsafe { Some(&*get_offset_len_noubcheck(slice, self.start(), self.len())) } } else { None } @@ -276,7 +328,7 @@ unsafe impl SliceIndex<[T]> for ops::IndexRange { fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { if self.end() <= slice.len() { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { Some(&mut *self.get_unchecked_mut(slice)) } + unsafe { Some(&mut *get_offset_len_mut_noubcheck(slice, self.start(), self.len())) } } else { None } @@ -293,7 +345,7 @@ unsafe impl SliceIndex<[T]> for ops::IndexRange { // cannot be longer than `isize::MAX`. They also guarantee that // `self` is in bounds of `slice` so `self` cannot overflow an `isize`, // so the call to `add` is safe. - unsafe { ptr::slice_from_raw_parts(slice.as_ptr().add(self.start()), self.len()) } + unsafe { get_offset_len_noubcheck(slice, self.start(), self.len()) } } #[inline] @@ -305,14 +357,14 @@ unsafe impl SliceIndex<[T]> for ops::IndexRange { ); // SAFETY: see comments for `get_unchecked` above. - unsafe { ptr::slice_from_raw_parts_mut(slice.as_mut_ptr().add(self.start()), self.len()) } + unsafe { get_offset_len_mut_noubcheck(slice, self.start(), self.len()) } } #[inline] fn index(self, slice: &[T]) -> &[T] { if self.end() <= slice.len() { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &*self.get_unchecked(slice) } + unsafe { &*get_offset_len_noubcheck(slice, self.start(), self.len()) } } else { slice_end_index_len_fail(self.end(), slice.len()) } @@ -322,7 +374,7 @@ unsafe impl SliceIndex<[T]> for ops::IndexRange { fn index_mut(self, slice: &mut [T]) -> &mut [T] { if self.end() <= slice.len() { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &mut *self.get_unchecked_mut(slice) } + unsafe { &mut *get_offset_len_mut_noubcheck(slice, self.start(), self.len()) } } else { slice_end_index_len_fail(self.end(), slice.len()) } @@ -339,21 +391,26 @@ unsafe impl SliceIndex<[T]> for ops::Range { #[inline] fn get(self, slice: &[T]) -> Option<&[T]> { - if self.start > self.end || self.end > slice.len() { - None - } else { + // Using checked_sub is a safe way to get `SubUnchecked` in MIR + if let Some(new_len) = usize::checked_sub(self.end, self.start) + && self.end <= slice.len() + { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { Some(&*self.get_unchecked(slice)) } + unsafe { Some(&*get_offset_len_noubcheck(slice, self.start, new_len)) } + } else { + None } } #[inline] fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { - if self.start > self.end || self.end > slice.len() { - None - } else { + if let Some(new_len) = usize::checked_sub(self.end, self.start) + && self.end <= slice.len() + { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { Some(&mut *self.get_unchecked_mut(slice)) } + unsafe { Some(&mut *get_offset_len_mut_noubcheck(slice, self.start, new_len)) } + } else { + None } } @@ -374,8 +431,10 @@ unsafe impl SliceIndex<[T]> for ops::Range { // `self` is in bounds of `slice` so `self` cannot overflow an `isize`, // so the call to `add` is safe and the length calculation cannot overflow. unsafe { - let new_len = unchecked_sub(self.end, self.start); - ptr::slice_from_raw_parts(slice.as_ptr().add(self.start), new_len) + // Using the intrinsic avoids a superfluous UB check, + // since the one on this method already checked `end >= start`. 
+ let new_len = crate::intrinsics::unchecked_sub(self.end, self.start); + get_offset_len_noubcheck(slice, self.start, new_len) } } @@ -392,31 +451,71 @@ unsafe impl SliceIndex<[T]> for ops::Range { ); // SAFETY: see comments for `get_unchecked` above. unsafe { - let new_len = unchecked_sub(self.end, self.start); - ptr::slice_from_raw_parts_mut(slice.as_mut_ptr().add(self.start), new_len) + let new_len = crate::intrinsics::unchecked_sub(self.end, self.start); + get_offset_len_mut_noubcheck(slice, self.start, new_len) } } #[inline(always)] fn index(self, slice: &[T]) -> &[T] { - if self.start > self.end { - slice_index_order_fail(self.start, self.end); - } else if self.end > slice.len() { + // Using checked_sub is a safe way to get `SubUnchecked` in MIR + let Some(new_len) = usize::checked_sub(self.end, self.start) else { + slice_index_order_fail(self.start, self.end) + }; + if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &*self.get_unchecked(slice) } + unsafe { &*get_offset_len_noubcheck(slice, self.start, new_len) } } #[inline] fn index_mut(self, slice: &mut [T]) -> &mut [T] { - if self.start > self.end { - slice_index_order_fail(self.start, self.end); - } else if self.end > slice.len() { + let Some(new_len) = usize::checked_sub(self.end, self.start) else { + slice_index_order_fail(self.start, self.end) + }; + if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { &mut *self.get_unchecked_mut(slice) } + unsafe { &mut *get_offset_len_mut_noubcheck(slice, self.start, new_len) } + } +} + +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex<[T]> for range::Range { + type Output = [T]; + + #[inline] + fn get(self, slice: &[T]) -> Option<&[T]> { + ops::Range::from(self).get(slice) + } + + #[inline] + fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { + ops::Range::from(self).get_mut(slice) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const [T]) -> *const [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked`. + unsafe { ops::Range::from(self).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`. + unsafe { ops::Range::from(self).get_unchecked_mut(slice) } + } + + #[inline(always)] + fn index(self, slice: &[T]) -> &[T] { + ops::Range::from(self).index(slice) + } + + #[inline] + fn index_mut(self, slice: &mut [T]) -> &mut [T] { + ops::Range::from(self).index_mut(slice) } } @@ -506,6 +605,43 @@ unsafe impl SliceIndex<[T]> for ops::RangeFrom { } } +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex<[T]> for range::RangeFrom { + type Output = [T]; + + #[inline] + fn get(self, slice: &[T]) -> Option<&[T]> { + ops::RangeFrom::from(self).get(slice) + } + + #[inline] + fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { + ops::RangeFrom::from(self).get_mut(slice) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const [T]) -> *const [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked`. + unsafe { ops::RangeFrom::from(self).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`. 
+ unsafe { ops::RangeFrom::from(self).get_unchecked_mut(slice) } + } + + #[inline] + fn index(self, slice: &[T]) -> &[T] { + ops::RangeFrom::from(self).index(slice) + } + + #[inline] + fn index_mut(self, slice: &mut [T]) -> &mut [T] { + ops::RangeFrom::from(self).index_mut(slice) + } +} + #[stable(feature = "slice_get_slice_impls", since = "1.15.0")] #[rustc_const_unstable(feature = "const_slice_index", issue = "none")] unsafe impl SliceIndex<[T]> for ops::RangeFull { @@ -590,6 +726,43 @@ unsafe impl SliceIndex<[T]> for ops::RangeInclusive { } } +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex<[T]> for range::RangeInclusive { + type Output = [T]; + + #[inline] + fn get(self, slice: &[T]) -> Option<&[T]> { + ops::RangeInclusive::from(self).get(slice) + } + + #[inline] + fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { + ops::RangeInclusive::from(self).get_mut(slice) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const [T]) -> *const [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked`. + unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut [T]) -> *mut [T] { + // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`. + unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) } + } + + #[inline] + fn index(self, slice: &[T]) -> &[T] { + ops::RangeInclusive::from(self).index(slice) + } + + #[inline] + fn index_mut(self, slice: &mut [T]) -> &mut [T] { + ops::RangeInclusive::from(self).index_mut(slice) + } +} + /// The methods `index` and `index_mut` panic if the end of the range is out of bounds. #[stable(feature = "inclusive_range", since = "1.26.0")] #[rustc_const_unstable(feature = "const_slice_index", issue = "none")] @@ -727,7 +900,7 @@ where /// Performs bounds-checking of a range without panicking. /// -/// This is a version of [`range`] that returns [`None`] instead of panicking. +/// This is a version of [`range()`] that returns [`None`] instead of panicking. 
/// /// # Examples /// diff --git a/core/src/slice/iter.rs b/core/src/slice/iter.rs index 96fc87ab2e9ec..504676ce187a8 100644 --- a/core/src/slice/iter.rs +++ b/core/src/slice/iter.rs @@ -16,7 +16,7 @@ use crate::ptr::{self, without_provenance, without_provenance_mut, NonNull}; use super::{from_raw_parts, from_raw_parts_mut}; -#[stable(feature = "boxed_slice_into_iter", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "boxed_slice_into_iter", since = "1.80.0")] impl !Iterator for [T] {} #[stable(feature = "rust1", since = "1.0.0")] @@ -388,6 +388,9 @@ pub(super) trait SplitIter: DoubleEndedIterator { /// ``` /// let slice = [10, 40, 33, 20]; /// let mut iter = slice.split(|num| num % 3 == 0); +/// assert_eq!(iter.next(), Some(&[10, 40][..])); +/// assert_eq!(iter.next(), Some(&[20][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`split`]: slice::split @@ -541,6 +544,9 @@ impl FusedIterator for Split<'_, T, P> where P: FnMut(&T) -> bool {} /// ``` /// let slice = [10, 40, 33, 20]; /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); +/// assert_eq!(iter.next(), Some(&[10, 40, 33][..])); +/// assert_eq!(iter.next(), Some(&[20][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`split_inclusive`]: slice::split_inclusive @@ -914,7 +920,10 @@ impl FusedIterator for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> b /// /// ``` /// let slice = [11, 22, 33, 0, 44, 55]; -/// let iter = slice.rsplit(|num| *num == 0); +/// let mut iter = slice.rsplit(|num| *num == 0); +/// assert_eq!(iter.next(), Some(&[44, 55][..])); +/// assert_eq!(iter.next(), Some(&[11, 22, 33][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`rsplit`]: slice::rsplit @@ -1134,7 +1143,10 @@ impl> Iterator for GenericSplitN { /// /// ``` /// let slice = [10, 40, 30, 20, 60, 50]; -/// let iter = slice.splitn(2, |num| *num % 3 == 0); +/// let mut iter = slice.splitn(2, |num| *num % 3 == 0); +/// assert_eq!(iter.next(), Some(&[10, 40][..])); +/// assert_eq!(iter.next(), Some(&[20, 60, 50][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`splitn`]: slice::splitn @@ -1175,7 +1187,10 @@ where /// /// ``` /// let slice = [10, 40, 30, 20, 60, 50]; -/// let iter = slice.rsplitn(2, |num| *num % 3 == 0); +/// let mut iter = slice.rsplitn(2, |num| *num % 3 == 0); +/// assert_eq!(iter.next(), Some(&[50][..])); +/// assert_eq!(iter.next(), Some(&[10, 40, 30, 20][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`rsplitn`]: slice::rsplitn @@ -1300,7 +1315,11 @@ forward_iterator! 
{ RSplitNMut: T, &'a mut [T] } /// /// ``` /// let slice = ['r', 'u', 's', 't']; -/// let iter = slice.windows(2); +/// let mut iter = slice.windows(2); +/// assert_eq!(iter.next(), Some(&['r', 'u'][..])); +/// assert_eq!(iter.next(), Some(&['u', 's'][..])); +/// assert_eq!(iter.next(), Some(&['s', 't'][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`windows`]: slice::windows @@ -1448,7 +1467,11 @@ unsafe impl<'a, T> TrustedRandomAccessNoCoerce for Windows<'a, T> { /// /// ``` /// let slice = ['l', 'o', 'r', 'e', 'm']; -/// let iter = slice.chunks(2); +/// let mut iter = slice.chunks(2); +/// assert_eq!(iter.next(), Some(&['l', 'o'][..])); +/// assert_eq!(iter.next(), Some(&['r', 'e'][..])); +/// assert_eq!(iter.next(), Some(&['m'][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`chunks`]: slice::chunks @@ -1819,7 +1842,10 @@ unsafe impl Sync for ChunksMut<'_, T> where T: Sync {} /// /// ``` /// let slice = ['l', 'o', 'r', 'e', 'm']; -/// let iter = slice.chunks_exact(2); +/// let mut iter = slice.chunks_exact(2); +/// assert_eq!(iter.next(), Some(&['l', 'o'][..])); +/// assert_eq!(iter.next(), Some(&['r', 'e'][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`chunks_exact`]: slice::chunks_exact @@ -2163,7 +2189,11 @@ unsafe impl Sync for ChunksExactMut<'_, T> where T: Sync {} /// #![feature(array_windows)] /// /// let slice = [0, 1, 2, 3]; -/// let iter = slice.array_windows::<2>(); +/// let mut iter = slice.array_windows::<2>(); +/// assert_eq!(iter.next(), Some(&[0, 1])); +/// assert_eq!(iter.next(), Some(&[1, 2])); +/// assert_eq!(iter.next(), Some(&[2, 3])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`array_windows`]: slice::array_windows @@ -2285,7 +2315,10 @@ impl ExactSizeIterator for ArrayWindows<'_, T, N> { /// #![feature(array_chunks)] /// /// let slice = ['l', 'o', 'r', 'e', 'm']; -/// let iter = slice.array_chunks::<2>(); +/// let mut iter = slice.array_chunks::<2>(); +/// assert_eq!(iter.next(), Some(&['l', 'o'])); +/// assert_eq!(iter.next(), Some(&['r', 'e'])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`array_chunks`]: slice::array_chunks @@ -2526,7 +2559,11 @@ unsafe impl<'a, T, const N: usize> TrustedRandomAccessNoCoerce for ArrayChunksMu /// /// ``` /// let slice = ['l', 'o', 'r', 'e', 'm']; -/// let iter = slice.rchunks(2); +/// let mut iter = slice.rchunks(2); +/// assert_eq!(iter.next(), Some(&['e', 'm'][..])); +/// assert_eq!(iter.next(), Some(&['o', 'r'][..])); +/// assert_eq!(iter.next(), Some(&['l'][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`rchunks`]: slice::rchunks @@ -2892,7 +2929,10 @@ unsafe impl Sync for RChunksMut<'_, T> where T: Sync {} /// /// ``` /// let slice = ['l', 'o', 'r', 'e', 'm']; -/// let iter = slice.rchunks_exact(2); +/// let mut iter = slice.rchunks_exact(2); +/// assert_eq!(iter.next(), Some(&['e', 'm'][..])); +/// assert_eq!(iter.next(), Some(&['o', 'r'][..])); +/// assert_eq!(iter.next(), None); /// ``` /// /// [`rchunks_exact`]: slice::rchunks_exact diff --git a/core/src/slice/iter/macros.rs b/core/src/slice/iter/macros.rs index 0b8ff5cc01242..c2a3819464410 100644 --- a/core/src/slice/iter/macros.rs +++ b/core/src/slice/iter/macros.rs @@ -103,7 +103,8 @@ macro_rules! iterator { // so this new pointer is inside `self` and thus guaranteed to be non-null. 
unsafe { if_zst!(mut self, - len => *len = len.unchecked_sub(offset), + // Using the intrinsic directly avoids emitting a UbCheck + len => *len = crate::intrinsics::unchecked_sub(*len, offset), _end => self.ptr = self.ptr.add(offset), ); } @@ -119,7 +120,8 @@ macro_rules! iterator { // SAFETY: By our precondition, `offset` can be at most the // current length, so the subtraction can never overflow. len => unsafe { - *len = len.unchecked_sub(offset); + // Using the intrinsic directly avoids emitting a UbCheck + *len = crate::intrinsics::unchecked_sub(*len, offset); self.ptr }, // SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`, diff --git a/core/src/slice/mod.rs b/core/src/slice/mod.rs index f82f965e67cf4..68508e85f8e14 100644 --- a/core/src/slice/mod.rs +++ b/core/src/slice/mod.rs @@ -39,7 +39,6 @@ pub(crate) mod index; mod iter; mod raw; mod rotate; -mod select; mod specialize; #[unstable(feature = "str_internals", issue = "none")] @@ -83,10 +82,6 @@ pub use raw::{from_mut, from_ref}; #[unstable(feature = "slice_from_ptr_range", issue = "89792")] pub use raw::{from_mut_ptr_range, from_ptr_range}; -// This function is public only because there is no other way to unit test heapsort. -#[unstable(feature = "sort_internals", reason = "internal to sort module", issue = "none")] -pub use sort::heapsort; - #[stable(feature = "slice_get_slice", since = "1.28.0")] pub use index::SliceIndex; @@ -2082,8 +2077,8 @@ impl [T] { /// /// assert_eq!(None, v.split_at_checked(7)); /// ``` - #[stable(feature = "split_at_checked", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "split_at_checked", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "split_at_checked", since = "1.80.0")] + #[rustc_const_stable(feature = "split_at_checked", since = "1.80.0")] #[inline] #[must_use] pub const fn split_at_checked(&self, mid: usize) -> Option<(&[T], &[T])> { @@ -2121,7 +2116,7 @@ impl [T] { /// /// assert_eq!(None, v.split_at_mut_checked(7)); /// ``` - #[stable(feature = "split_at_checked", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "split_at_checked", since = "1.80.0")] #[rustc_const_unstable(feature = "const_slice_split_at_mut", issue = "101804")] #[inline] #[must_use] @@ -2884,21 +2879,26 @@ impl [T] { self.binary_search_by(|k| f(k).cmp(b)) } - /// Sorts the slice, but might not preserve the order of equal elements. + /// Sorts the slice **without** preserving the initial order of equal elements. + /// + /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not + /// allocate), and *O*(*n* \* log(*n*)) worst-case. /// - /// This sort is unstable (i.e., may reorder equal elements), in-place - /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case. + /// If `T: Ord` does not implement a total order the resulting order is unspecified. All + /// original elements will remain in the slice and any possible modifications via interior + /// mutability are observed in the input. Same is true if `T: Ord` panics. /// /// # Current implementation /// - /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, - /// which combines the fast average case of randomized quicksort with the fast worst case of - /// heapsort, while achieving linear time on slices with certain patterns. It uses some - /// randomization to avoid degenerate cases, but with a fixed seed to always provide - /// deterministic behavior. 
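A note on the `unchecked_sub` change in the iterator macros above: inside `core`, calling the `unchecked_sub` *method* would emit a `UbCheck` in builds with debug assertions, so the macros call the intrinsic directly. The intrinsic is internal to the standard library; in user code the stable method (Rust 1.79+) is the normal spelling, as in this illustrative sketch (the `advance_len` helper is not part of this patch):

```rust
fn advance_len(len: usize, offset: usize) -> usize {
    assert!(offset <= len, "caller must not advance past the end");
    // SAFETY: checked above, so the subtraction cannot wrap.
    unsafe { len.unchecked_sub(offset) }
}

fn main() {
    assert_eq!(advance_len(10, 3), 7);
}
```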
+    /// The current implementation is based on [ipnsort] by Lukas Bergdoll and Orson Peters, which
+    /// combines the fast average case of quicksort with the fast worst case of heapsort, achieving
+    /// linear time on fully sorted and reversed inputs. On inputs with *k* distinct elements, the
+    /// expected time to sort the data is *O*(*n* \* log(*k*)).
     ///
     /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
-    /// slice consists of several concatenated sorted sequences.
+    /// slice is partially sorted.
+    ///
+    /// If `T: Ord` does not implement a total order, the implementation may panic.
     ///
     /// # Examples
     ///
@@ -2909,25 +2909,29 @@ impl<T> [T] {
     /// assert!(v == [-5, -3, 1, 2, 4]);
     /// ```
     ///
-    /// [pdqsort]: https://github.com/orlp/pdqsort
+    /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort
     #[stable(feature = "sort_unstable", since = "1.20.0")]
     #[inline]
     pub fn sort_unstable(&mut self)
     where
         T: Ord,
     {
-        sort::quicksort(self, T::lt);
+        sort::unstable::sort(self, &mut T::lt);
     }
 
-    /// Sorts the slice with a comparator function, but might not preserve the order of equal
-    /// elements.
+    /// Sorts the slice with a comparator function, **without** preserving the initial order of
+    /// equal elements.
+    ///
+    /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not
+    /// allocate), and *O*(*n* \* log(*n*)) worst-case.
     ///
-    /// This sort is unstable (i.e., may reorder equal elements), in-place
-    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
+    /// The comparator function should define a total ordering for the elements in the slice. If the
+    /// ordering is not total, the order of the elements is unspecified. All original elements will
+    /// remain in the slice and any possible modifications via interior mutability are observed in
+    /// the input. The same is true if the comparator function panics.
     ///
-    /// The comparator function must define a total ordering for the elements in the slice. If
-    /// the ordering is not total, the order of the elements is unspecified. An order is a
-    /// total order if it is (for all `a`, `b` and `c`):
+    /// An order is a total order if it is (for all `a`, `b` and `c`):
     ///
     /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
     /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
@@ -2943,14 +2947,15 @@ impl<T> [T] {
     ///
     /// # Current implementation
     ///
-    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
-    /// which combines the fast average case of randomized quicksort with the fast worst case of
-    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
-    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
-    /// deterministic behavior.
+    /// The current implementation is based on [ipnsort] by Lukas Bergdoll and Orson Peters, which
+    /// combines the fast average case of quicksort with the fast worst case of heapsort, achieving
+    /// linear time on fully sorted and reversed inputs. On inputs with *k* distinct elements, the
+    /// expected time to sort the data is *O*(*n* \* log(*k*)).
     ///
     /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
-    /// slice consists of several concatenated sorted sequences.
+    /// slice is partially sorted.
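As a quick illustration of the total-order requirement spelled out above (example code, not part of this patch): floats implement only `PartialOrd`, so the comparator has to supply the total order, for instance via `f64::total_cmp`:

```rust
fn main() {
    let mut v = [5.0_f64, 4.1, f64::NAN, 1.2];
    // `total_cmp` is a total order over all floats; a comparator built on
    // `partial_cmp(..).unwrap()` would panic as soon as it compares NaN.
    v.sort_unstable_by(|a, b| a.total_cmp(b));
    assert_eq!(v[..3], [1.2, 4.1, 5.0]);
    assert!(v[3].is_nan()); // positive NaN sorts last under `total_cmp`
}
```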
+    ///
+    /// If the comparator function does not implement a total order, the implementation may panic.
     ///
     /// # Examples
     ///
@@ -2964,34 +2969,37 @@ impl<T> [T] {
     /// assert!(v == [5, 4, 3, 2, 1]);
     /// ```
     ///
-    /// [pdqsort]: https://github.com/orlp/pdqsort
+    /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort
     #[stable(feature = "sort_unstable", since = "1.20.0")]
     #[inline]
     pub fn sort_unstable_by<F>(&mut self, mut compare: F)
     where
         F: FnMut(&T, &T) -> Ordering,
     {
-        sort::quicksort(self, |a, b| compare(a, b) == Ordering::Less);
+        sort::unstable::sort(self, &mut |a, b| compare(a, b) == Ordering::Less);
     }
 
-    /// Sorts the slice with a key extraction function, but might not preserve the order of equal
-    /// elements.
+    /// Sorts the slice with a key extraction function, **without** preserving the initial order of
+    /// equal elements.
+    ///
+    /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not
+    /// allocate), and *O*(*n* \* log(*n*)) worst-case.
     ///
-    /// This sort is unstable (i.e., may reorder equal elements), in-place
-    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case, where the key function is
-    /// *O*(*m*).
+    /// If `K: Ord` does not implement a total order, the resulting order is unspecified.
+    /// All original elements will remain in the slice and any possible modifications via interior
+    /// mutability are observed in the input. The same is true if `K: Ord` panics.
     ///
     /// # Current implementation
     ///
-    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
-    /// which combines the fast average case of randomized quicksort with the fast worst case of
-    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
-    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
-    /// deterministic behavior.
+    /// The current implementation is based on [ipnsort] by Lukas Bergdoll and Orson Peters, which
+    /// combines the fast average case of quicksort with the fast worst case of heapsort, achieving
+    /// linear time on fully sorted and reversed inputs. On inputs with *k* distinct elements, the
+    /// expected time to sort the data is *O*(*n* \* log(*k*)).
     ///
-    /// Due to its key calling strategy, [`sort_unstable_by_key`](#method.sort_unstable_by_key)
-    /// is likely to be slower than [`sort_by_cached_key`](#method.sort_by_cached_key) in
-    /// cases where the key function is expensive.
+    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
+    /// slice is partially sorted.
+    ///
+    /// If `K: Ord` does not implement a total order, the implementation may panic.
     ///
     /// # Examples
     ///
@@ -3002,7 +3010,7 @@ impl<T> [T] {
     /// assert!(v == [1, 2, -3, 4, -5]);
     /// ```
     ///
-    /// [pdqsort]: https://github.com/orlp/pdqsort
+    /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort
     #[stable(feature = "sort_unstable", since = "1.20.0")]
     #[inline]
     pub fn sort_unstable_by_key<K, F>(&mut self, mut f: F)
@@ -3010,27 +3018,32 @@
     where
         F: FnMut(&T) -> K,
         K: Ord,
     {
-        sort::quicksort(self, |a, b| f(a).lt(&f(b)));
+        sort::unstable::sort(self, &mut |a, b| f(a).lt(&f(b)));
     }
 
-    /// Reorder the slice such that the element at `index` after the reordering is at its final sorted position.
+    /// Reorder the slice such that the element at `index` after the reordering is at its final
+    /// sorted position.
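The deleted cross-reference above still points at a real trade-off: the key function runs on both elements of every comparison, so it should stay cheap, and `sort_by_cached_key` remains the better fit for expensive keys. A small usage sketch (not part of this patch):

```rust
fn main() {
    let mut words = ["sort", "unstable", "by", "key"];
    // `str::len` is trivially cheap, a good fit for `sort_unstable_by_key`.
    words.sort_unstable_by_key(|w| w.len());
    assert_eq!(words, ["by", "key", "sort", "unstable"]);
}
```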
/// /// This reordering has the additional property that any value at position `i < index` will be /// less than or equal to any value at a position `j > index`. Additionally, this reordering is - /// unstable (i.e. any number of equal elements may end up at position `index`), in-place - /// (i.e. does not allocate), and runs in *O*(*n*) time. - /// This function is also known as "kth element" in other libraries. + /// unstable (i.e. any number of equal elements may end up at position `index`), in-place (i.e. + /// does not allocate), and runs in *O*(*n*) time. This function is also known as "kth element" + /// in other libraries. /// - /// It returns a triplet of the following from the reordered slice: - /// the subslice prior to `index`, the element at `index`, and the subslice after `index`; - /// accordingly, the values in those two subslices will respectively all be less-than-or-equal-to - /// and greater-than-or-equal-to the value of the element at `index`. + /// It returns a triplet of the following from the reordered slice: the subslice prior to + /// `index`, the element at `index`, and the subslice after `index`; accordingly, the values in + /// those two subslices will respectively all be less-than-or-equal-to and + /// greater-than-or-equal-to the value of the element at `index`. /// /// # Current implementation /// - /// The current algorithm is an introselect implementation based on Pattern Defeating Quicksort, which is also - /// the basis for [`sort_unstable`]. The fallback algorithm is Median of Medians using Tukey's Ninther for - /// pivot selection, which guarantees linear runtime for all inputs. + /// The current algorithm is an introselect implementation based on [ipnsort] by Lukas Bergdoll + /// and Orson Peters, which is also the basis for [`sort_unstable`]. The fallback algorithm is + /// Median of Medians using Tukey's Ninther for pivot selection, which guarantees linear runtime + /// for all inputs. + /// + /// It is typically faster than stable sorting, except in a few special cases, e.g., when the + /// slice is nearly fully sorted, where `slice::sort` may be faster. /// /// [`sort_unstable`]: slice::sort_unstable /// @@ -3058,35 +3071,40 @@ impl [T] { /// v == [-3, -5, 1, 4, 2] || /// v == [-5, -3, 1, 4, 2]); /// ``` + /// + /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort #[stable(feature = "slice_select_nth_unstable", since = "1.49.0")] #[inline] pub fn select_nth_unstable(&mut self, index: usize) -> (&mut [T], &mut T, &mut [T]) where T: Ord, { - select::partition_at_index(self, index, T::lt) + sort::select::partition_at_index(self, index, T::lt) } - /// Reorder the slice with a comparator function such that the element at `index` after the reordering is at - /// its final sorted position. + /// Reorder the slice with a comparator function such that the element at `index` after the + /// reordering is at its final sorted position. /// /// This reordering has the additional property that any value at position `i < index` will be /// less than or equal to any value at a position `j > index` using the comparator function. /// Additionally, this reordering is unstable (i.e. any number of equal elements may end up at - /// position `index`), in-place (i.e. does not allocate), and runs in *O*(*n*) time. - /// This function is also known as "kth element" in other libraries. + /// position `index`), in-place (i.e. does not allocate), and runs in *O*(*n*) time. 
This + /// function is also known as "kth element" in other libraries. /// - /// It returns a triplet of the following from - /// the slice reordered according to the provided comparator function: the subslice prior to - /// `index`, the element at `index`, and the subslice after `index`; accordingly, the values in - /// those two subslices will respectively all be less-than-or-equal-to and greater-than-or-equal-to - /// the value of the element at `index`. + /// It returns a triplet of the following from the slice reordered according to the provided + /// comparator function: the subslice prior to `index`, the element at `index`, and the subslice + /// after `index`; accordingly, the values in those two subslices will respectively all be + /// less-than-or-equal-to and greater-than-or-equal-to the value of the element at `index`. /// /// # Current implementation /// - /// The current algorithm is an introselect implementation based on Pattern Defeating Quicksort, which is also - /// the basis for [`sort_unstable`]. The fallback algorithm is Median of Medians using Tukey's Ninther for - /// pivot selection, which guarantees linear runtime for all inputs. + /// The current algorithm is an introselect implementation based on [ipnsort] by Lukas Bergdoll + /// and Orson Peters, which is also the basis for [`sort_unstable`]. The fallback algorithm is + /// Median of Medians using Tukey's Ninther for pivot selection, which guarantees linear runtime + /// for all inputs. + /// + /// It is typically faster than stable sorting, except in a few special cases, e.g., when the + /// slice is nearly fully sorted, where `slice::sort` may be faster. /// /// [`sort_unstable`]: slice::sort_unstable /// @@ -3114,6 +3132,8 @@ impl [T] { /// v == [4, 2, 1, -5, -3] || /// v == [4, 2, 1, -3, -5]); /// ``` + /// + /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort #[stable(feature = "slice_select_nth_unstable", since = "1.49.0")] #[inline] pub fn select_nth_unstable_by( @@ -3124,29 +3144,32 @@ impl [T] { where F: FnMut(&T, &T) -> Ordering, { - select::partition_at_index(self, index, |a: &T, b: &T| compare(a, b) == Less) + sort::select::partition_at_index(self, index, |a: &T, b: &T| compare(a, b) == Less) } - /// Reorder the slice with a key extraction function such that the element at `index` after the reordering is - /// at its final sorted position. + /// Reorder the slice with a key extraction function such that the element at `index` after the + /// reordering is at its final sorted position. /// /// This reordering has the additional property that any value at position `i < index` will be /// less than or equal to any value at a position `j > index` using the key extraction function. /// Additionally, this reordering is unstable (i.e. any number of equal elements may end up at - /// position `index`), in-place (i.e. does not allocate), and runs in *O*(*n*) time. - /// This function is also known as "kth element" in other libraries. + /// position `index`), in-place (i.e. does not allocate), and runs in *O*(*n*) time. This + /// function is also known as "kth element" in other libraries. 
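A worked sketch of the selection API documented above (example code, not part of this patch): `select_nth_unstable` places one order statistic, here the median, in *O*(*n*) without sorting either side:

```rust
fn main() {
    let mut v = [7, 1, 9, 4, 3];
    let mid = v.len() / 2;
    let (lesser, median, greater) = v.select_nth_unstable(mid);
    assert_eq!(*median, 4);
    // Both sides are partitioned around the median, but not sorted.
    assert!(lesser.iter().all(|&x| x <= 4));
    assert!(greater.iter().all(|&x| x >= 4));
}
```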
/// - /// It returns a triplet of the following from - /// the slice reordered according to the provided key extraction function: the subslice prior to - /// `index`, the element at `index`, and the subslice after `index`; accordingly, the values in - /// those two subslices will respectively all be less-than-or-equal-to and greater-than-or-equal-to - /// the value of the element at `index`. + /// It returns a triplet of the following from the slice reordered according to the provided key + /// extraction function: the subslice prior to `index`, the element at `index`, and the subslice + /// after `index`; accordingly, the values in those two subslices will respectively all be + /// less-than-or-equal-to and greater-than-or-equal-to the value of the element at `index`. /// /// # Current implementation /// - /// The current algorithm is an introselect implementation based on Pattern Defeating Quicksort, which is also - /// the basis for [`sort_unstable`]. The fallback algorithm is Median of Medians using Tukey's Ninther for - /// pivot selection, which guarantees linear runtime for all inputs. + /// The current algorithm is an introselect implementation based on [ipnsort] by Lukas Bergdoll + /// and Orson Peters, which is also the basis for [`sort_unstable`]. The fallback algorithm is + /// Median of Medians using Tukey's Ninther for pivot selection, which guarantees linear runtime + /// for all inputs. + /// + /// It is typically faster than stable sorting, except in a few special cases, e.g., when the + /// slice is nearly fully sorted, where `slice::sort` may be faster. /// /// [`sort_unstable`]: slice::sort_unstable /// @@ -3174,6 +3197,8 @@ impl [T] { /// v == [2, 1, -3, 4, -5] || /// v == [2, 1, -3, -5, 4]); /// ``` + /// + /// [ipnsort]: https://github.com/Voultapher/sort-research-rs/tree/main/ipnsort #[stable(feature = "slice_select_nth_unstable", since = "1.49.0")] #[inline] pub fn select_nth_unstable_by_key( @@ -3185,7 +3210,7 @@ impl [T] { F: FnMut(&T) -> K, K: Ord, { - select::partition_at_index(self, index, |a: &T, b: &T| f(a).lt(&f(b))) + sort::select::partition_at_index(self, index, |a: &T, b: &T| f(a).lt(&f(b))) } /// Moves all consecutive repeated elements to the end of the slice according to the @@ -3934,17 +3959,8 @@ impl [T] { /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. /// - /// This is a safe wrapper around [`slice::align_to`], so has the same weak - /// postconditions as that method. You're only assured that - /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. - /// - /// Notably, all of the following are possible: - /// - `prefix.len() >= LANES`. - /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. - /// - `suffix.len() >= LANES`. - /// - /// That said, this is a safe method, so if you're only writing safe code, - /// then this can at most cause incorrect logic, not unsoundness. + /// This is a safe wrapper around [`slice::align_to`], so inherits the same + /// guarantees as that method. /// /// # Panics /// @@ -4008,17 +4024,8 @@ impl [T] { /// Split a mutable slice into a mutable prefix, a middle of aligned SIMD types, /// and a mutable suffix. /// - /// This is a safe wrapper around [`slice::align_to_mut`], so has the same weak - /// postconditions as that method. You're only assured that - /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. - /// - /// Notably, all of the following are possible: - /// - `prefix.len() >= LANES`. 
- /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. - /// - `suffix.len() >= LANES`. - /// - /// That said, this is a safe method, so if you're only writing safe code, - /// then this can at most cause incorrect logic, not unsoundness. + /// This is a safe wrapper around [`slice::align_to_mut`], so inherits the same + /// guarantees as that method. /// /// This is the mutable version of [`slice::as_simd`]; see that for examples. /// @@ -4531,8 +4538,6 @@ impl [[T; N]] { /// # Examples /// /// ``` - /// #![feature(slice_flatten)] - /// /// assert_eq!([[1, 2, 3], [4, 5, 6]].as_flattened(), &[1, 2, 3, 4, 5, 6]); /// /// assert_eq!( @@ -4546,7 +4551,8 @@ impl [[T; N]] { /// let empty_slice_of_arrays: &[[u32; 10]] = &[]; /// assert!(empty_slice_of_arrays.as_flattened().is_empty()); /// ``` - #[unstable(feature = "slice_flatten", issue = "95629")] + #[stable(feature = "slice_flatten", since = "1.80.0")] + #[rustc_const_unstable(feature = "const_slice_flatten", issue = "95629")] pub const fn as_flattened(&self) -> &[T] { let len = if T::IS_ZST { self.len().checked_mul(N).expect("slice len overflow") @@ -4572,8 +4578,6 @@ impl [[T; N]] { /// # Examples /// /// ``` - /// #![feature(slice_flatten)] - /// /// fn add_5_to_all(slice: &mut [i32]) { /// for i in slice { /// *i += 5; @@ -4584,7 +4588,7 @@ impl [[T; N]] { /// add_5_to_all(array.as_flattened_mut()); /// assert_eq!(array, [[6, 7, 8], [9, 10, 11], [12, 13, 14]]); /// ``` - #[unstable(feature = "slice_flatten", issue = "95629")] + #[stable(feature = "slice_flatten", since = "1.80.0")] pub fn as_flattened_mut(&mut self) -> &mut [T] { let len = if T::IS_ZST { self.len().checked_mul(N).expect("slice len overflow") diff --git a/core/src/slice/raw.rs b/core/src/slice/raw.rs index 29a12f106c5ed..280aead270e76 100644 --- a/core/src/slice/raw.rs +++ b/core/src/slice/raw.rs @@ -1,7 +1,6 @@ //! Free functions to create `&[T]` and `&mut [T]`. use crate::array; -use crate::mem::{align_of, size_of}; use crate::ops::Range; use crate::ptr; use crate::ub_checks; @@ -83,6 +82,39 @@ use crate::ub_checks; /// } /// ``` /// +/// ### FFI: Handling null pointers +/// +/// In languages such as C++, pointers to empty collections are not guaranteed to be non-null. +/// When accepting such pointers, they have to be checked for null-ness to avoid undefined +/// behavior. +/// +/// ``` +/// use std::slice; +/// +/// /// Sum the elements of an FFI slice. +/// /// +/// /// # Safety +/// /// +/// /// If ptr is not NULL, it must be correctly aligned and +/// /// point to `len` initialized items of type `f32`. +/// unsafe extern "C" fn sum_slice(ptr: *const f32, len: usize) -> f32 { +/// let data = if ptr.is_null() { +/// // `len` is assumed to be 0. +/// &[] +/// } else { +/// // SAFETY: see function docstring. 
+/// unsafe { slice::from_raw_parts(ptr, len) } +/// }; +/// data.into_iter().sum() +/// } +/// +/// // This could be the result of C++'s std::vector::data(): +/// let ptr = std::ptr::null(); +/// // And this could be std::vector::size(): +/// let len = 0; +/// assert_eq!(unsafe { sum_slice(ptr, len) }, 0.0); +/// ``` +/// /// [valid]: ptr#safety /// [`NonNull::dangling()`]: ptr::NonNull::dangling #[inline] diff --git a/core/src/slice/rotate.rs b/core/src/slice/rotate.rs index fa8c238f8e7a2..1d7b86339799b 100644 --- a/core/src/slice/rotate.rs +++ b/core/src/slice/rotate.rs @@ -71,7 +71,9 @@ pub unsafe fn ptr_rotate(mut left: usize, mut mid: *mut T, mut right: usize) if (right == 0) || (left == 0) { return; } - if (left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>()) { + if !cfg!(feature = "optimize_for_size") + && ((left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>())) + { // Algorithm 1 // Microbenchmarks indicate that the average performance for random shifts is better all // the way until about `left + right == 32`, but the worst case performance breaks even @@ -158,7 +160,9 @@ pub unsafe fn ptr_rotate(mut left: usize, mut mid: *mut T, mut right: usize) } return; // `T` is not a zero-sized type, so it's okay to divide by its size. - } else if cmp::min(left, right) <= mem::size_of::() / mem::size_of::() { + } else if !cfg!(feature = "optimize_for_size") + && cmp::min(left, right) <= mem::size_of::() / mem::size_of::() + { // Algorithm 2 // The `[T; 0]` here is to ensure this is appropriately aligned for T let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); diff --git a/core/src/slice/sort.rs b/core/src/slice/sort.rs deleted file mode 100644 index 993a608f42b60..0000000000000 --- a/core/src/slice/sort.rs +++ /dev/null @@ -1,1383 +0,0 @@ -//! Slice sorting -//! -//! This module contains a sorting algorithm based on Orson Peters' pattern-defeating quicksort, -//! published at: -//! -//! Unstable sorting is compatible with core because it doesn't allocate memory, unlike our -//! stable sorting implementation. -//! -//! In addition it also contains the core logic of the stable sort used by `slice::sort` based on -//! TimSort. - -use crate::cmp; -use crate::mem::{self, MaybeUninit, SizedTypeProperties}; -use crate::ptr; - -// When dropped, copies from `src` into `dest`. -struct InsertionHole { - src: *const T, - dest: *mut T, -} - -impl Drop for InsertionHole { - fn drop(&mut self) { - // SAFETY: This is a helper class. Please refer to its usage for correctness. Namely, one - // must be sure that `src` and `dst` does not overlap as required by - // `ptr::copy_nonoverlapping` and are both valid for writes. - unsafe { - ptr::copy_nonoverlapping(self.src, self.dest, 1); - } - } -} - -/// Inserts `v[v.len() - 1]` into pre-sorted sequence `v[..v.len() - 1]` so that whole `v[..]` -/// becomes sorted. -unsafe fn insert_tail(v: &mut [T], is_less: &mut F) -where - F: FnMut(&T, &T) -> bool, -{ - debug_assert!(v.len() >= 2); - - let arr_ptr = v.as_mut_ptr(); - let i = v.len() - 1; - - // SAFETY: caller must ensure v is at least len 2. - unsafe { - // See insert_head which talks about why this approach is beneficial. - let i_ptr = arr_ptr.add(i); - - // It's important that we use i_ptr here. If this check is positive and we continue, - // We want to make sure that no other copy of the value was seen by is_less. - // Otherwise we would have to copy it back. 
- if is_less(&*i_ptr, &*i_ptr.sub(1)) { - // It's important, that we use tmp for comparison from now on. As it is the value that - // will be copied back. And notionally we could have created a divergence if we copy - // back the wrong value. - let tmp = mem::ManuallyDrop::new(ptr::read(i_ptr)); - // Intermediate state of the insertion process is always tracked by `hole`, which - // serves two purposes: - // 1. Protects integrity of `v` from panics in `is_less`. - // 2. Fills the remaining hole in `v` in the end. - // - // Panic safety: - // - // If `is_less` panics at any point during the process, `hole` will get dropped and - // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it - // initially held exactly once. - let mut hole = InsertionHole { src: &*tmp, dest: i_ptr.sub(1) }; - ptr::copy_nonoverlapping(hole.dest, i_ptr, 1); - - // SAFETY: We know i is at least 1. - for j in (0..(i - 1)).rev() { - let j_ptr = arr_ptr.add(j); - if !is_less(&*tmp, &*j_ptr) { - break; - } - - ptr::copy_nonoverlapping(j_ptr, hole.dest, 1); - hole.dest = j_ptr; - } - // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. - } - } -} - -/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. -/// -/// This is the integral subroutine of insertion sort. -unsafe fn insert_head(v: &mut [T], is_less: &mut F) -where - F: FnMut(&T, &T) -> bool, -{ - debug_assert!(v.len() >= 2); - - // SAFETY: caller must ensure v is at least len 2. - unsafe { - if is_less(v.get_unchecked(1), v.get_unchecked(0)) { - let arr_ptr = v.as_mut_ptr(); - - // There are three ways to implement insertion here: - // - // 1. Swap adjacent elements until the first one gets to its final destination. - // However, this way we copy data around more than is necessary. If elements are big - // structures (costly to copy), this method will be slow. - // - // 2. Iterate until the right place for the first element is found. Then shift the - // elements succeeding it to make room for it and finally place it into the - // remaining hole. This is a good method. - // - // 3. Copy the first element into a temporary variable. Iterate until the right place - // for it is found. As we go along, copy every traversed element into the slot - // preceding it. Finally, copy data from the temporary variable into the remaining - // hole. This method is very good. Benchmarks demonstrated slightly better - // performance than with the 2nd method. - // - // All methods were benchmarked, and the 3rd showed best results. So we chose that one. - let tmp = mem::ManuallyDrop::new(ptr::read(arr_ptr)); - - // Intermediate state of the insertion process is always tracked by `hole`, which - // serves two purposes: - // 1. Protects integrity of `v` from panics in `is_less`. - // 2. Fills the remaining hole in `v` in the end. - // - // Panic safety: - // - // If `is_less` panics at any point during the process, `hole` will get dropped and - // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it - // initially held exactly once. - let mut hole = InsertionHole { src: &*tmp, dest: arr_ptr.add(1) }; - ptr::copy_nonoverlapping(arr_ptr.add(1), arr_ptr.add(0), 1); - - for i in 2..v.len() { - if !is_less(&v.get_unchecked(i), &*tmp) { - break; - } - ptr::copy_nonoverlapping(arr_ptr.add(i), arr_ptr.add(i - 1), 1); - hole.dest = arr_ptr.add(i); - } - // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. 
- } - } -} - -/// Sort `v` assuming `v[..offset]` is already sorted. -/// -/// Never inline this function to avoid code bloat. It still optimizes nicely and has practically no -/// performance impact. Even improving performance in some cases. -#[inline(never)] -pub(super) fn insertion_sort_shift_left(v: &mut [T], offset: usize, is_less: &mut F) -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - - // Using assert here improves performance. - assert!(offset != 0 && offset <= len); - - // Shift each element of the unsorted region v[i..] as far left as is needed to make v sorted. - for i in offset..len { - // SAFETY: we tested that `offset` must be at least 1, so this loop is only entered if len - // >= 2. The range is exclusive and we know `i` must be at least 1 so this slice has at - // >least len 2. - unsafe { - insert_tail(&mut v[..=i], is_less); - } - } -} - -/// Sort `v` assuming `v[offset..]` is already sorted. -/// -/// Never inline this function to avoid code bloat. It still optimizes nicely and has practically no -/// performance impact. Even improving performance in some cases. -#[inline(never)] -fn insertion_sort_shift_right(v: &mut [T], offset: usize, is_less: &mut F) -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - - // Using assert here improves performance. - assert!(offset != 0 && offset <= len && len >= 2); - - // Shift each element of the unsorted region v[..i] as far left as is needed to make v sorted. - for i in (0..offset).rev() { - // SAFETY: we tested that `offset` must be at least 1, so this loop is only entered if len - // >= 2.We ensured that the slice length is always at least 2 long. We know that start_found - // will be at least one less than end, and the range is exclusive. Which gives us i always - // <= (end - 2). - unsafe { - insert_head(&mut v[i..len], is_less); - } - } -} - -/// Partially sorts a slice by shifting several out-of-order elements around. -/// -/// Returns `true` if the slice is sorted at the end. This function is *O*(*n*) worst-case. -#[cold] -fn partial_insertion_sort(v: &mut [T], is_less: &mut F) -> bool -where - F: FnMut(&T, &T) -> bool, -{ - // Maximum number of adjacent out-of-order pairs that will get shifted. - const MAX_STEPS: usize = 5; - // If the slice is shorter than this, don't shift any elements. - const SHORTEST_SHIFTING: usize = 50; - - let len = v.len(); - let mut i = 1; - - for _ in 0..MAX_STEPS { - // SAFETY: We already explicitly did the bound checking with `i < len`. - // All our subsequent indexing is only in the range `0 <= index < len` - unsafe { - // Find the next pair of adjacent out-of-order elements. - while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) { - i += 1; - } - } - - // Are we done? - if i == len { - return true; - } - - // Don't shift elements on short arrays, that has a performance cost. - if len < SHORTEST_SHIFTING { - return false; - } - - // Swap the found pair of elements. This puts them in correct order. - v.swap(i - 1, i); - - if i >= 2 { - // Shift the smaller element to the left. - insertion_sort_shift_left(&mut v[..i], i - 1, is_less); - - // Shift the greater element to the right. - insertion_sort_shift_right(&mut v[..i], 1, is_less); - } - } - - // Didn't manage to sort the slice in the limited number of steps. - false -} - -/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case. 
-#[cold] -#[unstable(feature = "sort_internals", reason = "internal to sort module", issue = "none")] -pub fn heapsort(v: &mut [T], mut is_less: F) -where - F: FnMut(&T, &T) -> bool, -{ - // This binary heap respects the invariant `parent >= child`. - let mut sift_down = |v: &mut [T], mut node| { - loop { - // Children of `node`. - let mut child = 2 * node + 1; - if child >= v.len() { - break; - } - - // Choose the greater child. - if child + 1 < v.len() { - // We need a branch to be sure not to out-of-bounds index, - // but it's highly predictable. The comparison, however, - // is better done branchless, especially for primitives. - child += is_less(&v[child], &v[child + 1]) as usize; - } - - // Stop if the invariant holds at `node`. - if !is_less(&v[node], &v[child]) { - break; - } - - // Swap `node` with the greater child, move one step down, and continue sifting. - v.swap(node, child); - node = child; - } - }; - - // Build the heap in linear time. - for i in (0..v.len() / 2).rev() { - sift_down(v, i); - } - - // Pop maximal elements from the heap. - for i in (1..v.len()).rev() { - v.swap(0, i); - sift_down(&mut v[..i], 0); - } -} - -/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal -/// to `pivot`. -/// -/// Returns the number of elements smaller than `pivot`. -/// -/// Partitioning is performed block-by-block in order to minimize the cost of branching operations. -/// This idea is presented in the [BlockQuicksort][pdf] paper. -/// -/// [pdf]: https://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf -fn partition_in_blocks(v: &mut [T], pivot: &T, is_less: &mut F) -> usize -where - F: FnMut(&T, &T) -> bool, -{ - // Number of elements in a typical block. - const BLOCK: usize = 128; - - // The partitioning algorithm repeats the following steps until completion: - // - // 1. Trace a block from the left side to identify elements greater than or equal to the pivot. - // 2. Trace a block from the right side to identify elements smaller than the pivot. - // 3. Exchange the identified elements between the left and right side. - // - // We keep the following variables for a block of elements: - // - // 1. `block` - Number of elements in the block. - // 2. `start` - Start pointer into the `offsets` array. - // 3. `end` - End pointer into the `offsets` array. - // 4. `offsets` - Indices of out-of-order elements within the block. - - // The current block on the left side (from `l` to `l.add(block_l)`). - let mut l = v.as_mut_ptr(); - let mut block_l = BLOCK; - let mut start_l = ptr::null_mut(); - let mut end_l = ptr::null_mut(); - let mut offsets_l = [MaybeUninit::::uninit(); BLOCK]; - - // The current block on the right side (from `r.sub(block_r)` to `r`). - // SAFETY: The documentation for .add() specifically mention that `vec.as_ptr().add(vec.len())` is always safe - let mut r = unsafe { l.add(v.len()) }; - let mut block_r = BLOCK; - let mut start_r = ptr::null_mut(); - let mut end_r = ptr::null_mut(); - let mut offsets_r = [MaybeUninit::::uninit(); BLOCK]; - - // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather - // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient. - - // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive). - fn width(l: *mut T, r: *mut T) -> usize { - assert!(mem::size_of::() > 0); - // FIXME: this should *likely* use `offset_from`, but more - // investigation is needed (including running tests in miri). 
- (r.addr() - l.addr()) / mem::size_of::() - } - - loop { - // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do - // some patch-up work in order to partition the remaining elements in between. - let is_done = width(l, r) <= 2 * BLOCK; - - if is_done { - // Number of remaining elements (still not compared to the pivot). - let mut rem = width(l, r); - if start_l < end_l || start_r < end_r { - rem -= BLOCK; - } - - // Adjust block sizes so that the left and right block don't overlap, but get perfectly - // aligned to cover the whole remaining gap. - if start_l < end_l { - block_r = rem; - } else if start_r < end_r { - block_l = rem; - } else { - // There were the same number of elements to switch on both blocks during the last - // iteration, so there are no remaining elements on either block. Cover the remaining - // items with roughly equally-sized blocks. - block_l = rem / 2; - block_r = rem - block_l; - } - debug_assert!(block_l <= BLOCK && block_r <= BLOCK); - debug_assert!(width(l, r) == block_l + block_r); - } - - if start_l == end_l { - // Trace `block_l` elements from the left side. - start_l = MaybeUninit::slice_as_mut_ptr(&mut offsets_l); - end_l = start_l; - let mut elem = l; - - for i in 0..block_l { - // SAFETY: The unsafety operations below involve the usage of the `offset`. - // According to the conditions required by the function, we satisfy them because: - // 1. `offsets_l` is stack-allocated, and thus considered separate allocated object. - // 2. The function `is_less` returns a `bool`. - // Casting a `bool` will never overflow `isize`. - // 3. We have guaranteed that `block_l` will be `<= BLOCK`. - // Plus, `end_l` was initially set to the begin pointer of `offsets_` which was declared on the stack. - // Thus, we know that even in the worst case (all invocations of `is_less` returns false) we will only be at most 1 byte pass the end. - // Another unsafety operation here is dereferencing `elem`. - // However, `elem` was initially the begin pointer to the slice which is always valid. - unsafe { - // Branchless comparison. - *end_l = i as u8; - end_l = end_l.add(!is_less(&*elem, pivot) as usize); - elem = elem.add(1); - } - } - } - - if start_r == end_r { - // Trace `block_r` elements from the right side. - start_r = MaybeUninit::slice_as_mut_ptr(&mut offsets_r); - end_r = start_r; - let mut elem = r; - - for i in 0..block_r { - // SAFETY: The unsafety operations below involve the usage of the `offset`. - // According to the conditions required by the function, we satisfy them because: - // 1. `offsets_r` is stack-allocated, and thus considered separate allocated object. - // 2. The function `is_less` returns a `bool`. - // Casting a `bool` will never overflow `isize`. - // 3. We have guaranteed that `block_r` will be `<= BLOCK`. - // Plus, `end_r` was initially set to the begin pointer of `offsets_` which was declared on the stack. - // Thus, we know that even in the worst case (all invocations of `is_less` returns true) we will only be at most 1 byte pass the end. - // Another unsafety operation here is dereferencing `elem`. - // However, `elem` was initially `1 * sizeof(T)` past the end and we decrement it by `1 * sizeof(T)` before accessing it. - // Plus, `block_r` was asserted to be less than `BLOCK` and `elem` will therefore at most be pointing to the beginning of the slice. - unsafe { - // Branchless comparison. 
- elem = elem.sub(1); - *end_r = i as u8; - end_r = end_r.add(is_less(&*elem, pivot) as usize); - } - } - } - - // Number of out-of-order elements to swap between the left and right side. - let count = cmp::min(width(start_l, end_l), width(start_r, end_r)); - - if count > 0 { - macro_rules! left { - () => { - l.add(usize::from(*start_l)) - }; - } - macro_rules! right { - () => { - r.sub(usize::from(*start_r) + 1) - }; - } - - // Instead of swapping one pair at the time, it is more efficient to perform a cyclic - // permutation. This is not strictly equivalent to swapping, but produces a similar - // result using fewer memory operations. - - // SAFETY: The use of `ptr::read` is valid because there is at least one element in - // both `offsets_l` and `offsets_r`, so `left!` is a valid pointer to read from. - // - // The uses of `left!` involve calls to `offset` on `l`, which points to the - // beginning of `v`. All the offsets pointed-to by `start_l` are at most `block_l`, so - // these `offset` calls are safe as all reads are within the block. The same argument - // applies for the uses of `right!`. - // - // The calls to `start_l.offset` are valid because there are at most `count-1` of them, - // plus the final one at the end of the unsafe block, where `count` is the minimum number - // of collected offsets in `offsets_l` and `offsets_r`, so there is no risk of there not - // being enough elements. The same reasoning applies to the calls to `start_r.offset`. - // - // The calls to `copy_nonoverlapping` are safe because `left!` and `right!` are guaranteed - // not to overlap, and are valid because of the reasoning above. - unsafe { - let tmp = ptr::read(left!()); - ptr::copy_nonoverlapping(right!(), left!(), 1); - - for _ in 1..count { - start_l = start_l.add(1); - ptr::copy_nonoverlapping(left!(), right!(), 1); - start_r = start_r.add(1); - ptr::copy_nonoverlapping(right!(), left!(), 1); - } - - ptr::copy_nonoverlapping(&tmp, right!(), 1); - mem::forget(tmp); - start_l = start_l.add(1); - start_r = start_r.add(1); - } - } - - if start_l == end_l { - // All out-of-order elements in the left block were moved. Move to the next block. - - // block-width-guarantee - // SAFETY: if `!is_done` then the slice width is guaranteed to be at least `2*BLOCK` wide. There - // are at most `BLOCK` elements in `offsets_l` because of its size, so the `offset` operation is - // safe. Otherwise, the debug assertions in the `is_done` case guarantee that - // `width(l, r) == block_l + block_r`, namely, that the block sizes have been adjusted to account - // for the smaller number of remaining elements. - l = unsafe { l.add(block_l) }; - } - - if start_r == end_r { - // All out-of-order elements in the right block were moved. Move to the previous block. - - // SAFETY: Same argument as [block-width-guarantee]. Either this is a full block `2*BLOCK`-wide, - // or `block_r` has been adjusted for the last handful of elements. - r = unsafe { r.sub(block_r) }; - } - - if is_done { - break; - } - } - - // All that remains now is at most one block (either the left or the right) with out-of-order - // elements that need to be moved. Such remaining elements can be simply shifted to the end - // within their block. - - if start_l < end_l { - // The left block remains. - // Move its remaining out-of-order elements to the far right. 
- debug_assert_eq!(width(l, r), block_l); - while start_l < end_l { - // remaining-elements-safety - // SAFETY: while the loop condition holds there are still elements in `offsets_l`, so it - // is safe to point `end_l` to the previous element. - // - // The `ptr::swap` is safe if both its arguments are valid for reads and writes: - // - Per the debug assert above, the distance between `l` and `r` is `block_l` - // elements, so there can be at most `block_l` remaining offsets between `start_l` - // and `end_l`. This means `r` will be moved at most `block_l` steps back, which - // makes the `r.offset` calls valid (at that point `l == r`). - // - `offsets_l` contains valid offsets into `v` collected during the partitioning of - // the last block, so the `l.offset` calls are valid. - unsafe { - end_l = end_l.sub(1); - ptr::swap(l.add(usize::from(*end_l)), r.sub(1)); - r = r.sub(1); - } - } - width(v.as_mut_ptr(), r) - } else if start_r < end_r { - // The right block remains. - // Move its remaining out-of-order elements to the far left. - debug_assert_eq!(width(l, r), block_r); - while start_r < end_r { - // SAFETY: See the reasoning in [remaining-elements-safety]. - unsafe { - end_r = end_r.sub(1); - ptr::swap(l, r.sub(usize::from(*end_r) + 1)); - l = l.add(1); - } - } - width(v.as_mut_ptr(), l) - } else { - // Nothing else to do, we're done. - width(v.as_mut_ptr(), l) - } -} - -/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or -/// equal to `v[pivot]`. -/// -/// Returns a tuple of: -/// -/// 1. Number of elements smaller than `v[pivot]`. -/// 2. True if `v` was already partitioned. -pub(super) fn partition(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool) -where - F: FnMut(&T, &T) -> bool, -{ - let (mid, was_partitioned) = { - // Place the pivot at the beginning of slice. - v.swap(0, pivot); - let (pivot, v) = v.split_at_mut(1); - let pivot = &mut pivot[0]; - - // Read the pivot into a stack-allocated variable for efficiency. If a following comparison - // operation panics, the pivot will be automatically written back into the slice. - - // SAFETY: `pivot` is a reference to the first element of `v`, so `ptr::read` is safe. - let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) }); - let _pivot_guard = InsertionHole { src: &*tmp, dest: pivot }; - let pivot = &*tmp; - - // Find the first pair of out-of-order elements. - let mut l = 0; - let mut r = v.len(); - - // SAFETY: The unsafety below involves indexing an array. - // For the first one: We already do the bounds checking here with `l < r`. - // For the second one: We initially have `l == 0` and `r == v.len()` and we checked that `l < r` at every indexing operation. - // From here we know that `r` must be at least `r == l` which was shown to be valid from the first one. - unsafe { - // Find the first element greater than or equal to the pivot. - while l < r && is_less(v.get_unchecked(l), pivot) { - l += 1; - } - - // Find the last element smaller that the pivot. - while l < r && !is_less(v.get_unchecked(r - 1), pivot) { - r -= 1; - } - } - - (l + partition_in_blocks(&mut v[l..r], pivot, is_less), l >= r) - - // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated - // variable) back into the slice where it originally was. This step is critical in ensuring - // safety! - }; - - // Place the pivot between the two partitions. 
- v.swap(0, mid); - - (mid, was_partitioned) -} - -/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`. -/// -/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain -/// elements smaller than the pivot. -pub(super) fn partition_equal(v: &mut [T], pivot: usize, is_less: &mut F) -> usize -where - F: FnMut(&T, &T) -> bool, -{ - // Place the pivot at the beginning of slice. - v.swap(0, pivot); - let (pivot, v) = v.split_at_mut(1); - let pivot = &mut pivot[0]; - - // Read the pivot into a stack-allocated variable for efficiency. If a following comparison - // operation panics, the pivot will be automatically written back into the slice. - // SAFETY: The pointer here is valid because it is obtained from a reference to a slice. - let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) }); - let _pivot_guard = InsertionHole { src: &*tmp, dest: pivot }; - let pivot = &*tmp; - - let len = v.len(); - if len == 0 { - return 0; - } - - // Now partition the slice. - let mut l = 0; - let mut r = len; - loop { - // SAFETY: The unsafety below involves indexing an array. - // For the first one: We already do the bounds checking here with `l < r`. - // For the second one: We initially have `l == 0` and `r == v.len()` and we checked that `l < r` at every indexing operation. - // From here we know that `r` must be at least `r == l` which was shown to be valid from the first one. - unsafe { - // Find the first element greater than the pivot. - while l < r && !is_less(pivot, v.get_unchecked(l)) { - l += 1; - } - - // Find the last element equal to the pivot. - loop { - r -= 1; - if l >= r || !is_less(pivot, v.get_unchecked(r)) { - break; - } - } - - // Are we done? - if l >= r { - break; - } - - // Swap the found pair of out-of-order elements. - let ptr = v.as_mut_ptr(); - ptr::swap(ptr.add(l), ptr.add(r)); - l += 1; - } - } - - // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself. - l + 1 - - // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable) - // back into the slice where it originally was. This step is critical in ensuring safety! -} - -/// Scatters some elements around in an attempt to break patterns that might cause imbalanced -/// partitions in quicksort. -#[cold] -pub(super) fn break_patterns(v: &mut [T]) { - let len = v.len(); - if len >= 8 { - let mut seed = len; - let mut gen_usize = || { - // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia. - if usize::BITS <= 32 { - let mut r = seed as u32; - r ^= r << 13; - r ^= r >> 17; - r ^= r << 5; - seed = r as usize; - seed - } else { - let mut r = seed as u64; - r ^= r << 13; - r ^= r >> 7; - r ^= r << 17; - seed = r as usize; - seed - } - }; - - // Take random numbers modulo this number. - // The number fits into `usize` because `len` is not greater than `isize::MAX`. - let modulus = len.next_power_of_two(); - - // Some pivot candidates will be in the nearby of this index. Let's randomize them. - let pos = len / 4 * 2; - - for i in 0..3 { - // Generate a random number modulo `len`. However, in order to avoid costly operations - // we first take it modulo a power of two, and then decrease by `len` until it fits - // into the range `[0, len - 1]`. - let mut other = gen_usize() & (modulus - 1); - - // `other` is guaranteed to be less than `2 * len`. 
- if other >= len { - other -= len; - } - - v.swap(pos - 1 + i, other); - } - } -} - -/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted. -/// -/// Elements in `v` might be reordered in the process. -pub(super) fn choose_pivot(v: &mut [T], is_less: &mut F) -> (usize, bool) -where - F: FnMut(&T, &T) -> bool, -{ - // Minimum length to choose the median-of-medians method. - // Shorter slices use the simple median-of-three method. - const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50; - // Maximum number of swaps that can be performed in this function. - const MAX_SWAPS: usize = 4 * 3; - - let len = v.len(); - - // Three indices near which we are going to choose a pivot. - let mut a = len / 4 * 1; - let mut b = len / 4 * 2; - let mut c = len / 4 * 3; - - // Counts the total number of swaps we are about to perform while sorting indices. - let mut swaps = 0; - - if len >= 8 { - // Swaps indices so that `v[a] <= v[b]`. - // SAFETY: `len >= 8` so there are at least two elements in the neighborhoods of - // `a`, `b` and `c`. This means the three calls to `sort_adjacent` result in - // corresponding calls to `sort3` with valid 3-item neighborhoods around each - // pointer, which in turn means the calls to `sort2` are done with valid - // references. Thus the `v.get_unchecked` calls are safe, as is the `ptr::swap` - // call. - let mut sort2 = |a: &mut usize, b: &mut usize| unsafe { - if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) { - ptr::swap(a, b); - swaps += 1; - } - }; - - // Swaps indices so that `v[a] <= v[b] <= v[c]`. - let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| { - sort2(a, b); - sort2(b, c); - sort2(a, b); - }; - - if len >= SHORTEST_MEDIAN_OF_MEDIANS { - // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`. - let mut sort_adjacent = |a: &mut usize| { - let tmp = *a; - sort3(&mut (tmp - 1), a, &mut (tmp + 1)); - }; - - // Find medians in the neighborhoods of `a`, `b`, and `c`. - sort_adjacent(&mut a); - sort_adjacent(&mut b); - sort_adjacent(&mut c); - } - - // Find the median among `a`, `b`, and `c`. - sort3(&mut a, &mut b, &mut c); - } - - if swaps < MAX_SWAPS { - (b, swaps == 0) - } else { - // The maximum number of swaps was performed. Chances are the slice is descending or mostly - // descending, so reversing will probably help sort it faster. - v.reverse(); - (len - 1 - b, true) - } -} - -/// Sorts `v` recursively. -/// -/// If the slice had a predecessor in the original array, it is specified as `pred`. -/// -/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero, -/// this function will immediately switch to heapsort. -fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &mut F, mut pred: Option<&'a T>, mut limit: u32) -where - F: FnMut(&T, &T) -> bool, -{ - // Slices of up to this length get sorted using insertion sort. - const MAX_INSERTION: usize = 20; - - // True if the last partitioning was reasonably balanced. - let mut was_balanced = true; - // True if the last partitioning didn't shuffle elements (the slice was already partitioned). - let mut was_partitioned = true; - - loop { - let len = v.len(); - - // Very short slices get sorted using insertion sort. - if len <= MAX_INSERTION { - if len >= 2 { - insertion_sort_shift_left(v, 1, is_less); - } - return; - } - - // If too many bad pivot choices were made, simply fall back to heapsort in order to - // guarantee `O(n * log(n))` worst-case. 
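// [Editorial aside: illustrative sketch, not part of the patch.] The `limit` checked just
// below comes from `quicksort` further down in this diff, where it is computed as
// floor(log2(len)) + 1 without floating point:
fn depth_limit(len: usize) -> u32 {
    usize::BITS - len.leading_zeros() // e.g. len = 1000 -> 10, len = 1024 -> 11
}
// Balanced halving can only produce about that many levels, so reaching 0 means too many
// imbalanced partitions happened, and heapsort guarantees O(n * log(n)) from there.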
- if limit == 0 { - heapsort(v, is_less); - return; - } - - // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling - // some elements around. Hopefully we'll choose a better pivot this time. - if !was_balanced { - break_patterns(v); - limit -= 1; - } - - // Choose a pivot and try guessing whether the slice is already sorted. - let (pivot, likely_sorted) = choose_pivot(v, is_less); - - // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot - // selection predicts the slice is likely already sorted... - if was_balanced && was_partitioned && likely_sorted { - // Try identifying several out-of-order elements and shifting them to correct - // positions. If the slice ends up being completely sorted, we're done. - if partial_insertion_sort(v, is_less) { - return; - } - } - - // If the chosen pivot is equal to the predecessor, then it's the smallest element in the - // slice. Partition the slice into elements equal to and elements greater than the pivot. - // This case is usually hit when the slice contains many duplicate elements. - if let Some(p) = pred { - if !is_less(p, &v[pivot]) { - let mid = partition_equal(v, pivot, is_less); - - // Continue sorting elements greater than the pivot. - v = &mut v[mid..]; - continue; - } - } - - // Partition the slice. - let (mid, was_p) = partition(v, pivot, is_less); - was_balanced = cmp::min(mid, len - mid) >= len / 8; - was_partitioned = was_p; - - // Split the slice into `left`, `pivot`, and `right`. - let (left, right) = v.split_at_mut(mid); - let (pivot, right) = right.split_at_mut(1); - let pivot = &pivot[0]; - - // Recurse into the shorter side only in order to minimize the total number of recursive - // calls and consume less stack space. Then just continue with the longer side (this is - // akin to tail recursion). - if left.len() < right.len() { - recurse(left, is_less, pred, limit); - v = right; - pred = Some(pivot); - } else { - recurse(right, is_less, Some(pivot), limit); - v = left; - } - } -} - -/// Sorts `v` using pattern-defeating quicksort, which is *O*(*n* \* log(*n*)) worst-case. -pub fn quicksort(v: &mut [T], mut is_less: F) -where - F: FnMut(&T, &T) -> bool, -{ - // Sorting has no meaningful behavior on zero-sized types. - if T::IS_ZST { - return; - } - - // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`. - let limit = usize::BITS - v.len().leading_zeros(); - - recurse(v, &mut is_less, None, limit); -} - -/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and -/// stores the result into `v[..]`. -/// -/// # Safety -/// -/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough -/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type. -unsafe fn merge(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F) -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - let v = v.as_mut_ptr(); - - // SAFETY: mid and len must be in-bounds of v. - let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) }; - - // The merge process first copies the shorter run into `buf`. Then it traces the newly copied - // run and the longer run forwards (or backwards), comparing their next unconsumed elements and - // copying the lesser (or greater) one into `v`. - // - // As soon as the shorter run is fully consumed, the process is done. 
If the longer run gets - consumed first, then we must copy whatever is left of the shorter run into the remaining - hole in `v`. - - // Intermediate state of the process is always tracked by `hole`, which serves two purposes: - // 1. Protects integrity of `v` from panics in `is_less`. - // 2. Fills the remaining hole in `v` if the longer run gets consumed first. - // - // Panic safety: - // - // If `is_less` panics at any point during the process, `hole` will get dropped and fill the - // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every - // object it initially held exactly once. - let mut hole; - - if mid <= len - mid { - // The left run is shorter. - - // SAFETY: buf must have enough capacity for `v[..mid]`. - unsafe { - ptr::copy_nonoverlapping(v, buf, mid); - hole = MergeHole { start: buf, end: buf.add(mid), dest: v }; - } - - // Initially, these pointers point to the beginnings of their arrays. - let left = &mut hole.start; - let mut right = v_mid; - let out = &mut hole.dest; - - while *left < hole.end && right < v_end { - // Consume the lesser side. - // If equal, prefer the left run to maintain stability. - - // SAFETY: left and right must be valid and part of v; same for out. - unsafe { - let is_l = is_less(&*right, &**left); - let to_copy = if is_l { right } else { *left }; - ptr::copy_nonoverlapping(to_copy, *out, 1); - *out = out.add(1); - right = right.add(is_l as usize); - *left = left.add(!is_l as usize); - } - } - } else { - // The right run is shorter. - - // SAFETY: buf must have enough capacity for `v[mid..]`. - unsafe { - ptr::copy_nonoverlapping(v_mid, buf, len - mid); - hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid }; - } - - // Initially, these pointers point past the ends of their arrays. - let left = &mut hole.dest; - let right = &mut hole.end; - let mut out = v_end; - - while v < *left && buf < *right { - // Consume the greater side. - // If equal, prefer the right run to maintain stability. - - // SAFETY: left and right must be valid and part of v; same for out. - unsafe { - let is_l = is_less(&*right.sub(1), &*left.sub(1)); - *left = left.sub(is_l as usize); - *right = right.sub(!is_l as usize); - let to_copy = if is_l { *left } else { *right }; - out = out.sub(1); - ptr::copy_nonoverlapping(to_copy, out, 1); - } - } - } - // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of - // it will now be copied into the hole in `v`. - - // When dropped, copies the range `start..end` into `dest..`. - struct MergeHole<T> { - start: *mut T, - end: *mut T, - dest: *mut T, - } - - impl<T> Drop for MergeHole<T> { - fn drop(&mut self) { - // SAFETY: `T` is not a zero-sized type, and these are pointers into a slice's elements. - unsafe { - let len = self.end.sub_ptr(self.start); - ptr::copy_nonoverlapping(self.start, self.dest, len); - } - } - } -} - -/// This merge sort borrows some (but not all) ideas from TimSort, which used to be described in -/// detail [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt). However Python -/// has switched to a Powersort based implementation. -/// -/// The algorithm identifies strictly descending and non-descending subsequences, which are called -/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed -/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are -/// satisfied: -/// -/// 1.
for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len` -/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len` -/// -/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case. -pub fn merge_sort<T, CmpF, ElemAllocF, ElemDeallocF, RunAllocF, RunDeallocF>( - v: &mut [T], - is_less: &mut CmpF, - elem_alloc_fn: ElemAllocF, - elem_dealloc_fn: ElemDeallocF, - run_alloc_fn: RunAllocF, - run_dealloc_fn: RunDeallocF, -) where - CmpF: FnMut(&T, &T) -> bool, - ElemAllocF: Fn(usize) -> *mut T, - ElemDeallocF: Fn(*mut T, usize), - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), -{ - // Slices of up to this length get sorted using insertion sort. - const MAX_INSERTION: usize = 20; - - // The caller should have already checked that. - debug_assert!(!T::IS_ZST); - - let len = v.len(); - - // Short arrays get sorted in-place via insertion sort to avoid allocations. - if len <= MAX_INSERTION { - if len >= 2 { - insertion_sort_shift_left(v, 1, is_less); - } - return; - } - - // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it - // shallow copies of the contents of `v` without risking the dtors running on copies if - // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run, - // which will always have length at most `len / 2`. - let buf = BufGuard::new(len / 2, elem_alloc_fn, elem_dealloc_fn); - let buf_ptr = buf.buf_ptr.as_ptr(); - - let mut runs = RunVec::new(run_alloc_fn, run_dealloc_fn); - - let mut end = 0; - let mut start = 0; - - // Scan forward. Memory pre-fetching prefers forward scanning vs backwards scanning, and the - // code-gen is usually better. For the most sensitive types such as integers, these are merged - // bidirectionally at once. So there is no benefit in scanning backwards. - while end < len { - let (streak_end, was_reversed) = find_streak(&v[start..], is_less); - end += streak_end; - if was_reversed { - v[start..end].reverse(); - } - - // Insert some more elements into the run if it's too short. Insertion sort is faster than - // merge sort on short sequences, so this significantly improves performance. - end = provide_sorted_batch(v, start, end, is_less); - - // Push this run onto the stack. - runs.push(TimSortRun { start, len: end - start }); - start = end; - - // Merge some pairs of adjacent runs to satisfy the invariants. - while let Some(r) = collapse(runs.as_slice(), len) { - let left = runs[r]; - let right = runs[r + 1]; - let merge_slice = &mut v[left.start..right.start + right.len]; - // SAFETY: `buf_ptr` must hold enough capacity for the shorter of the two sides, and - // neither side may be of length 0. - unsafe { - merge(merge_slice, left.len, buf_ptr, is_less); - } - runs[r + 1] = TimSortRun { start: left.start, len: left.len + right.len }; - runs.remove(r); - } - } - - // Finally, exactly one run must remain in the stack. - debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len); - - // Examines the stack of runs and identifies the next pair of runs to merge. More specifically, - // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the - // algorithm should continue building a new run instead, `None` is returned. - // - // TimSort is infamous for its buggy implementations, as described here: - // http://envisage-project.eu/timsort-specification-and-verification/ - // - // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
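// [Editorial aside: illustrative sketch, not part of the patch.] A short trace of the run
// invariants enforced by `collapse` below, with run lengths listed bottom -> top of stack:
//     [128, 100, 40]: lengths are decreasing (invariant 1 holds), but 128 <= 100 + 40
//                     violates invariant 2; 128 < 40 is false, so the top two runs
//                     merge -> [128, 140]
//     [128, 140]:     128 <= 140 violates invariant 1 -> merge -> [268]; both hold, done.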
- // Enforcing them on just top three is not sufficient to ensure that the invariants will still - // hold for *all* runs in the stack. - // - // This function correctly checks invariants for the top four runs. Additionally, if the top - // run starts at index 0, it will always demand a merge operation until the stack is fully - // collapsed, in order to complete the sort. - #[inline] - fn collapse(runs: &[TimSortRun], stop: usize) -> Option { - let n = runs.len(); - if n >= 2 - && (runs[n - 1].start + runs[n - 1].len == stop - || runs[n - 2].len <= runs[n - 1].len - || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len) - || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len)) - { - if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) } - } else { - None - } - } - - // Extremely basic versions of Vec. - // Their use is super limited and by having the code here, it allows reuse between the sort - // implementations. - struct BufGuard - where - ElemDeallocF: Fn(*mut T, usize), - { - buf_ptr: ptr::NonNull, - capacity: usize, - elem_dealloc_fn: ElemDeallocF, - } - - impl BufGuard - where - ElemDeallocF: Fn(*mut T, usize), - { - fn new( - len: usize, - elem_alloc_fn: ElemAllocF, - elem_dealloc_fn: ElemDeallocF, - ) -> Self - where - ElemAllocF: Fn(usize) -> *mut T, - { - Self { - buf_ptr: ptr::NonNull::new(elem_alloc_fn(len)).unwrap(), - capacity: len, - elem_dealloc_fn, - } - } - } - - impl Drop for BufGuard - where - ElemDeallocF: Fn(*mut T, usize), - { - fn drop(&mut self) { - (self.elem_dealloc_fn)(self.buf_ptr.as_ptr(), self.capacity); - } - } - - struct RunVec - where - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), - { - buf_ptr: ptr::NonNull, - capacity: usize, - len: usize, - run_alloc_fn: RunAllocF, - run_dealloc_fn: RunDeallocF, - } - - impl RunVec - where - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), - { - fn new(run_alloc_fn: RunAllocF, run_dealloc_fn: RunDeallocF) -> Self { - // Most slices can be sorted with at most 16 runs in-flight. - const START_RUN_CAPACITY: usize = 16; - - Self { - buf_ptr: ptr::NonNull::new(run_alloc_fn(START_RUN_CAPACITY)).unwrap(), - capacity: START_RUN_CAPACITY, - len: 0, - run_alloc_fn, - run_dealloc_fn, - } - } - - fn push(&mut self, val: TimSortRun) { - if self.len == self.capacity { - let old_capacity = self.capacity; - let old_buf_ptr = self.buf_ptr.as_ptr(); - - self.capacity = self.capacity * 2; - self.buf_ptr = ptr::NonNull::new((self.run_alloc_fn)(self.capacity)).unwrap(); - - // SAFETY: buf_ptr new and old were correctly allocated and old_buf_ptr has - // old_capacity valid elements. - unsafe { - ptr::copy_nonoverlapping(old_buf_ptr, self.buf_ptr.as_ptr(), old_capacity); - } - - (self.run_dealloc_fn)(old_buf_ptr, old_capacity); - } - - // SAFETY: The invariant was just checked. - unsafe { - self.buf_ptr.as_ptr().add(self.len).write(val); - } - self.len += 1; - } - - fn remove(&mut self, index: usize) { - if index >= self.len { - panic!("Index out of bounds"); - } - - // SAFETY: buf_ptr needs to be valid and len invariant upheld. - unsafe { - // the place we are taking from. - let ptr = self.buf_ptr.as_ptr().add(index); - - // Shift everything down to fill in that spot. - ptr::copy(ptr.add(1), ptr, self.len - index - 1); - } - self.len -= 1; - } - - fn as_slice(&self) -> &[TimSortRun] { - // SAFETY: Safe as long as buf_ptr is valid and len invariant was upheld. 
- unsafe { &*ptr::slice_from_raw_parts(self.buf_ptr.as_ptr(), self.len) } - } - - fn len(&self) -> usize { - self.len - } - } - - impl<RunAllocF, RunDeallocF> core::ops::Index<usize> for RunVec<RunAllocF, RunDeallocF> - where - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), - { - type Output = TimSortRun; - - fn index(&self, index: usize) -> &Self::Output { - if index < self.len { - // SAFETY: buf_ptr and len invariant must be upheld. - unsafe { - return &*(self.buf_ptr.as_ptr().add(index)); - } - } - - panic!("Index out of bounds"); - } - } - - impl<RunAllocF, RunDeallocF> core::ops::IndexMut<usize> for RunVec<RunAllocF, RunDeallocF> - where - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), - { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - if index < self.len { - // SAFETY: buf_ptr and len invariant must be upheld. - unsafe { - return &mut *(self.buf_ptr.as_ptr().add(index)); - } - } - - panic!("Index out of bounds"); - } - } - - impl<RunAllocF, RunDeallocF> Drop for RunVec<RunAllocF, RunDeallocF> - where - RunAllocF: Fn(usize) -> *mut TimSortRun, - RunDeallocF: Fn(*mut TimSortRun, usize), - { - fn drop(&mut self) { - // As long as TimSortRun is Copy we don't need to drop them individually but just the - // whole allocation. - (self.run_dealloc_fn)(self.buf_ptr.as_ptr(), self.capacity); - } - } -} - -/// Internal type used by merge_sort. -#[derive(Clone, Copy, Debug)] -pub struct TimSortRun { - len: usize, - start: usize, -} - -/// Takes a range denoted by `start` and `end` that is already sorted, and extends it to the right -/// if necessary with sorts optimized for smaller ranges such as insertion sort. -fn provide_sorted_batch<T, F>(v: &mut [T], start: usize, mut end: usize, is_less: &mut F) -> usize -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - assert!(end >= start && end <= len); - - // This value is a balance between least comparisons and best performance, as - // influenced by for example cache locality. - const MIN_INSERTION_RUN: usize = 10; - - // Insert some more elements into the run if it's too short. Insertion sort is faster than - // merge sort on short sequences, so this significantly improves performance. - let start_end_diff = end - start; - - if start_end_diff < MIN_INSERTION_RUN && end < len { - // v[start_found..end] are elements that are already sorted in the input. We want to extend - // the sorted region to the left, so we extend the run by up to MIN_INSERTION_RUN - 1 - // elements to the right, which is more efficient than trying to push those already sorted - // elements to the left. - end = cmp::min(start + MIN_INSERTION_RUN, len); - let presorted_start = cmp::max(start_end_diff, 1); - - insertion_sort_shift_left(&mut v[start..end], presorted_start, is_less); - } - - end -} - -/// Finds a streak of presorted elements starting at the beginning of the slice. Returns the index -/// of the first element that is not part of said streak, and a bool denoting whether the streak -/// was reversed. Streaks can be increasing or decreasing. -fn find_streak<T, F>(v: &[T], is_less: &mut F) -> (usize, bool) -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - - if len < 2 { - return (len, false); - } - - let mut end = 2; - - // SAFETY: See the specific comments below. - unsafe { - // SAFETY: We checked that len >= 2, so 0 and 1 are valid indices. - let assume_reverse = is_less(v.get_unchecked(1), v.get_unchecked(0)); - - // SAFETY: We know end >= 2 and check end < len. - // From that follows that accessing v at end and end - 1 is safe.
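// [Editorial aside: illustrative sketch, not part of the patch.] Expected results of
// `find_streak` for a few inputs, with `<` as `is_less`:
//     [1, 2, 2, 3, 0] -> (4, false)  non-descending prefix of length 4
//     [3, 2, 1, 9]    -> (3, true)   strictly descending prefix of length 3
//     [7]             -> (1, false)  trivial streak, nothing to reverse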
- if assume_reverse { - while end < len && is_less(v.get_unchecked(end), v.get_unchecked(end - 1)) { - end += 1; - } - - (end, true) - } else { - while end < len && !is_less(v.get_unchecked(end), v.get_unchecked(end - 1)) { - end += 1; - } - (end, false) - } - } -} diff --git a/core/src/slice/sort/mod.rs b/core/src/slice/sort/mod.rs new file mode 100644 index 0000000000000..79852708b81ea --- /dev/null +++ b/core/src/slice/sort/mod.rs @@ -0,0 +1,8 @@ +//! This module and the contained sub-modules contains the code for efficient and robust sort +//! implementations, as well as the domain adjacent implementation of `select_nth_unstable`. + +pub mod stable; +pub mod unstable; + +pub(crate) mod select; +pub(crate) mod shared; diff --git a/core/src/slice/select.rs b/core/src/slice/sort/select.rs similarity index 76% rename from core/src/slice/select.rs rename to core/src/slice/sort/select.rs index ffc193578e075..6212def30416b 100644 --- a/core/src/slice/select.rs +++ b/core/src/slice/sort/select.rs @@ -1,45 +1,78 @@ -//! Slice selection -//! //! This module contains the implementation for `slice::select_nth_unstable`. -//! It uses an introselect algorithm based on Orson Peters' pattern-defeating quicksort, -//! published at: +//! It uses an introselect algorithm based on ipnsort by Lukas Bergdoll and Orson Peters, +//! published at: //! //! The fallback algorithm used for introselect is Median of Medians using Tukey's Ninther //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with //! better performance than one would get using heapsort as fallback. -use crate::cmp; use crate::mem::{self, SizedTypeProperties}; -use crate::slice::sort::{ - break_patterns, choose_pivot, insertion_sort_shift_left, partition, partition_equal, -}; -// For slices of up to this length it's probably faster to simply sort them. -// Defined at the module scope because it's used in multiple functions. -const MAX_INSERTION: usize = 10; +use crate::slice::sort::shared::pivot::choose_pivot; +use crate::slice::sort::shared::smallsort::insertion_sort_shift_left; +use crate::slice::sort::unstable::quicksort::partition; + +/// Reorder the slice such that the element at `index` is at its final sorted position. +pub(crate) fn partition_at_index( + v: &mut [T], + index: usize, + mut is_less: F, +) -> (&mut [T], &mut T, &mut [T]) +where + F: FnMut(&T, &T) -> bool, +{ + let len = v.len(); + + // Puts a lower limit of 1 on `len`. + if index >= len { + panic!("partition_at_index index {} greater than length of slice {}", index, len); + } + + if T::IS_ZST { + // Sorting has no meaningful behavior on zero-sized types. Do nothing. + } else if index == len - 1 { + // Find max element and place it in the last position of the array. We're free to use + // `unwrap()` here because we checked that `v` is not empty. + let max_idx = max_index(v, &mut is_less).unwrap(); + v.swap(max_idx, index); + } else if index == 0 { + // Find min element and place it in the first position of the array. We're free to use + // `unwrap()` here because we checked that `v` is not empty. 
+ let min_idx = min_index(v, &mut is_less).unwrap(); + v.swap(min_idx, index); + } else { + partition_at_index_loop(v, index, None, &mut is_less); + } + + let (left, right) = v.split_at_mut(index); + let (pivot, right) = right.split_at_mut(1); + let pivot = &mut pivot[0]; + (left, pivot, right) +} + +// For small sub-slices it's faster to use a dedicated small-sort, but because it is only called at +// most once, it doesn't make sense to use something more sophisticated than insertion-sort. +const INSERTION_SORT_THRESHOLD: usize = 16; fn partition_at_index_loop<'a, T, F>( mut v: &'a mut [T], mut index: usize, + mut ancestor_pivot: Option<&'a T>, is_less: &mut F, - mut pred: Option<&'a T>, ) where F: FnMut(&T, &T) -> bool, { - // Limit the amount of iterations and fall back to fast deterministic selection - // to ensure O(n) worst case running time. This limit needs to be constant, because - // using `ilog2(len)` like in `sort` would result in O(n log n) time complexity. - // The exact value of the limit is chosen somewhat arbitrarily, but for most inputs bad pivot - // selections should be relatively rare, so the limit usually shouldn't be reached - // anyways. + // Limit the amount of iterations and fall back to fast deterministic selection to ensure O(n) + // worst case running time. This limit needs to be constant, because using `ilog2(len)` like in + // `sort` would result in O(n log n) time complexity. The exact value of the limit is chosen + // somewhat arbitrarily, but for most inputs bad pivot selections should be relatively rare, so + // the limit is only reached for sub-slices of length len / 2^limit or less. This keeps the + // remaining work done by the fallback minimal in relative terms. let mut limit = 16; - // True if the last partitioning was reasonably balanced. - let mut was_balanced = true; - loop { - if v.len() <= MAX_INSERTION { - if v.len() > 1 { + if v.len() <= INSERTION_SORT_THRESHOLD { + if v.len() >= 2 { insertion_sort_shift_left(v, 1, is_less); } return; @@ -50,38 +83,38 @@ fn partition_at_index_loop<'a, T, F>( return; } - // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling - // some elements around. Hopefully we'll choose a better pivot this time. - if !was_balanced { - break_patterns(v); - limit -= 1; - } + limit -= 1; // Choose a pivot - let (pivot, _) = choose_pivot(v, is_less); + let pivot_pos = choose_pivot(v, is_less); // If the chosen pivot is equal to the predecessor, then it's the smallest element in the // slice. Partition the slice into elements equal to and elements greater than the pivot. // This case is usually hit when the slice contains many duplicate elements. - if let Some(p) = pred { - if !is_less(p, &v[pivot]) { - let mid = partition_equal(v, pivot, is_less); + if let Some(p) = ancestor_pivot { + // SAFETY: choose_pivot promises to return a valid pivot position. + let pivot = unsafe { v.get_unchecked(pivot_pos) }; + + if !is_less(p, pivot) { + let num_lt = partition(v, pivot_pos, &mut |a, b| !is_less(b, a)); + + // Continue sorting elements greater than the pivot. We know that `mid` contains + // the pivot. So we can continue after `mid`. + let mid = num_lt + 1; // If we've passed our index, then we're good. if mid > index { return; } - // Otherwise, continue sorting elements greater than the pivot.
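// [Editorial aside: illustration, not part of the patch.] Why flipping the comparator
// groups the equal elements: with c(a, b) = !is_less(b, a), i.e. "a <= b", the partition
// above gathers everything satisfying `elem <= pivot` on the left. Since no element is
// smaller than this pivot (it equals the ancestor pivot), the left side holds exactly the
// elements equal to the pivot, and `num_lt + 1` skips past them and the pivot itself.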
v = &mut v[mid..]; index = index - mid; - pred = None; + ancestor_pivot = None; continue; } } - let (mid, _) = partition(v, pivot, is_less); - was_balanced = cmp::min(mid, v.len() - mid) >= v.len() / 8; + let mid = partition(v, pivot_pos, is_less); // Split the slice into `left`, `pivot`, and `right`. let (left, right) = v.split_at_mut(mid); @@ -91,7 +124,7 @@ fn partition_at_index_loop<'a, T, F>( if mid < index { v = right; index = index - mid - 1; - pred = Some(pivot); + ancestor_pivot = Some(pivot); } else if mid > index { v = left; } else { @@ -122,41 +155,6 @@ fn max_index bool>(slice: &[T], is_less: &mut F) -> Optio .map(|(i, _)| i) } -/// Reorder the slice such that the element at `index` is at its final sorted position. -pub fn partition_at_index( - v: &mut [T], - index: usize, - mut is_less: F, -) -> (&mut [T], &mut T, &mut [T]) -where - F: FnMut(&T, &T) -> bool, -{ - if index >= v.len() { - panic!("partition_at_index index {} greater than length of slice {}", index, v.len()); - } - - if T::IS_ZST { - // Sorting has no meaningful behavior on zero-sized types. Do nothing. - } else if index == v.len() - 1 { - // Find max element and place it in the last position of the array. We're free to use - // `unwrap()` here because we know v must not be empty. - let max_idx = max_index(v, &mut is_less).unwrap(); - v.swap(max_idx, index); - } else if index == 0 { - // Find min element and place it in the first position of the array. We're free to use - // `unwrap()` here because we know v must not be empty. - let min_idx = min_index(v, &mut is_less).unwrap(); - v.swap(min_idx, index); - } else { - partition_at_index_loop(v, index, &mut is_less, None); - } - - let (left, right) = v.split_at_mut(index); - let (pivot, right) = right.split_at_mut(1); - let pivot = &mut pivot[0]; - (left, pivot, right) -} - /// Selection algorithm to select the k-th element from the slice in guaranteed O(n) time. /// This is essentially a quickselect that uses Tukey's Ninther for pivot selection fn median_of_medians bool>(mut v: &mut [T], is_less: &mut F, mut k: usize) { @@ -168,8 +166,8 @@ fn median_of_medians bool>(mut v: &mut [T], is_less: &mut // We now know that `k < v.len() <= isize::MAX` loop { - if v.len() <= MAX_INSERTION { - if v.len() > 1 { + if v.len() <= INSERTION_SORT_THRESHOLD { + if v.len() >= 2 { insertion_sort_shift_left(v, 1, is_less); } return; @@ -232,7 +230,8 @@ fn median_of_ninthers bool>(v: &mut [T], is_less: &mut F) } median_of_medians(&mut v[lo..lo + frac], is_less, pivot); - partition(v, lo + pivot, is_less).0 + + partition(v, lo + pivot, is_less) } /// Moves around the 9 elements at the indices a..i, such that diff --git a/core/src/slice/sort/shared/mod.rs b/core/src/slice/sort/shared/mod.rs new file mode 100644 index 0000000000000..ad1171bfc6a0a --- /dev/null +++ b/core/src/slice/sort/shared/mod.rs @@ -0,0 +1,45 @@ +use crate::marker::Freeze; + +pub(crate) mod pivot; +pub(crate) mod smallsort; + +/// SAFETY: this is safety relevant, how does this interact with the soundness holes in +/// specialization? +#[rustc_unsafe_specialization_marker] +pub(crate) trait FreezeMarker {} + +impl FreezeMarker for T {} + +/// Finds a run of sorted elements starting at the beginning of the slice. +/// +/// Returns the length of the run, and a bool that is false when the run +/// is ascending, and true if the run strictly descending. 
+#[inline(always)] +pub(crate) fn find_existing_run bool>( + v: &[T], + is_less: &mut F, +) -> (usize, bool) { + let len = v.len(); + if len < 2 { + return (len, false); + } + + // SAFETY: We checked that len >= 2, so 0 and 1 are valid indices. + // This also means that run_len < len implies run_len and run_len - 1 + // are valid indices as well. + unsafe { + let mut run_len = 2; + let strictly_descending = is_less(v.get_unchecked(1), v.get_unchecked(0)); + if strictly_descending { + while run_len < len && is_less(v.get_unchecked(run_len), v.get_unchecked(run_len - 1)) { + run_len += 1; + } + } else { + while run_len < len && !is_less(v.get_unchecked(run_len), v.get_unchecked(run_len - 1)) + { + run_len += 1; + } + } + (run_len, strictly_descending) + } +} diff --git a/core/src/slice/sort/shared/pivot.rs b/core/src/slice/sort/shared/pivot.rs new file mode 100644 index 0000000000000..255a1eb6c88a8 --- /dev/null +++ b/core/src/slice/sort/shared/pivot.rs @@ -0,0 +1,88 @@ +//! This module contains the logic for pivot selection. + +use crate::intrinsics; + +// Recursively select a pseudomedian if above this threshold. +const PSEUDO_MEDIAN_REC_THRESHOLD: usize = 64; + +/// Selects a pivot from `v`. Algorithm taken from glidesort by Orson Peters. +/// +/// This chooses a pivot by sampling an adaptive amount of points, approximating +/// the quality of a median of sqrt(n) elements. +pub fn choose_pivot bool>(v: &[T], is_less: &mut F) -> usize { + // We use unsafe code and raw pointers here because we're dealing with + // heavy recursion. Passing safe slices around would involve a lot of + // branches and function call overhead. + + let len = v.len(); + if len < 8 { + intrinsics::abort(); + } + + // SAFETY: a, b, c point to initialized regions of len_div_8 elements, + // satisfying median3 and median3_rec's preconditions as v_base points + // to an initialized region of n = len elements. + unsafe { + let v_base = v.as_ptr(); + let len_div_8 = len / 8; + + let a = v_base; // [0, floor(n/8)) + let b = v_base.add(len_div_8 * 4); // [4*floor(n/8), 5*floor(n/8)) + let c = v_base.add(len_div_8 * 7); // [7*floor(n/8), 8*floor(n/8)) + + if len < PSEUDO_MEDIAN_REC_THRESHOLD { + median3(&*a, &*b, &*c, is_less).sub_ptr(v_base) + } else { + median3_rec(a, b, c, len_div_8, is_less).sub_ptr(v_base) + } + } +} + +/// Calculates an approximate median of 3 elements from sections a, b, c, or +/// recursively from an approximation of each, if they're large enough. By +/// dividing the size of each section by 8 when recursing we have logarithmic +/// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) = +/// O(n^(log(3)/log(8))) ~= O(n^0.528) elements. +/// +/// SAFETY: a, b, c must point to the start of initialized regions of memory of +/// at least n elements. +unsafe fn median3_rec bool>( + mut a: *const T, + mut b: *const T, + mut c: *const T, + n: usize, + is_less: &mut F, +) -> *const T { + // SAFETY: a, b, c still point to initialized regions of n / 8 elements, + // by the exact same logic as in choose_pivot. + unsafe { + if n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD { + let n8 = n / 8; + a = median3_rec(a, a.add(n8 * 4), a.add(n8 * 7), n8, is_less); + b = median3_rec(b, b.add(n8 * 4), b.add(n8 * 7), n8, is_less); + c = median3_rec(c, c.add(n8 * 4), c.add(n8 * 7), n8, is_less); + } + median3(&*a, &*b, &*c, is_less) + } +} + +/// Calculates the median of 3 elements. +/// +/// SAFETY: a, b, c must be valid initialized elements. 
+#[inline(always)] +fn median3 bool>(a: &T, b: &T, c: &T, is_less: &mut F) -> *const T { + // Compiler tends to make this branchless when sensible, and avoids the + // third comparison when not. + let x = is_less(a, b); + let y = is_less(a, c); + if x == y { + // If x=y=0 then b, c <= a. In this case we want to return max(b, c). + // If x=y=1 then a < b, c. In this case we want to return min(b, c). + // By toggling the outcome of b < c using XOR x we get this behavior. + let z = is_less(b, c); + if z ^ x { c } else { b } + } else { + // Either c <= a < b or b <= a < c, thus a is our median. + a + } +} diff --git a/core/src/slice/sort/shared/smallsort.rs b/core/src/slice/sort/shared/smallsort.rs new file mode 100644 index 0000000000000..5111ed8756bf1 --- /dev/null +++ b/core/src/slice/sort/shared/smallsort.rs @@ -0,0 +1,855 @@ +//! This module contains a variety of sort implementations that are optimized for small lengths. + +use crate::intrinsics; +use crate::mem::{self, ManuallyDrop, MaybeUninit}; +use crate::ptr; +use crate::slice; + +use crate::slice::sort::shared::FreezeMarker; + +// It's important to differentiate between SMALL_SORT_THRESHOLD performance for +// small slices and small-sort performance sorting small sub-slices as part of +// the main quicksort loop. For the former, testing showed that the +// representative benchmarks for real-world performance are cold CPU state and +// not single-size hot benchmarks. For the latter the CPU will call them many +// times, so hot benchmarks are fine and more realistic. And it's worth it to +// optimize sorting small sub-slices with more sophisticated solutions than +// insertion sort. + +/// Using a trait allows us to specialize on `Freeze` which in turn allows us to make safe +/// abstractions. +pub(crate) trait StableSmallSortTypeImpl: Sized { + /// For which input length <= return value of this function, is it valid to call `small_sort`. + fn small_sort_threshold() -> usize; + + /// Sorts `v` using strategies optimized for small sizes. + fn small_sort bool>( + v: &mut [Self], + scratch: &mut [MaybeUninit], + is_less: &mut F, + ); +} + +impl StableSmallSortTypeImpl for T { + #[inline(always)] + default fn small_sort_threshold() -> usize { + // Optimal number of comparisons, and good perf. + SMALL_SORT_FALLBACK_THRESHOLD + } + + #[inline(always)] + default fn small_sort bool>( + v: &mut [T], + _scratch: &mut [MaybeUninit], + is_less: &mut F, + ) { + if v.len() >= 2 { + insertion_sort_shift_left(v, 1, is_less); + } + } +} + +impl StableSmallSortTypeImpl for T { + #[inline(always)] + fn small_sort_threshold() -> usize { + SMALL_SORT_GENERAL_THRESHOLD + } + + #[inline(always)] + fn small_sort bool>( + v: &mut [T], + scratch: &mut [MaybeUninit], + is_less: &mut F, + ) { + small_sort_general_with_scratch(v, scratch, is_less); + } +} + +/// Using a trait allows us to specialize on `Freeze` which in turn allows us to make safe +/// abstractions. +pub(crate) trait UnstableSmallSortTypeImpl: Sized { + /// For which input length <= return value of this function, is it valid to call `small_sort`. + fn small_sort_threshold() -> usize; + + /// Sorts `v` using strategies optimized for small sizes. 
+ fn small_sort bool>(v: &mut [Self], is_less: &mut F); +} + +impl UnstableSmallSortTypeImpl for T { + #[inline(always)] + default fn small_sort_threshold() -> usize { + SMALL_SORT_FALLBACK_THRESHOLD + } + + #[inline(always)] + default fn small_sort(v: &mut [T], is_less: &mut F) + where + F: FnMut(&T, &T) -> bool, + { + small_sort_fallback(v, is_less); + } +} + +impl UnstableSmallSortTypeImpl for T { + #[inline(always)] + fn small_sort_threshold() -> usize { + ::small_sort_threshold() + } + + #[inline(always)] + fn small_sort(v: &mut [T], is_less: &mut F) + where + F: FnMut(&T, &T) -> bool, + { + ::small_sort(v, is_less); + } +} + +/// FIXME(effects) use original ipnsort approach with choose_unstable_small_sort, +/// as found here . +pub(crate) trait UnstableSmallSortFreezeTypeImpl: Sized + FreezeMarker { + fn small_sort_threshold() -> usize; + + fn small_sort bool>(v: &mut [Self], is_less: &mut F); +} + +impl UnstableSmallSortFreezeTypeImpl for T { + #[inline(always)] + default fn small_sort_threshold() -> usize { + if (mem::size_of::() * SMALL_SORT_GENERAL_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE { + SMALL_SORT_GENERAL_THRESHOLD + } else { + SMALL_SORT_FALLBACK_THRESHOLD + } + } + + #[inline(always)] + default fn small_sort(v: &mut [T], is_less: &mut F) + where + F: FnMut(&T, &T) -> bool, + { + if (mem::size_of::() * SMALL_SORT_GENERAL_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE { + small_sort_general(v, is_less); + } else { + small_sort_fallback(v, is_less); + } + } +} + +/// SAFETY: Only used for run-time optimization heuristic. +#[rustc_unsafe_specialization_marker] +trait CopyMarker {} + +impl CopyMarker for T {} + +impl UnstableSmallSortFreezeTypeImpl for T { + #[inline(always)] + fn small_sort_threshold() -> usize { + if has_efficient_in_place_swap::() + && (mem::size_of::() * SMALL_SORT_NETWORK_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE + { + SMALL_SORT_NETWORK_THRESHOLD + } else if (mem::size_of::() * SMALL_SORT_GENERAL_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE { + SMALL_SORT_GENERAL_THRESHOLD + } else { + SMALL_SORT_FALLBACK_THRESHOLD + } + } + + #[inline(always)] + fn small_sort(v: &mut [T], is_less: &mut F) + where + F: FnMut(&T, &T) -> bool, + { + if has_efficient_in_place_swap::() + && (mem::size_of::() * SMALL_SORT_NETWORK_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE + { + small_sort_network(v, is_less); + } else if (mem::size_of::() * SMALL_SORT_GENERAL_SCRATCH_LEN) <= MAX_STACK_ARRAY_SIZE { + small_sort_general(v, is_less); + } else { + small_sort_fallback(v, is_less); + } + } +} + +/// Optimal number of comparisons, and good perf. +const SMALL_SORT_FALLBACK_THRESHOLD: usize = 16; + +/// From a comparison perspective 20 was ~2% more efficient for fully random input, but for +/// wall-clock performance choosing 32 yielded better performance overall. +/// +/// SAFETY: If you change this value, you have to adjust [`small_sort_general`] ! +const SMALL_SORT_GENERAL_THRESHOLD: usize = 32; + +/// [`small_sort_general`] uses [`sort8_stable`] as primitive and does a kind of ping-pong merge, +/// where the output of the first two [`sort8_stable`] calls is stored at the end of the scratch +/// buffer. This simplifies panic handling and avoids additional copies. This affects the required +/// scratch buffer size. +/// +/// SAFETY: If you change this value, you have to adjust [`small_sort_general`] ! +pub(crate) const SMALL_SORT_GENERAL_SCRATCH_LEN: usize = SMALL_SORT_GENERAL_THRESHOLD + 16; + +/// SAFETY: If you change this value, you have to adjust [`small_sort_network`] ! 
+const SMALL_SORT_NETWORK_THRESHOLD: usize = 32; +const SMALL_SORT_NETWORK_SCRATCH_LEN: usize = SMALL_SORT_NETWORK_THRESHOLD; + +/// Using a stack array, could cause a stack overflow if the type `T` is very large. To be +/// conservative we limit the usage of small-sorts that require a stack array to types that fit +/// within this limit. +const MAX_STACK_ARRAY_SIZE: usize = 4096; + +fn small_sort_fallback bool>(v: &mut [T], is_less: &mut F) { + if v.len() >= 2 { + insertion_sort_shift_left(v, 1, is_less); + } +} + +fn small_sort_general bool>(v: &mut [T], is_less: &mut F) { + let mut stack_array = MaybeUninit::<[T; SMALL_SORT_GENERAL_SCRATCH_LEN]>::uninit(); + + // SAFETY: The memory is backed by `stack_array`, and the operation is safe as long as the len + // is the same. + let scratch = unsafe { + slice::from_raw_parts_mut( + stack_array.as_mut_ptr() as *mut MaybeUninit, + SMALL_SORT_GENERAL_SCRATCH_LEN, + ) + }; + + small_sort_general_with_scratch(v, scratch, is_less); +} + +fn small_sort_general_with_scratch bool>( + v: &mut [T], + scratch: &mut [MaybeUninit], + is_less: &mut F, +) { + let len = v.len(); + if len < 2 { + return; + } + + if scratch.len() < len + 16 { + intrinsics::abort(); + } + + let v_base = v.as_mut_ptr(); + let len_div_2 = len / 2; + + // SAFETY: See individual comments. + unsafe { + let scratch_base = scratch.as_mut_ptr() as *mut T; + + let presorted_len = if const { mem::size_of::() <= 16 } && len >= 16 { + // SAFETY: scratch_base is valid and has enough space. + sort8_stable(v_base, scratch_base, scratch_base.add(len), is_less); + sort8_stable( + v_base.add(len_div_2), + scratch_base.add(len_div_2), + scratch_base.add(len + 8), + is_less, + ); + + 8 + } else if len >= 8 { + // SAFETY: scratch_base is valid and has enough space. + sort4_stable(v_base, scratch_base, is_less); + sort4_stable(v_base.add(len_div_2), scratch_base.add(len_div_2), is_less); + + 4 + } else { + ptr::copy_nonoverlapping(v_base, scratch_base, 1); + ptr::copy_nonoverlapping(v_base.add(len_div_2), scratch_base.add(len_div_2), 1); + + 1 + }; + + for offset in [0, len_div_2] { + // SAFETY: at this point dst is initialized with presorted_len elements. + // We extend this to desired_len, src is valid for desired_len elements. + let src = v_base.add(offset); + let dst = scratch_base.add(offset); + let desired_len = if offset == 0 { len_div_2 } else { len - len_div_2 }; + + for i in presorted_len..desired_len { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 1); + insert_tail(dst, dst.add(i), is_less); + } + } + + // SAFETY: see comment in `CopyOnDrop::drop`. + let drop_guard = CopyOnDrop { src: scratch_base, dst: v_base, len }; + + // SAFETY: at this point scratch_base is fully initialized, allowing us + // to use it as the source of our merge back into the original array. + // If a panic occurs we ensure the original array is restored to a valid + // permutation of the input through drop_guard. This technique is similar + // to ping-pong merging. + bidirectional_merge( + &*ptr::slice_from_raw_parts(drop_guard.src, drop_guard.len), + drop_guard.dst, + is_less, + ); + mem::forget(drop_guard); + } +} + +struct CopyOnDrop { + src: *const T, + dst: *mut T, + len: usize, +} + +impl Drop for CopyOnDrop { + fn drop(&mut self) { + // SAFETY: `src` must contain `len` initialized elements, and dst must + // be valid to write `len` elements. 
+ unsafe { + ptr::copy_nonoverlapping(self.src, self.dst, self.len); + } + } +} + +fn small_sort_network<T, F>(v: &mut [T], is_less: &mut F) +where + T: FreezeMarker, + F: FnMut(&T, &T) -> bool, +{ + // This implementation is tuned to be efficient for integer types. + + let len = v.len(); + if len < 2 { + return; + } + + if len > SMALL_SORT_NETWORK_SCRATCH_LEN { + intrinsics::abort(); + } + + let mut stack_array = MaybeUninit::<[T; SMALL_SORT_NETWORK_SCRATCH_LEN]>::uninit(); + + let len_div_2 = len / 2; + let no_merge = len < 18; + + let v_base = v.as_mut_ptr(); + let initial_region_len = if no_merge { len } else { len_div_2 }; + // SAFETY: Both possible values of `initial_region_len` are in-bounds. + let mut region = unsafe { &mut *ptr::slice_from_raw_parts_mut(v_base, initial_region_len) }; + + // Avoid compiler unrolling, we *really* don't want that to happen here for binary-size reasons. + loop { + let presorted_len = if region.len() >= 13 { + sort13_optimal(region, is_less); + 13 + } else if region.len() >= 9 { + sort9_optimal(region, is_less); + 9 + } else { + 1 + }; + + insertion_sort_shift_left(region, presorted_len, is_less); + + if no_merge { + return; + } + + if region.as_ptr() != v_base { + break; + } + + // SAFETY: The right side of `v` based on `len_div_2` is guaranteed in-bounds. + unsafe { + region = &mut *ptr::slice_from_raw_parts_mut(v_base.add(len_div_2), len - len_div_2) + }; + } + + // SAFETY: We checked that T is Freeze and thus observation safe. + // Should is_less panic, v was not modified in parity_merge and retains its original input. + // scratch and v must not alias and scratch has v.len() space. + unsafe { + let scratch_base = stack_array.as_mut_ptr() as *mut T; + bidirectional_merge( + &mut *ptr::slice_from_raw_parts_mut(v_base, len), + scratch_base, + is_less, + ); + ptr::copy_nonoverlapping(scratch_base, v_base, len); + } +} + +/// Swap two values in the slice pointed to by `v_base` at the positions `a_pos` and `b_pos` if the +/// value at position `b_pos` is less than the one at position `a_pos`. +pub unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + // SAFETY: the caller must guarantee that `a` and `b` each added to `v_base` yield valid + // pointers into `v_base`, and are properly aligned, and part of the same allocation. + unsafe { + let v_a = v_base.add(a_pos); + let v_b = v_base.add(b_pos); + + // PANIC SAFETY: if is_less panics, no scratch memory was created and the slice should still be + // in a well defined state, without duplicates. + + // Important to only swap if it is more and not if it is equal. is_less should return false for + // equal, so we don't swap. + let should_swap = is_less(&*v_b, &*v_a); + + // This is a branchless version of swap if. + // The equivalent code with a branch would be: + // + // if should_swap { + // ptr::swap(v_a, v_b); + // } + + // The goal is to generate cmov instructions here. + let left_swap = if should_swap { v_b } else { v_a }; + let right_swap = if should_swap { v_a } else { v_b }; + + let right_swap_tmp = ManuallyDrop::new(ptr::read(right_swap)); + ptr::copy(left_swap, v_a, 1); + ptr::copy_nonoverlapping(&*right_swap_tmp, v_b, 1); + } +} + +/// Sorts the first 9 elements of `v` with a fast fixed function. +/// +/// Should `is_less` generate substantial amounts of code the compiler can choose to not inline +/// `swap_if_less`.
If the code of a sort impl changes so as to call this function in multiple +/// places, `#[inline(never)]` is recommended to keep binary-size in check. The current design of +/// `small_sort_network` makes sure to only call this once. +fn sort9_optimal(v: &mut [T], is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + if v.len() < 9 { + intrinsics::abort(); + } + + let v_base = v.as_mut_ptr(); + + // Optimal sorting network see: + // https://bertdobbelaere.github.io/sorting_networks.html. + + // SAFETY: We checked the len. + unsafe { + swap_if_less(v_base, 0, 3, is_less); + swap_if_less(v_base, 1, 7, is_less); + swap_if_less(v_base, 2, 5, is_less); + swap_if_less(v_base, 4, 8, is_less); + swap_if_less(v_base, 0, 7, is_less); + swap_if_less(v_base, 2, 4, is_less); + swap_if_less(v_base, 3, 8, is_less); + swap_if_less(v_base, 5, 6, is_less); + swap_if_less(v_base, 0, 2, is_less); + swap_if_less(v_base, 1, 3, is_less); + swap_if_less(v_base, 4, 5, is_less); + swap_if_less(v_base, 7, 8, is_less); + swap_if_less(v_base, 1, 4, is_less); + swap_if_less(v_base, 3, 6, is_less); + swap_if_less(v_base, 5, 7, is_less); + swap_if_less(v_base, 0, 1, is_less); + swap_if_less(v_base, 2, 4, is_less); + swap_if_less(v_base, 3, 5, is_less); + swap_if_less(v_base, 6, 8, is_less); + swap_if_less(v_base, 2, 3, is_less); + swap_if_less(v_base, 4, 5, is_less); + swap_if_less(v_base, 6, 7, is_less); + swap_if_less(v_base, 1, 2, is_less); + swap_if_less(v_base, 3, 4, is_less); + swap_if_less(v_base, 5, 6, is_less); + } +} + +/// Sorts the first 13 elements of `v` with a fast fixed function. +/// +/// Should `is_less` generate substantial amounts of code the compiler can choose to not inline +/// `swap_if_less`. If the code of a sort impl changes so as to call this function in multiple +/// places, `#[inline(never)]` is recommended to keep binary-size in check. The current design of +/// `small_sort_network` makes sure to only call this once. +fn sort13_optimal(v: &mut [T], is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + if v.len() < 13 { + intrinsics::abort(); + } + + let v_base = v.as_mut_ptr(); + + // Optimal sorting network see: + // https://bertdobbelaere.github.io/sorting_networks.html. + + // SAFETY: We checked the len. 
+ unsafe { + swap_if_less(v_base, 0, 12, is_less); + swap_if_less(v_base, 1, 10, is_less); + swap_if_less(v_base, 2, 9, is_less); + swap_if_less(v_base, 3, 7, is_less); + swap_if_less(v_base, 5, 11, is_less); + swap_if_less(v_base, 6, 8, is_less); + swap_if_less(v_base, 1, 6, is_less); + swap_if_less(v_base, 2, 3, is_less); + swap_if_less(v_base, 4, 11, is_less); + swap_if_less(v_base, 7, 9, is_less); + swap_if_less(v_base, 8, 10, is_less); + swap_if_less(v_base, 0, 4, is_less); + swap_if_less(v_base, 1, 2, is_less); + swap_if_less(v_base, 3, 6, is_less); + swap_if_less(v_base, 7, 8, is_less); + swap_if_less(v_base, 9, 10, is_less); + swap_if_less(v_base, 11, 12, is_less); + swap_if_less(v_base, 4, 6, is_less); + swap_if_less(v_base, 5, 9, is_less); + swap_if_less(v_base, 8, 11, is_less); + swap_if_less(v_base, 10, 12, is_less); + swap_if_less(v_base, 0, 5, is_less); + swap_if_less(v_base, 3, 8, is_less); + swap_if_less(v_base, 4, 7, is_less); + swap_if_less(v_base, 6, 11, is_less); + swap_if_less(v_base, 9, 10, is_less); + swap_if_less(v_base, 0, 1, is_less); + swap_if_less(v_base, 2, 5, is_less); + swap_if_less(v_base, 6, 9, is_less); + swap_if_less(v_base, 7, 8, is_less); + swap_if_less(v_base, 10, 11, is_less); + swap_if_less(v_base, 1, 3, is_less); + swap_if_less(v_base, 2, 4, is_less); + swap_if_less(v_base, 5, 6, is_less); + swap_if_less(v_base, 9, 10, is_less); + swap_if_less(v_base, 1, 2, is_less); + swap_if_less(v_base, 3, 4, is_less); + swap_if_less(v_base, 5, 7, is_less); + swap_if_less(v_base, 6, 8, is_less); + swap_if_less(v_base, 2, 3, is_less); + swap_if_less(v_base, 4, 5, is_less); + swap_if_less(v_base, 6, 7, is_less); + swap_if_less(v_base, 8, 9, is_less); + swap_if_less(v_base, 3, 4, is_less); + swap_if_less(v_base, 5, 6, is_less); + } +} + +/// Sorts range [begin, tail] assuming [begin, tail) is already sorted. +/// +/// # Safety +/// begin < tail and p must be valid and initialized for all begin <= p <= tail. +unsafe fn insert_tail bool>(begin: *mut T, tail: *mut T, is_less: &mut F) { + // SAFETY: see individual comments. + unsafe { + // SAFETY: in-bounds as tail > begin. + let mut sift = tail.sub(1); + if !is_less(&*tail, &*sift) { + return; + } + + // SAFETY: after this read tail is never read from again, as we only ever + // read from sift, sift < tail and we only ever decrease sift. Thus this is + // effectively a move, not a copy. Should a panic occur, or we have found + // the correct insertion position, gap_guard ensures the element is moved + // back into the array. + let tmp = ManuallyDrop::new(tail.read()); + let mut gap_guard = CopyOnDrop { src: &*tmp, dst: tail, len: 1 }; + + loop { + // SAFETY: we move sift into the gap (which is valid), and point the + // gap guard destination at sift, ensuring that if a panic occurs the + // gap is once again filled. + ptr::copy_nonoverlapping(sift, gap_guard.dst, 1); + gap_guard.dst = sift; + + if sift == begin { + break; + } + + // SAFETY: we checked that sift != begin, thus this is in-bounds. + sift = sift.sub(1); + if !is_less(&tmp, &*sift) { + break; + } + } + } +} + +/// Sort `v` assuming `v[..offset]` is already sorted. +pub fn insertion_sort_shift_left bool>( + v: &mut [T], + offset: usize, + is_less: &mut F, +) { + let len = v.len(); + if offset == 0 || offset > len { + intrinsics::abort(); + } + + // SAFETY: see individual comments. + unsafe { + // We write this basic loop directly using pointers, as when we use a + // for loop LLVM likes to unroll this loop which we do not want. 
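// [Editorial aside: illustrative sketch, not part of the patch.] Contract of this function:
// the prefix `v[..offset]` must already be sorted; every following element is then inserted
// into the sorted prefix by `insert_tail`. For example, with offset = 3:
//     [1, 4, 9, 3, 7] -> [1, 3, 4, 7, 9]
// `offset == 0` and `offset > len` abort (checked above), since `insert_tail` needs a
// non-empty sorted prefix to insert into.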
+ // SAFETY: v_end is the one-past-end pointer, and we checked that + // offset <= len, thus tail is also in-bounds. + let v_base = v.as_mut_ptr(); + let v_end = v_base.add(len); + let mut tail = v_base.add(offset); + while tail != v_end { + // SAFETY: v_base and tail are both valid pointers to elements, and + // v_base < tail since we checked offset != 0. + insert_tail(v_base, tail, is_less); + + // SAFETY: we checked that tail is not yet the one-past-end pointer. + tail = tail.add(1); + } + } +} + +/// SAFETY: The caller MUST guarantee that `v_base` is valid for 4 reads and +/// `dst` is valid for 4 writes. The result will be stored in `dst[0..4]`. +pub unsafe fn sort4_stable bool>( + v_base: *const T, + dst: *mut T, + is_less: &mut F, +) { + // By limiting select to picking pointers, we are guaranteed good cmov code-gen + // regardless of type T's size. Further this only does 5 instead of 6 + // comparisons compared to a stable transposition 4 element sorting-network, + // and always copies each element exactly once. + + // SAFETY: all pointers have offset at most 3 from v_base and dst, and are + // thus in-bounds by the precondition. + unsafe { + // Stably create two pairs a <= b and c <= d. + let c1 = is_less(&*v_base.add(1), &*v_base); + let c2 = is_less(&*v_base.add(3), &*v_base.add(2)); + let a = v_base.add(c1 as usize); + let b = v_base.add(!c1 as usize); + let c = v_base.add(2 + c2 as usize); + let d = v_base.add(2 + (!c2 as usize)); + + // Compare (a, c) and (b, d) to identify max/min. We're left with two + // unknown elements, but because we are a stable sort we must know which + // one is leftmost and which one is rightmost. + // c3, c4 | min max unknown_left unknown_right + // 0, 0 | a d b c + // 0, 1 | a b c d + // 1, 0 | c d a b + // 1, 1 | c b a d + let c3 = is_less(&*c, &*a); + let c4 = is_less(&*d, &*b); + let min = select(c3, c, a); + let max = select(c4, b, d); + let unknown_left = select(c3, a, select(c4, c, b)); + let unknown_right = select(c4, d, select(c3, b, c)); + + // Sort the last two unknown elements. + let c5 = is_less(&*unknown_right, &*unknown_left); + let lo = select(c5, unknown_right, unknown_left); + let hi = select(c5, unknown_left, unknown_right); + + ptr::copy_nonoverlapping(min, dst, 1); + ptr::copy_nonoverlapping(lo, dst.add(1), 1); + ptr::copy_nonoverlapping(hi, dst.add(2), 1); + ptr::copy_nonoverlapping(max, dst.add(3), 1); + } + + #[inline(always)] + fn select(cond: bool, if_true: *const T, if_false: *const T) -> *const T { + if cond { if_true } else { if_false } + } +} + +/// SAFETY: The caller MUST guarantee that `v_base` is valid for 8 reads and +/// writes, `scratch_base` and `dst` MUST be valid for 8 writes. The result will +/// be stored in `dst[0..8]`. +unsafe fn sort8_stable bool>( + v_base: *mut T, + dst: *mut T, + scratch_base: *mut T, + is_less: &mut F, +) { + // SAFETY: these pointers are all in-bounds by the precondition of our function. + unsafe { + sort4_stable(v_base, scratch_base, is_less); + sort4_stable(v_base.add(4), scratch_base.add(4), is_less); + } + + // SAFETY: scratch_base[0..8] is now initialized, allowing us to merge back + // into dst. + unsafe { + bidirectional_merge(&*ptr::slice_from_raw_parts(scratch_base, 8), dst, is_less); + } +} + +#[inline(always)] +unsafe fn merge_up bool>( + mut left_src: *const T, + mut right_src: *const T, + mut dst: *mut T, + is_less: &mut F, +) -> (*const T, *const T, *mut T) { + // This is a branchless merge utility function. 
+    // The equivalent code with a branch would be:
+    //
+    // if !is_less(&*right_src, &*left_src) {
+    //     ptr::copy_nonoverlapping(left_src, dst, 1);
+    //     left_src = left_src.add(1);
+    // } else {
+    //     ptr::copy_nonoverlapping(right_src, dst, 1);
+    //     right_src = right_src.add(1);
+    // }
+    // dst = dst.add(1);
+
+    // SAFETY: The caller must guarantee that `left_src`, `right_src` are valid
+    // to read and `dst` is valid to write, while not aliasing.
+    unsafe {
+        let is_l = !is_less(&*right_src, &*left_src);
+        let src = if is_l { left_src } else { right_src };
+        ptr::copy_nonoverlapping(src, dst, 1);
+        right_src = right_src.add(!is_l as usize);
+        left_src = left_src.add(is_l as usize);
+        dst = dst.add(1);
+    }
+
+    (left_src, right_src, dst)
+}
+
+#[inline(always)]
+unsafe fn merge_down<T, F: FnMut(&T, &T) -> bool>(
+    mut left_src: *const T,
+    mut right_src: *const T,
+    mut dst: *mut T,
+    is_less: &mut F,
+) -> (*const T, *const T, *mut T) {
+    // This is a branchless merge utility function.
+    // The equivalent code with a branch would be:
+    //
+    // if !is_less(&*right_src, &*left_src) {
+    //     ptr::copy_nonoverlapping(right_src, dst, 1);
+    //     right_src = right_src.wrapping_sub(1);
+    // } else {
+    //     ptr::copy_nonoverlapping(left_src, dst, 1);
+    //     left_src = left_src.wrapping_sub(1);
+    // }
+    // dst = dst.sub(1);
+
+    // SAFETY: The caller must guarantee that `left_src`, `right_src` are valid
+    // to read and `dst` is valid to write, while not aliasing.
+    unsafe {
+        let is_l = !is_less(&*right_src, &*left_src);
+        let src = if is_l { right_src } else { left_src };
+        ptr::copy_nonoverlapping(src, dst, 1);
+        right_src = right_src.wrapping_sub(is_l as usize);
+        left_src = left_src.wrapping_sub(!is_l as usize);
+        dst = dst.sub(1);
+    }
+
+    (left_src, right_src, dst)
+}
+
+/// Merge v assuming v[..len / 2] and v[len / 2..] are sorted.
+///
+/// Original idea for bi-directional merging by Igor van den Hoven (quadsort),
+/// adapted to only use merge up and down. In contrast to the original
+/// parity_merge function, it performs 2 writes instead of 4 per iteration.
+///
+/// # Safety
+/// The caller must guarantee that `dst` is valid for v.len() writes.
+/// Also `v.as_ptr()` and `dst` must not alias and v.len() must be >= 2.
+///
+/// Note that T must be Freeze, the comparison function is evaluated on outdated
+/// temporary 'copies' that may not end up in the final array.
+unsafe fn bidirectional_merge<T: FreezeMarker, F: FnMut(&T, &T) -> bool>(
+    v: &[T],
+    dst: *mut T,
+    is_less: &mut F,
+) {
+    // It helps to visualize the merge:
+    //
+    // Initial:
+    //
+    //  |dst (in dst)
+    //  |left               |right
+    //  v                   v
+    // [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]
+    //                     ^                   ^
+    //                     |left_rev           |right_rev
+    //                                         |dst_rev (in dst)
+    //
+    // After:
+    //
+    //                      |dst (in dst)
+    //        |left         |           |right
+    //        v             v           v
+    // [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]
+    //       ^             ^           ^
+    //       |left_rev     |           |right_rev
+    //                     |dst_rev (in dst)
+    //
+    // In each iteration one of left or right moves up one position, and one of
+    // left_rev or right_rev moves down one position, whereas dst always moves
+    // up one position and dst_rev always moves down one position. Assuming
+    // the input was sorted and the comparison function is correctly implemented
+    // at the end we will have left == left_rev + 1, and right == right_rev + 1,
+    // fully consuming the input having written it to dst.
+
+    let len = v.len();
+    let src = v.as_ptr();
+
+    let len_div_2 = len / 2;
+
+    // SAFETY: The caller has to ensure that len >= 2.
+    unsafe {
+        intrinsics::assume(len_div_2 != 0); // This can avoid useless code-gen.
+    }
+
+    // SAFETY: no matter what the result of the user-provided comparison function
+    // is, all 4 read pointers will always be in-bounds. Writing `dst` and `dst_rev`
+    // will always be in bounds if the caller guarantees that `dst` is valid for
+    // `v.len()` writes.
+    unsafe {
+        let mut left = src;
+        let mut right = src.add(len_div_2);
+        let mut dst = dst;
+
+        let mut left_rev = src.add(len_div_2 - 1);
+        let mut right_rev = src.add(len - 1);
+        let mut dst_rev = dst.add(len - 1);
+
+        for _ in 0..len_div_2 {
+            (left, right, dst) = merge_up(left, right, dst, is_less);
+            (left_rev, right_rev, dst_rev) = merge_down(left_rev, right_rev, dst_rev, is_less);
+        }
+
+        let left_end = left_rev.wrapping_add(1);
+        let right_end = right_rev.wrapping_add(1);
+
+        // Odd length, so one element is left unconsumed in the input.
+        if len % 2 != 0 {
+            let left_nonempty = left < left_end;
+            let last_src = if left_nonempty { left } else { right };
+            ptr::copy_nonoverlapping(last_src, dst, 1);
+            left = left.add(left_nonempty as usize);
+            right = right.add((!left_nonempty) as usize);
+        }
+
+        // We should now have consumed the full input exactly once. This can
+        // only fail if the comparison operator fails to be Ord, in which case
+        // we will panic and never access the inconsistent state in dst.
+        if left != left_end || right != right_end {
+            panic_on_ord_violation();
+        }
+    }
+}
+
+#[inline(never)]
+fn panic_on_ord_violation() -> ! {
+    panic!("Ord violation");
+}
+
+#[must_use]
+pub(crate) const fn has_efficient_in_place_swap<T>() -> bool {
+    // Heuristic that holds true on all tested 64-bit capable architectures.
+    mem::size_of::<T>() <= 8 // mem::size_of::<u64>()
+}
diff --git a/core/src/slice/sort/stable/drift.rs b/core/src/slice/sort/stable/drift.rs
new file mode 100644
index 0000000000000..2d9c4ac9fcf7c
--- /dev/null
+++ b/core/src/slice/sort/stable/drift.rs
@@ -0,0 +1,300 @@
+//! This module contains the hybrid top-level loop combining bottom-up Mergesort with top-down
+//! Quicksort.
+
+use crate::cmp;
+use crate::intrinsics;
+use crate::mem::MaybeUninit;
+
+use crate::slice::sort::shared::find_existing_run;
+use crate::slice::sort::shared::smallsort::StableSmallSortTypeImpl;
+use crate::slice::sort::stable::merge::merge;
+use crate::slice::sort::stable::quicksort::quicksort;
+
+/// Sorts `v` based on comparison function `is_less`. If `eager_sort` is true,
+/// it will only do small-sorts and physical merges, ensuring O(N * log(N))
+/// worst-case complexity. `scratch.len()` must be at least `max(v.len() / 2,
+/// MIN_SMALL_SORT_SCRATCH_LEN)` otherwise the implementation may abort.
+/// Fully ascending and descending inputs will be sorted with exactly N - 1
+/// comparisons.
+///
+/// This is the main loop for driftsort, which uses powersort's heuristic to
+/// determine in which order to merge runs, see below for details.
+pub fn sort<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    eager_sort: bool,
+    is_less: &mut F,
+) {
+    let len = v.len();
+    if len < 2 {
+        return; // Removing this length check *increases* code size.
+    }
+    let scale_factor = merge_tree_scale_factor(len);
+
+    // It's important to have a relatively high entry barrier for pre-sorted
+    // runs, as the presence of a single such run will force on average several
+    // merge operations and shrink the maximum quicksort size a lot. For that
+    // reason we use sqrt(len) as our pre-sorted run threshold.
+    const MIN_SQRT_RUN_LEN: usize = 64;
+    let min_good_run_len = if len <= (MIN_SQRT_RUN_LEN * MIN_SQRT_RUN_LEN) {
+        // For small input length `MIN_SQRT_RUN_LEN` would break pattern
+        // detection of full or nearly sorted inputs.
+        cmp::min(len - len / 2, MIN_SQRT_RUN_LEN)
+    } else {
+        sqrt_approx(len)
+    };
+
+    // (stack_len, runs, desired_depths) together form a stack maintaining run
+    // information for the powersort heuristic. desired_depths[i] is the desired
+    // depth of the merge node that merges runs[i] with the run that comes after
+    // it.
+    let mut stack_len = 0;
+    let mut run_storage = MaybeUninit::<[DriftsortRun; 66]>::uninit();
+    let runs: *mut DriftsortRun = run_storage.as_mut_ptr().cast();
+    let mut desired_depth_storage = MaybeUninit::<[u8; 66]>::uninit();
+    let desired_depths: *mut u8 = desired_depth_storage.as_mut_ptr().cast();
+
+    let mut scan_idx = 0;
+    let mut prev_run = DriftsortRun::new_sorted(0); // Initial dummy run.
+    loop {
+        // Compute the next run and the desired depth of the merge node between
+        // prev_run and next_run. On the last iteration we create a dummy run
+        // with root-level desired depth to fully collapse the merge tree.
+        let (next_run, desired_depth);
+        if scan_idx < len {
+            next_run =
+                create_run(&mut v[scan_idx..], scratch, min_good_run_len, eager_sort, is_less);
+            desired_depth = merge_tree_depth(
+                scan_idx - prev_run.len(),
+                scan_idx,
+                scan_idx + next_run.len(),
+                scale_factor,
+            );
+        } else {
+            next_run = DriftsortRun::new_sorted(0);
+            desired_depth = 0;
+        };
+
+        // Process the merge nodes between earlier runs[i] that have a desire to
+        // be deeper in the merge tree than the merge node for the splitpoint
+        // between prev_run and next_run.
+        //
+        // SAFETY: first note that this is the only place we modify stack_len,
+        // runs or desired depths. We maintain the following invariants:
+        // 1. The first stack_len elements of runs/desired_depths are initialized.
+        // 2. For all valid i > 0, desired_depths[i] < desired_depths[i+1].
+        // 3. The sum of all valid runs[i].len() plus prev_run.len() equals
+        //    scan_idx.
+        unsafe {
+            while stack_len > 1 && *desired_depths.add(stack_len - 1) >= desired_depth {
+                // Desired depth greater than the upcoming desired depth, pop
+                // left neighbor run from stack and merge into prev_run.
+                let left = *runs.add(stack_len - 1);
+                let merged_len = left.len() + prev_run.len();
+                let merge_start_idx = scan_idx - merged_len;
+                let merge_slice = v.get_unchecked_mut(merge_start_idx..scan_idx);
+                prev_run = logical_merge(merge_slice, scratch, left, prev_run, is_less);
+                stack_len -= 1;
+            }
+
+            // We now know that desired_depths[stack_len - 1] < desired_depth,
+            // maintaining our invariant. This also guarantees we don't overflow
+            // the stack as merge_tree_depth(..) <= 64 and thus we can only have
+            // 64 distinct values on the stack before pushing, plus our initial
+            // dummy run, while our capacity is 66.
+            *runs.add(stack_len) = prev_run;
+            *desired_depths.add(stack_len) = desired_depth;
+            stack_len += 1;
+        }
+
+        // Break before overriding the last run with our dummy run.
+        if scan_idx >= len {
+            break;
+        }
+
+        scan_idx += next_run.len();
+        prev_run = next_run;
+    }
+
+    if !prev_run.sorted() {
+        stable_quicksort(v, scratch, is_less);
+    }
+}
+
+// Nearly-Optimal Mergesorts: Fast, Practical Sorting Methods That Optimally
+// Adapt to Existing Runs by J. Ian Munro and Sebastian Wild.
+//
+// This method forms a binary merge tree, where each internal node corresponds
+// to a splitting point between the adjacent runs that have to be merged. If we
+// visualize our array as the number line from 0 to 1, we want to find the
+// dyadic fraction with smallest denominator that lies between the midpoints of
+// our to-be-merged slices. The exponent in the dyadic fraction indicates the
+// desired depth in the binary merge tree this internal node wishes to have.
+// This does not always correspond to the actual depth due to the inherent
+// imbalance in runs, but we follow it as closely as possible.
+//
+// As an optimization we rescale the number line from [0, 1) to [0, 2^62). Then
+// finding the simplest dyadic fraction between midpoints corresponds to finding
+// the most significant bit difference of the midpoints. We save scale_factor =
+// ceil(2^62 / n) to perform this rescaling using a multiplication, avoiding
+// having to repeatedly do integer divides. This rescaling isn't exact when n is
+// not a power of two since we use integers and not reals, but the result is
+// very close, and in fact when n < 2^30 the resulting tree is equivalent as the
+// approximation errors stay entirely in the lower order bits.
+//
+// Thus for the splitting point between two adjacent slices [a, b) and [b, c)
+// the desired depth of the corresponding merge node is CLZ((a+b)*f ^ (b+c)*f),
+// where CLZ counts the number of leading zeros in an integer and f is our scale
+// factor. Note that we omitted the division by two in the midpoint
+// calculations, as this simply shifts the bits by one position (and thus always
+// adds one to the result), and we only care about the relative depths.
+//
+// Finally, if we try to upper bound x = (a+b)*f giving x = (n-1 + n) * ceil(2^62 / n) then
+//     x < (2^62 / n + 1) * 2n
+//     x < 2^63 + 2n
+// So as long as n < 2^62 we find that x < 2^64, meaning our operations do not
+// overflow.
+#[inline(always)]
+fn merge_tree_scale_factor(n: usize) -> u64 {
+    if usize::BITS > u64::BITS {
+        panic!("Platform not supported");
+    }
+
+    ((1 << 62) + n as u64 - 1) / n as u64
+}
+
+// Note: merge_tree_depth output is < 64 when left < right as f*x and f*y must
+// differ in some bit, and is <= 64 always.
+#[inline(always)]
+fn merge_tree_depth(left: usize, mid: usize, right: usize, scale_factor: u64) -> u8 {
+    let x = left as u64 + mid as u64;
+    let y = mid as u64 + right as u64;
+    ((scale_factor * x) ^ (scale_factor * y)).leading_zeros() as u8
+}
+
+fn sqrt_approx(n: usize) -> usize {
+    // Note that sqrt(n) = n^(1/2), and that 2^log2(n) = n. We combine these
+    // two facts to approximate sqrt(n) as 2^(log2(n) / 2). Because our integer
+    // log floors we want to add 0.5 to compensate for this on average, so our
+    // initial approximation is 2^((1 + floor(log2(n))) / 2).
+    //
+    // We then apply an iteration of Newton's method to improve our
+    // approximation, which for sqrt(n) is a1 = (a0 + n / a0) / 2.
+    //
+    // Finally we note that the exponentiation / division can be done directly
+    // with shifts. We OR with 1 to avoid zero-checks in the integer log.
+    let ilog = (n | 1).ilog2();
+    let shift = (1 + ilog) / 2;
+    ((1 << shift) + (n >> shift)) / 2
+}
+
+// Lazy logical runs as in Glidesort.
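Before the logical-merge helper below, it may help to see the scale-factor math above in action. The following is a small, self-contained sketch (illustrative only; it duplicates the helpers from this file with hand-picked sample values) showing the depth of a merge node and the integer sqrt approximation:

```rust
fn merge_tree_scale_factor(n: usize) -> u64 {
    ((1u64 << 62) + n as u64 - 1) / n as u64
}

fn merge_tree_depth(left: usize, mid: usize, right: usize, scale_factor: u64) -> u8 {
    let x = left as u64 + mid as u64;
    let y = mid as u64 + right as u64;
    ((scale_factor * x) ^ (scale_factor * y)).leading_zeros() as u8
}

fn sqrt_approx(n: usize) -> usize {
    let ilog = (n | 1).ilog2();
    let shift = (1 + ilog) / 2;
    ((1 << shift) + (n >> shift)) / 2
}

fn main() {
    // Runs [0, 2) and [2, 4) of an 8 element slice: the splitpoint sits on a
    // coarse dyadic fraction, so the desired merge depth is small.
    let f = merge_tree_scale_factor(8);
    assert_eq!(merge_tree_depth(0, 2, 4, f), 2);

    // The integer sqrt approximation used for the run threshold. Exact for
    // powers of four, close everywhere else.
    assert_eq!(sqrt_approx(1 << 20), 1024);
}
```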
+#[inline(always)]
+fn logical_merge<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    left: DriftsortRun,
+    right: DriftsortRun,
+    is_less: &mut F,
+) -> DriftsortRun {
+    // If one or both of the runs are sorted do a physical merge, using
+    // quicksort to sort the unsorted run if present. We also *need* to
+    // physically merge if the combined runs would not fit in the scratch space
+    // anymore (as this would mean we are no longer able to quicksort them).
+    let len = v.len();
+    let can_fit_in_scratch = len <= scratch.len();
+    if !can_fit_in_scratch || left.sorted() || right.sorted() {
+        if !left.sorted() {
+            stable_quicksort(&mut v[..left.len()], scratch, is_less);
+        }
+        if !right.sorted() {
+            stable_quicksort(&mut v[left.len()..], scratch, is_less);
+        }
+        merge(v, scratch, left.len(), is_less);
+
+        DriftsortRun::new_sorted(len)
+    } else {
+        DriftsortRun::new_unsorted(len)
+    }
+}
+
+/// Creates a new logical run.
+///
+/// A logical run can either be sorted or unsorted. If there is a pre-existing
+/// run that clears the `min_good_run_len` threshold it is returned as a sorted
+/// run. If not, the result depends on the value of `eager_sort`. If it is true,
+/// then a sorted run of length `T::SMALL_SORT_THRESHOLD` is returned, and if it
+/// is false an unsorted run of length `min_good_run_len` is returned.
+fn create_run<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    min_good_run_len: usize,
+    eager_sort: bool,
+    is_less: &mut F,
+) -> DriftsortRun {
+    let len = v.len();
+    if len >= min_good_run_len {
+        let (run_len, was_reversed) = find_existing_run(v, is_less);
+
+        // SAFETY: find_existing_run promises to return a valid run_len.
+        unsafe { intrinsics::assume(run_len <= len) };
+
+        if run_len >= min_good_run_len {
+            if was_reversed {
+                v[..run_len].reverse();
+            }
+
+            return DriftsortRun::new_sorted(run_len);
+        }
+    }
+
+    if eager_sort {
+        // We call quicksort with a len that will immediately call small-sort.
+        // By not calling the small-sort directly here it can always be inlined into
+        // the quicksort itself, making the recursive base case faster and generally
+        // more binary-size efficient.
+        let eager_run_len = cmp::min(T::small_sort_threshold(), len);
+        quicksort(&mut v[..eager_run_len], scratch, 0, None, is_less);
+        DriftsortRun::new_sorted(eager_run_len)
+    } else {
+        DriftsortRun::new_unsorted(cmp::min(min_good_run_len, len))
+    }
+}
+
+fn stable_quicksort<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    is_less: &mut F,
+) {
+    // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
+    // The binary OR by one is used to eliminate the zero-check in the logarithm.
+    let limit = 2 * (v.len() | 1).ilog2();
+    quicksort(v, scratch, limit, None, is_less);
+}
+
+/// Compactly stores the length of a run, and whether or not it is sorted. This
+/// can always fit in a usize because the maximum slice length is isize::MAX.
+#[derive(Copy, Clone)]
+struct DriftsortRun(usize);
+
+impl DriftsortRun {
+    #[inline(always)]
+    fn new_sorted(length: usize) -> Self {
+        Self((length << 1) | 1)
+    }
+
+    #[inline(always)]
+    fn new_unsorted(length: usize) -> Self {
+        Self(length << 1)
+    }
+
+    #[inline(always)]
+    fn sorted(self) -> bool {
+        self.0 & 1 == 1
+    }
+
+    #[inline(always)]
+    fn len(self) -> usize {
+        self.0 >> 1
+    }
+}
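The `DriftsortRun` bit-packing is simple enough to check in isolation. A minimal sketch (illustration only, duplicating the encoding above outside of `core`):

```rust
#[derive(Copy, Clone)]
struct DriftsortRun(usize);

impl DriftsortRun {
    fn new_sorted(length: usize) -> Self {
        Self((length << 1) | 1)
    }
    fn new_unsorted(length: usize) -> Self {
        Self(length << 1)
    }
    fn sorted(self) -> bool {
        self.0 & 1 == 1
    }
    fn len(self) -> usize {
        self.0 >> 1
    }
}

fn main() {
    // The length lives in the upper bits, the sorted flag in the lowest bit.
    let (a, b) = (DriftsortRun::new_sorted(100), DriftsortRun::new_unsorted(100));
    assert!(a.sorted() && !b.sorted());
    assert_eq!((a.len(), b.len()), (100, 100));
}
```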
diff --git a/core/src/slice/sort/stable/merge.rs b/core/src/slice/sort/stable/merge.rs
new file mode 100644
index 0000000000000..6739e114b130a
--- /dev/null
+++ b/core/src/slice/sort/stable/merge.rs
@@ -0,0 +1,151 @@
+//! This module contains logic for performing a merge of two sorted sub-slices.
+
+use crate::cmp;
+use crate::mem::MaybeUninit;
+use crate::ptr;
+
+/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `scratch` as
+/// temporary storage, and stores the result into `v[..]`.
+pub fn merge<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    mid: usize,
+    is_less: &mut F,
+) {
+    let len = v.len();
+
+    if mid == 0 || mid >= len || scratch.len() < cmp::min(mid, len - mid) {
+        return;
+    }
+
+    // SAFETY: We checked that the two slices are non-empty and `mid` is in-bounds.
+    // We checked that the buffer `scratch` has enough capacity to hold a copy of
+    // the shorter slice. `merge_up` and `merge_down` are written in such a way that
+    // they uphold the contract described in `MergeState::drop`.
+    unsafe {
+        // The merge process first copies the shorter run into `buf`. Then it traces
+        // the newly copied run and the longer run forwards (or backwards), comparing
+        // their next unconsumed elements and copying the lesser (or greater) one into `v`.
+        //
+        // As soon as the shorter run is fully consumed, the process is done. If the
+        // longer run gets consumed first, then we must copy whatever is left of the
+        // shorter run into the remaining gap in `v`.
+        //
+        // Intermediate state of the process is always tracked by `gap`, which serves
+        // two purposes:
+        // 1. Protects integrity of `v` from panics in `is_less`.
+        // 2. Fills the remaining gap in `v` if the longer run gets consumed first.
+
+        let buf = MaybeUninit::slice_as_mut_ptr(scratch);
+
+        let v_base = v.as_mut_ptr();
+        let v_mid = v_base.add(mid);
+        let v_end = v_base.add(len);
+
+        let left_len = mid;
+        let right_len = len - mid;
+
+        let left_is_shorter = left_len <= right_len;
+        let save_base = if left_is_shorter { v_base } else { v_mid };
+        let save_len = if left_is_shorter { left_len } else { right_len };
+
+        ptr::copy_nonoverlapping(save_base, buf, save_len);
+
+        let mut merge_state = MergeState { start: buf, end: buf.add(save_len), dst: save_base };
+
+        if left_is_shorter {
+            merge_state.merge_up(v_mid, v_end, is_less);
+        } else {
+            merge_state.merge_down(v_base, buf, v_end, is_less);
+        }
+        // Finally, `merge_state` gets dropped. If the shorter run was not fully
+        // consumed, whatever remains of it will now be copied into the hole in `v`.
+    }
+}
+
+// When dropped, copies the range `start..end` into `dst..`.
+struct MergeState<T> {
+    start: *mut T,
+    end: *mut T,
+    dst: *mut T,
+}
+
+impl<T> MergeState<T> {
+    /// # Safety
+    /// The caller MUST guarantee that `self` is initialized in a way where `start -> end` is
+    /// the longer sub-slice and so that `dst` can be written to at least the shorter sub-slice
+    /// length times. In addition `start -> end` and `right -> right_end` MUST be valid to be
+    /// read. This function MUST only be called once.
+    unsafe fn merge_up<F: FnMut(&T, &T) -> bool>(
+        &mut self,
+        mut right: *const T,
+        right_end: *const T,
+        is_less: &mut F,
+    ) {
+        // SAFETY: See function safety comment.
+        unsafe {
+            let left = &mut self.start;
+            let out = &mut self.dst;
+
+            while *left != self.end && right as *const T != right_end {
+                let consume_left = !is_less(&*right, &**left);
+
+                let src = if consume_left { *left } else { right };
+                ptr::copy_nonoverlapping(src, *out, 1);
+
+                *left = left.add(consume_left as usize);
+                right = right.add(!consume_left as usize);
+
+                *out = out.add(1);
+            }
+        }
+    }
+
+    /// # Safety
+    /// The caller MUST guarantee that `self` is initialized in a way where `left_end <- dst` is
+    /// the shorter sub-slice and so that `out` can be written to at least the shorter sub-slice
+    /// length times. In addition `left_end <- dst` and `right_end <- end` MUST be valid to be
+    /// read. This function MUST only be called once.
+    unsafe fn merge_down<F: FnMut(&T, &T) -> bool>(
+        &mut self,
+        left_end: *const T,
+        right_end: *const T,
+        mut out: *mut T,
+        is_less: &mut F,
+    ) {
+        // SAFETY: See function safety comment.
+        unsafe {
+            loop {
+                let left = self.dst.sub(1);
+                let right = self.end.sub(1);
+                out = out.sub(1);
+
+                let consume_left = is_less(&*right, &*left);
+
+                let src = if consume_left { left } else { right };
+                ptr::copy_nonoverlapping(src, out, 1);
+
+                self.dst = left.add(!consume_left as usize);
+                self.end = right.add(consume_left as usize);
+
+                if self.dst as *const T == left_end || self.end as *const T == right_end {
+                    break;
+                }
+            }
+        }
+    }
+}
+
+impl<T> Drop for MergeState<T> {
+    fn drop(&mut self) {
+        // SAFETY: The user of MergeState MUST ensure that, at any point this drop
+        // impl MAY run (for example when the user-provided `is_less` panics),
+        // copying the contiguous region between `start` and `end` to `dst` will
+        // leave the input slice `v` with each original element and all possible
+        // modifications observed.
+        unsafe {
+            let len = self.end.sub_ptr(self.start);
+            ptr::copy_nonoverlapping(self.start, self.dst, len);
+        }
+    }
+}
diff --git a/core/src/slice/sort/stable/mod.rs b/core/src/slice/sort/stable/mod.rs
new file mode 100644
index 0000000000000..18f7b2ac54af5
--- /dev/null
+++ b/core/src/slice/sort/stable/mod.rs
@@ -0,0 +1,116 @@
+//! This module contains the entry points for `slice::sort`.
+
+use crate::cmp;
+use crate::intrinsics;
+use crate::mem::{self, MaybeUninit, SizedTypeProperties};
+
+use crate::slice::sort::shared::smallsort::{
+    insertion_sort_shift_left, StableSmallSortTypeImpl, SMALL_SORT_GENERAL_SCRATCH_LEN,
+};
+
+pub(crate) mod drift;
+pub(crate) mod merge;
+pub(crate) mod quicksort;
+
+/// Stable sort called driftsort by Orson Peters and Lukas Bergdoll.
+/// Design document:
+/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/driftsort_introduction/text.md>
+///
+/// Upholds all safety properties outlined here:
+/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md>
+#[inline(always)]
+pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less: &mut F) {
+    // Arrays of zero-sized types are always all-equal, and thus sorted.
+    if T::IS_ZST {
+        return;
+    }
+
+    // Instrumenting the standard library showed that 90+% of the calls to sort
+    // by rustc are either of size 0 or 1.
+    let len = v.len();
+    if intrinsics::likely(len < 2) {
+        return;
+    }
+
+    // More advanced sorting methods than insertion sort are faster if called in
+    // a hot loop for small inputs, but for general-purpose code the small
+    // binary size of insertion sort is more important. The instruction cache in
+    // modern processors is very valuable, and for a single sort call in general
+    // purpose code any gains from an advanced method are cancelled by i-cache
+    // misses during the sort, and thrashing the i-cache for surrounding code.
+    const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
+    if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
+        insertion_sort_shift_left(v, 1, is_less);
+        return;
+    }
+
+    driftsort_main::<T, F, BufT>(v, is_less);
+}
+
+/// See [`sort`]
+///
+/// Deliberately don't inline the main sorting routine entrypoint to ensure the
+/// inlined insertion sort i-cache footprint remains minimal.
+#[inline(never)]
+fn driftsort_main<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less: &mut F) {
+    // By allocating n elements of memory we can ensure the entire input can
+    // be sorted using stable quicksort, which allows better performance on
+    // random and low-cardinality distributions. However, we still want to
+    // reduce our memory usage to n / 2 for large inputs. We do this by scaling
+    // our allocation as max(n / 2, min(n, 8MB)), ensuring we scale like n for
+    // small inputs and n / 2 for large inputs, without a sudden drop off. We
+    // also need to ensure our alloc >= MIN_SMALL_SORT_SCRATCH_LEN, as the
+    // small-sort always needs this much memory.
+    const MAX_FULL_ALLOC_BYTES: usize = 8_000_000; // 8MB
+    let max_full_alloc = MAX_FULL_ALLOC_BYTES / mem::size_of::<T>();
+    let len = v.len();
+    let alloc_len =
+        cmp::max(cmp::max(len / 2, cmp::min(len, max_full_alloc)), SMALL_SORT_GENERAL_SCRATCH_LEN);
+
+    // For small inputs 4KiB of stack storage suffices, which allows us to avoid
+    // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
+    let mut stack_buf = AlignedStorage::<T, 4096>::new();
+    let stack_scratch = stack_buf.as_uninit_slice_mut();
+    let mut heap_buf;
+    let scratch = if stack_scratch.len() >= alloc_len {
+        stack_scratch
+    } else {
+        heap_buf = BufT::with_capacity(alloc_len);
+        heap_buf.as_uninit_slice_mut()
+    };
+
+    // For small inputs using quicksort is not yet beneficial, and a single
+    // small-sort or two small-sorts plus a single merge outperforms it, so use
+    // eager mode.
+    let eager_sort = len <= T::small_sort_threshold() * 2;
+    crate::slice::sort::stable::drift::sort(v, scratch, eager_sort, is_less);
+}
+
+#[doc(hidden)]
+/// Abstracts owned memory buffer, so that sort code can live in core where no allocation is
+/// possible. This trait can then be implemented in a place that has access to allocation.
+pub trait BufGuard<T> {
+    /// Creates new buffer that holds at least `capacity` memory.
+    fn with_capacity(capacity: usize) -> Self;
+    /// Returns mutable access to uninitialized memory owned by the buffer.
+    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>];
+}
+
+#[repr(C)]
+struct AlignedStorage<T, const N: usize> {
+    _align: [T; 0],
+    storage: [MaybeUninit<u8>; N],
+}
+
+impl<T, const N: usize> AlignedStorage<T, N> {
+    fn new() -> Self {
+        Self { _align: [], storage: [const { MaybeUninit::uninit() }; N] }
+    }
+
+    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>] {
+        let len = N / mem::size_of::<T>();
+
+        // SAFETY: `_align` ensures we are correctly aligned.
+        unsafe { core::slice::from_raw_parts_mut(self.storage.as_mut_ptr().cast(), len) }
+    }
+}
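The `BufGuard` indirection exists because `core` cannot allocate. For orientation, a plausible downstream implementation (a sketch under the assumption that the trait is backed by `Vec` on the `alloc` side; not necessarily the exact code this PR uses) would hand out a vector's spare capacity:

```rust
use std::mem::MaybeUninit;

// Local copy of the trait, for illustration only.
trait BufGuard<T> {
    fn with_capacity(capacity: usize) -> Self;
    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>];
}

// Back the scratch buffer with a Vec and expose its uninitialized tail.
impl<T> BufGuard<T> for Vec<T> {
    fn with_capacity(capacity: usize) -> Self {
        Vec::with_capacity(capacity)
    }
    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>] {
        // The memory between len() and capacity(), as MaybeUninit<T>.
        self.spare_capacity_mut()
    }
}

fn main() {
    let mut buf: Vec<u32> = BufGuard::with_capacity(64);
    assert!(buf.as_uninit_slice_mut().len() >= 64);
}
```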
diff --git a/core/src/slice/sort/stable/quicksort.rs b/core/src/slice/sort/stable/quicksort.rs
new file mode 100644
index 0000000000000..181fe603d2325
--- /dev/null
+++ b/core/src/slice/sort/stable/quicksort.rs
@@ -0,0 +1,257 @@
+//! This module contains a stable quicksort and partition implementation.
+
+use crate::intrinsics;
+use crate::mem::{self, ManuallyDrop, MaybeUninit};
+use crate::ptr;
+
+use crate::slice::sort::shared::pivot::choose_pivot;
+use crate::slice::sort::shared::smallsort::StableSmallSortTypeImpl;
+use crate::slice::sort::shared::FreezeMarker;
+
+/// Sorts `v` recursively using quicksort.
+///
+/// `limit` when initialized with `c*log(v.len())` for some c ensures we do not
+/// overflow the stack or go quadratic.
+#[inline(never)]
+pub fn quicksort<T, F: FnMut(&T, &T) -> bool>(
+    mut v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    mut limit: u32,
+    mut left_ancestor_pivot: Option<&T>,
+    is_less: &mut F,
+) {
+    loop {
+        let len = v.len();
+
+        if len <= T::small_sort_threshold() {
+            T::small_sort(v, scratch, is_less);
+            return;
+        }
+
+        if limit == 0 {
+            // We have had too many bad pivots, switch to O(n log n) fallback
+            // algorithm. In our case that is driftsort in eager mode.
+            crate::slice::sort::stable::drift::sort(v, scratch, true, is_less);
+            return;
+        }
+        limit -= 1;
+
+        let pivot_pos = choose_pivot(v, is_less);
+        // SAFETY: choose_pivot promises to return a valid pivot index.
+        unsafe {
+            intrinsics::assume(pivot_pos < v.len());
+        }
+
+        // SAFETY: We only access the temporary copy for Freeze types, otherwise
+        // self-modifications via `is_less` would not be observed and this would
+        // be unsound. Our temporary copy does not escape this scope.
+        let pivot_copy = unsafe { ManuallyDrop::new(ptr::read(&v[pivot_pos])) };
+        let pivot_ref = (!has_direct_interior_mutability::<T>()).then_some(&*pivot_copy);
+
+        // We choose a pivot, and check if this pivot is equal to our left
+        // ancestor. If true, we do a partition putting equal elements on the
+        // left and do not recurse on it. This gives O(n log k) sorting for k
+        // distinct values, a strategy borrowed from pdqsort. For types with
+        // interior mutability we can't soundly create a temporary copy of the
+        // ancestor pivot, and use left_partition_len == 0 as our method for
+        // detecting when we re-use a pivot, which means we do at most three
+        // partition operations with pivot p instead of the optimal two.
+        let mut perform_equal_partition = false;
+        if let Some(la_pivot) = left_ancestor_pivot {
+            perform_equal_partition = !is_less(la_pivot, &v[pivot_pos]);
+        }
+
+        let mut left_partition_len = 0;
+        if !perform_equal_partition {
+            left_partition_len = stable_partition(v, scratch, pivot_pos, false, is_less);
+            perform_equal_partition = left_partition_len == 0;
+        }
+
+        if perform_equal_partition {
+            let mid_eq = stable_partition(v, scratch, pivot_pos, true, &mut |a, b| !is_less(b, a));
+            v = &mut v[mid_eq..];
+            left_ancestor_pivot = None;
+            continue;
+        }
+
+        // Process left side with the next loop iter, right side with recursion.
+        let (left, right) = v.split_at_mut(left_partition_len);
+        quicksort(right, scratch, limit, pivot_ref, is_less);
+        v = left;
+    }
+}
+
+/// Partitions `v` using pivot `p = v[pivot_pos]` and returns the number of
+/// elements less than `p`. The relative order of elements that compare < p and
+/// those that compare >= p is preserved - it is a stable partition.
+///
+/// If `is_less` is not a strict total order or panics, `scratch.len() < v.len()`,
+/// or `pivot_pos >= v.len()`, the result and `v`'s state is sound but unspecified.
+fn stable_partition<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    pivot_pos: usize,
+    pivot_goes_left: bool,
+    is_less: &mut F,
+) -> usize {
+    let len = v.len();
+
+    if intrinsics::unlikely(scratch.len() < len || pivot_pos >= len) {
+        core::intrinsics::abort()
+    }
+
+    let v_base = v.as_ptr();
+    let scratch_base = MaybeUninit::slice_as_mut_ptr(scratch);
+
+    // The core idea is to write the values that compare as less-than to the left
+    // side of `scratch`, while the values that compared as greater than or equal
+    // to `v[pivot_pos]` go to the right side of `scratch` in reverse. See
+    // PartitionState for details.
+
+    // SAFETY: see individual comments.
+    unsafe {
+        // SAFETY: we made sure the scratch has length >= len and that pivot_pos
+        // is in-bounds. v and scratch are disjoint slices.
+        let pivot = v_base.add(pivot_pos);
+        let mut state = PartitionState::new(v_base, scratch_base, len);
+
+        let mut pivot_in_scratch = ptr::null_mut();
+        let mut loop_end_pos = pivot_pos;
+
+        // SAFETY: this loop is equivalent to calling state.partition_one
+        // exactly len times.
+        loop {
+            // Ideally the outer loop won't be unrolled, to save binary size,
+            // but we do want the inner loop to be unrolled for small types, as
+            // this gave significant performance boosts in benchmarks. Unrolling
+            // through for _ in 0..UNROLL_LEN { .. } instead of manually improves
+            // compile times but has a ~10-20% performance penalty on opt-level=s.
+            if const { mem::size_of::<T>() <= 16 } {
+                const UNROLL_LEN: usize = 4;
+                let unroll_end = v_base.add(loop_end_pos.saturating_sub(UNROLL_LEN - 1));
+                while state.scan < unroll_end {
+                    state.partition_one(is_less(&*state.scan, &*pivot));
+                    state.partition_one(is_less(&*state.scan, &*pivot));
+                    state.partition_one(is_less(&*state.scan, &*pivot));
+                    state.partition_one(is_less(&*state.scan, &*pivot));
+                }
+            }
+
+            let loop_end = v_base.add(loop_end_pos);
+            while state.scan < loop_end {
+                state.partition_one(is_less(&*state.scan, &*pivot));
+            }
+
+            if loop_end_pos == len {
+                break;
+            }
+
+            // We avoid comparing pivot with itself, as this could create deadlocks for
+            // certain comparison operators. We also store its location for later.
+            pivot_in_scratch = state.partition_one(pivot_goes_left);
+
+            loop_end_pos = len;
+        }
+
+        // `pivot` must be copied into its correct position again, because a
+        // comparison operator might have modified it.
+        if has_direct_interior_mutability::<T>() {
+            ptr::copy_nonoverlapping(pivot, pivot_in_scratch, 1);
+        }
+
+        // SAFETY: partition_one being called exactly len times guarantees that scratch
+        // is initialized with a permuted copy of `v`, and that num_left <= v.len().
+        // Copying scratch[0..num_left] and scratch[num_left..v.len()] back is thus
+        // sound, as the values in scratch will never be read again, meaning our copies
+        // semantically act as moves, permuting `v`.
+
+        // Copy all the elements < p directly from swap to v.
+        let v_base = v.as_mut_ptr();
+        ptr::copy_nonoverlapping(scratch_base, v_base, state.num_left);
+
+        // Copy the elements >= p in reverse order.
+        for i in 0..len - state.num_left {
+            ptr::copy_nonoverlapping(
+                scratch_base.add(len - 1 - i),
+                v_base.add(state.num_left + i),
+                1,
+            );
+        }
+
+        state.num_left
+    }
+}
+
+struct PartitionState<T> {
+    // The start of the scratch auxiliary memory.
+    scratch_base: *mut T,
+    // The current element that is being looked at, scans left to right through slice.
+    scan: *const T,
+    // Counts the number of elements that went to the left side, also works around:
+    // https://github.com/rust-lang/rust/issues/117128
+    num_left: usize,
+    // Reverse scratch output pointer.
+    scratch_rev: *mut T,
+}
+
+impl<T> PartitionState<T> {
+    /// # Safety
+    /// scan and scratch must point to valid disjoint buffers of length len. The
+    /// scan buffer must be initialized.
+    unsafe fn new(scan: *const T, scratch: *mut T, len: usize) -> Self {
+        // SAFETY: See function safety comment.
+        unsafe { Self { scratch_base: scratch, scan, num_left: 0, scratch_rev: scratch.add(len) } }
+    }
+
+    /// Depending on the value of `towards_left` this function will write a value
+    /// to the growing left or right side of the scratch memory. This forms the
+    /// branchless core of the partition.
+    ///
+    /// # Safety
+    /// This function may be called at most `len` times. If it is called exactly
+    /// `len` times the scratch buffer then contains a copy of each element from
+    /// the scan buffer exactly once - a permutation, and num_left <= len.
+    unsafe fn partition_one(&mut self, towards_left: bool) -> *mut T {
+        // SAFETY: see individual comments.
+        unsafe {
+            // SAFETY: in-bounds because this function is called at most len times, and thus
+            // right now is incremented at most len - 1 times. Similarly, num_left < len and
+            // num_right < len, where num_right == i - num_left at the start of the ith
+            // iteration (zero-indexed).
+            self.scratch_rev = self.scratch_rev.sub(1);
+
+            // SAFETY: now we have scratch_rev == base + len - (i + 1). This means
+            // scratch_rev + num_left == base + len - 1 - num_right < base + len.
+            let dst_base = if towards_left { self.scratch_base } else { self.scratch_rev };
+            let dst = dst_base.add(self.num_left);
+            ptr::copy_nonoverlapping(self.scan, dst, 1);
+
+            self.num_left += towards_left as usize;
+            self.scan = self.scan.add(1);
+            dst
+        }
+    }
+}
+
+trait IsFreeze {
+    fn is_freeze() -> bool;
+}
+
+impl<T> IsFreeze for T {
+    default fn is_freeze() -> bool {
+        false
+    }
+}
+impl<T: FreezeMarker> IsFreeze for T {
+    fn is_freeze() -> bool {
+        true
+    }
+}
+
+#[must_use]
+fn has_direct_interior_mutability<T>() -> bool {
+    // If a type has interior mutability it may alter itself during comparison
+    // in a way that must be preserved after the sort operation concludes.
+    // Otherwise a type like Mutex<Option<Box<str>>> could lead to double free.
+    !T::is_freeze()
+}
diff --git a/core/src/slice/sort/unstable/heapsort.rs b/core/src/slice/sort/unstable/heapsort.rs
new file mode 100644
index 0000000000000..559605ef4b6b3
--- /dev/null
+++ b/core/src/slice/sort/unstable/heapsort.rs
@@ -0,0 +1,80 @@
+//! This module contains a branchless heapsort as fallback for unstable quicksort.
+
+use crate::intrinsics;
+use crate::ptr;
+
+/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
+///
+/// Never inline this; it sits in the main hot loop of `recurse` and is meant as an
+/// unlikely algorithmic fallback.
+///
+/// SAFETY: The caller has to guarantee that `v.len()` >= 2.
+#[inline(never)]
+pub(crate) unsafe fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    // SAFETY: See function safety.
+    unsafe {
+        intrinsics::assume(v.len() >= 2);
+
+        // Build the heap in linear time.
+        for i in (0..v.len() / 2).rev() {
+            sift_down(v, i, is_less);
+        }
+
+        // Pop maximal elements from the heap.
+        for i in (1..v.len()).rev() {
+            v.swap(0, i);
+            sift_down(&mut v[..i], 0, is_less);
+        }
+    }
+}
+
+// This binary heap respects the invariant `parent >= child`.
+//
+// SAFETY: The caller has to guarantee that node < `v.len()`.
+#[inline(never)]
+unsafe fn sift_down<T, F>(v: &mut [T], mut node: usize, is_less: &mut F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    // SAFETY: See function safety.
+    unsafe {
+        intrinsics::assume(node < v.len());
+    }
+
+    let len = v.len();
+
+    let v_base = v.as_mut_ptr();
+
+    loop {
+        // Children of `node`.
+        let mut child = 2 * node + 1;
+        if child >= len {
+            break;
+        }
+
+        // SAFETY: The invariants and checks guarantee that both node and child are in-bounds.
+        unsafe {
+            // Choose the greater child.
+            if child + 1 < len {
+                // We need a branch to be sure not to out-of-bounds index,
+                // but it's highly predictable. The comparison, however,
+                // is better done branchless, especially for primitives.
+                child += is_less(&*v_base.add(child), &*v_base.add(child + 1)) as usize;
+            }
+
+            // Stop if the invariant holds at `node`.
+            if !is_less(&*v_base.add(node), &*v_base.add(child)) {
+                break;
+            }
+
+            // Swap `node` with the greater child, move one step down, and continue sifting. This
+            // could be ptr::swap_nonoverlapping but that adds a significant amount of binary-size.
+            ptr::swap(v_base.add(node), v_base.add(child));
+        }
+
+        node = child;
+    }
+}
diff --git a/core/src/slice/sort/unstable/mod.rs b/core/src/slice/sort/unstable/mod.rs
new file mode 100644
index 0000000000000..692c2d8f7c7ba
--- /dev/null
+++ b/core/src/slice/sort/unstable/mod.rs
@@ -0,0 +1,76 @@
+//! This module contains the entry points for `slice::sort_unstable`.
+
+use crate::intrinsics;
+use crate::mem::SizedTypeProperties;
+
+use crate::slice::sort::shared::find_existing_run;
+use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
+
+pub(crate) mod heapsort;
+pub(crate) mod quicksort;
+
+/// Unstable sort called ipnsort by Lukas Bergdoll.
+/// Design document:
+/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/ipnsort_introduction/text.md>
+///
+/// Upholds all safety properties outlined here:
+/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md>
+#[inline(always)]
+pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
+    // Arrays of zero-sized types are always all-equal, and thus sorted.
+    if T::IS_ZST {
+        return;
+    }
+
+    // Instrumenting the standard library showed that 90+% of the calls to sort
+    // by rustc are either of size 0 or 1.
+    let len = v.len();
+    if intrinsics::likely(len < 2) {
+        return;
+    }
+
+    // More advanced sorting methods than insertion sort are faster if called in
+    // a hot loop for small inputs, but for general-purpose code the small
+    // binary size of insertion sort is more important. The instruction cache in
+    // modern processors is very valuable, and for a single sort call in general
+    // purpose code any gains from an advanced method are cancelled by i-cache
+    // misses during the sort, and thrashing the i-cache for surrounding code.
+    const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
+    if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
+        insertion_sort_shift_left(v, 1, is_less);
+        return;
+    }
+
+    ipnsort(v, is_less);
+}
+
+/// See [`sort`]
+///
+/// Deliberately don't inline the main sorting routine entrypoint to ensure the
+/// inlined insertion sort i-cache footprint remains minimal.
+#[inline(never)]
+fn ipnsort<T, F>(v: &mut [T], is_less: &mut F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    let len = v.len();
+    let (run_len, was_reversed) = find_existing_run(v, is_less);
+
+    // SAFETY: find_existing_run promises to return a valid run_len.
+    unsafe { intrinsics::assume(run_len <= len) };
+
+    if run_len == len {
+        if was_reversed {
+            v.reverse();
+        }
+
+        // It would be possible to do in-place merging here for a long existing streak. But that
+        // makes the implementation a lot bigger, and users can use `slice::sort` for that
+        // use-case.
+        return;
+    }
+
+    // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
+    // The binary OR by one is used to eliminate the zero-check in the logarithm.
+    let limit = 2 * (len | 1).ilog2();
+    crate::slice::sort::unstable::quicksort::quicksort(v, None, limit, is_less);
+}
diff --git a/core/src/slice/sort/unstable/quicksort.rs b/core/src/slice/sort/unstable/quicksort.rs
new file mode 100644
index 0000000000000..533b5b0eec767
--- /dev/null
+++ b/core/src/slice/sort/unstable/quicksort.rs
@@ -0,0 +1,351 @@
+//! This module contains an unstable quicksort and two partition implementations.
+
+use crate::intrinsics;
+use crate::mem::{self, ManuallyDrop};
+use crate::ptr;
+
+use crate::slice::sort::shared::pivot::choose_pivot;
+use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
+
+/// Sorts `v` recursively.
+///
+/// If the slice had a predecessor in the original array, it is specified as `ancestor_pivot`.
+///
+/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
+/// this function will immediately switch to heapsort.
+pub(crate) fn quicksort<'a, T, F>(
+    mut v: &'a mut [T],
+    mut ancestor_pivot: Option<&'a T>,
+    mut limit: u32,
+    is_less: &mut F,
+) where
+    F: FnMut(&T, &T) -> bool,
+{
+    loop {
+        if v.len() <= T::small_sort_threshold() {
+            T::small_sort(v, is_less);
+            return;
+        }
+
+        // If too many bad pivot choices were made, simply fall back to heapsort in order to
+        // guarantee `O(N x log(N))` worst-case.
+        if limit == 0 {
+            // SAFETY: We assume the `small_sort` threshold is at least 1.
+            unsafe {
+                crate::slice::sort::unstable::heapsort::heapsort(v, is_less);
+            }
+            return;
+        }
+
+        limit -= 1;
+
+        // Choose a pivot and try guessing whether the slice is already sorted.
+        let pivot_pos = choose_pivot(v, is_less);
+
+        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
+        // slice. Partition the slice into elements equal to and elements greater than the pivot.
+        // This case is usually hit when the slice contains many duplicate elements.
+        if let Some(p) = ancestor_pivot {
+            // SAFETY: We assume choose_pivot yields an in-bounds position.
+            if !is_less(p, unsafe { v.get_unchecked(pivot_pos) }) {
+                let num_lt = partition(v, pivot_pos, &mut |a, b| !is_less(b, a));
+
+                // Continue sorting elements greater than the pivot. We know that `num_lt` contains
+                // the pivot. So we can continue after `num_lt`.
+                v = &mut v[(num_lt + 1)..];
+                ancestor_pivot = None;
+                continue;
+            }
+        }
+
+        // Partition the slice.
+        let num_lt = partition(v, pivot_pos, is_less);
+        // SAFETY: partition ensures that `num_lt` will be in-bounds.
+        unsafe { intrinsics::assume(num_lt < v.len()) };
+
+        // Split the slice into `left`, `pivot`, and `right`.
+        let (left, right) = v.split_at_mut(num_lt);
+        let (pivot, right) = right.split_at_mut(1);
+        let pivot = &pivot[0];
+
+        // Recurse into the left side. We have a fixed recursion limit, testing shows no real
+        // benefit for recursing into the shorter side.
+        quicksort(left, ancestor_pivot, limit, is_less);
+
+        // Continue with the right side.
+        v = right;
+        ancestor_pivot = Some(pivot);
+    }
+}
+
+/// Takes the input slice `v` and re-arranges elements such that when the call returns normally
+/// all elements that compare true for `is_less(elem, pivot)` where `pivot == v[pivot_pos]` are
+/// on the left side of `v` followed by the other elements, notionally considered greater or
+/// equal to `pivot`.
+///
+/// Returns the number of elements that are compared true for `is_less(elem, pivot)`.
+///
+/// If `is_less` does not implement a total order the resulting order and return value are
+/// unspecified. All original elements will remain in `v` and any possible modifications via
+/// interior mutability will be observable. Same is true if `is_less` panics.
+pub(crate) fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    let len = v.len();
+
+    // Allows for panic-free code-gen by proving this property to the compiler.
+    if len == 0 {
+        return 0;
+    }
+
+    // Allows for panic-free code-gen by proving this property to the compiler.
+    if pivot >= len {
+        intrinsics::abort();
+    }
+
+    // Place the pivot at the beginning of slice.
+    v.swap(0, pivot);
+    let (pivot, v_without_pivot) = v.split_at_mut(1);
+
+    // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
+    // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.
+    // Having this guarantee is crucial for optimizations. It's possible to copy the pivot value
+    // into a stack value, but this creates issues for types with interior mutability mandating
+    // a drop guard.
+    let pivot = &mut pivot[0];
+
+    // This construct is used to limit the LLVM IR generated, which saves large amounts of
+    // compile-time by only instantiating the code that is needed. Idea by Frank Steffahn.
+    let num_lt = (const { inst_partition::<T, F>() })(v_without_pivot, pivot, is_less);
+
+    // Place the pivot between the two partitions.
+    v.swap(0, num_lt);
+
+    num_lt
+}
+
+const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut F) -> usize {
+    const MAX_BRANCHLESS_PARTITION_SIZE: usize = 96;
+    if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
+        // Specialize for types that are relatively cheap to copy, where branchless optimizations
+        // have large leverage e.g. `u64` and `String`.
+        partition_lomuto_branchless_cyclic::<T, F>
+    } else {
+        partition_hoare_branchy_cyclic::<T, F>
+    }
+}
+
+/// See [`partition`].
+fn partition_hoare_branchy_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    let len = v.len();
+
+    if len == 0 {
+        return 0;
+    }
+
+    // Optimized for large types that are expensive to move. Not optimized for integers. Optimized
+    // for small code-gen, assuming that is_less is an expensive operation that generates
+    // substantial amounts of code or a call. And that copying elements will likely be a call to
+    // memcpy. Using 2 `ptr::copy_nonoverlapping` has the chance to be faster than
+    // `ptr::swap_nonoverlapping` because `memcpy` can use wide SIMD based on runtime feature
+    // detection. Benchmarks support this analysis.
+
+    let mut gap_opt: Option<GapGuard<T>> = None;
+
+    // SAFETY: The left-to-right scanning loop performs a bounds check, where we know that `left >=
+    // v_base && left < right && right <= v_base.add(len)`. The right-to-left scanning loop performs
+    // a bounds check ensuring that `right` is in-bounds.
+    // We checked that `len` is more than zero, which means that unconditional
+    // `right = right.sub(1)` is safe to do. The exit check makes sure that `left` and `right`
+    // never alias, making `ptr::copy_nonoverlapping` safe. The drop-guard `gap` ensures that
+    // should `is_less` panic we always overwrite the duplicate in the input. `gap.pos` stores
+    // the previous value of `right` and starts at `right` and so it too is in-bounds. We never
+    // pass the saved `gap.value` to `is_less` while it is inside the `GapGuard`, thus any
+    // changes via interior mutability will be observed.
+    unsafe {
+        let v_base = v.as_mut_ptr();
+
+        let mut left = v_base;
+        let mut right = v_base.add(len);
+
+        loop {
+            // Find the first element greater than the pivot.
+            while left < right && is_less(&*left, pivot) {
+                left = left.add(1);
+            }
+
+            // Find the last element equal to the pivot.
+            loop {
+                right = right.sub(1);
+                if left >= right || is_less(&*right, pivot) {
+                    break;
+                }
+            }
+
+            if left >= right {
+                break;
+            }
+
+            // Swap the found pair of out-of-order elements via cyclic permutation.
+            let is_first_swap_pair = gap_opt.is_none();
+
+            if is_first_swap_pair {
+                gap_opt = Some(GapGuard { pos: right, value: ManuallyDrop::new(ptr::read(left)) });
+            }
+
+            let gap = gap_opt.as_mut().unwrap_unchecked();
+
+            // Single place where we instantiate ptr::copy_nonoverlapping in the partition.
+            if !is_first_swap_pair {
+                ptr::copy_nonoverlapping(left, gap.pos, 1);
+            }
+            gap.pos = right;
+            ptr::copy_nonoverlapping(right, left, 1);
+
+            left = left.add(1);
+        }
+
+        left.sub_ptr(v_base)
+
+        // `gap_opt` goes out of scope and overwrites the last wrong-side element on the right side
+        // with the first wrong-side element of the left side that was initially overwritten by the
+        // first wrong-side element on the right side.
+    }
+}
+
+struct PartitionState<T> {
+    // The current element that is being looked at, scans left to right through slice.
+    right: *mut T,
+    // Counts the number of elements that compared less-than, also works around:
+    // https://github.com/rust-lang/rust/issues/117128
+    num_lt: usize,
+    // Gap guard that tracks the temporary duplicate in the input.
+    gap: GapGuardRaw<T>,
+}
+
+fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    // Novel partition implementation by Lukas Bergdoll and Orson Peters. Branchless Lomuto
+    // partition paired with a cyclic permutation.
+    // https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md
+
+    let len = v.len();
+    let v_base = v.as_mut_ptr();
+
+    if len == 0 {
+        return 0;
+    }
+
+    // SAFETY: We checked that `len` is more than zero, which means that reading `v_base` is safe to
+    // do. From there we have a bounded loop where `v_base.add(i)` is guaranteed in-bounds. `v` and
+    // `pivot` can't alias because of type system rules. The drop-guard `gap` ensures that should
+    // `is_less` panic we always overwrite the duplicate in the input. `gap.pos` stores the previous
+    // value of `right` and starts at `v_base` and so it too is in-bounds. Given `UNROLL_LEN == 2`
+    // after the main loop we either have A) the last element in `v` that has not yet been processed
+    // because `len % 2 != 0`, or B) all elements have been processed except the gap value that was
+    // saved at the beginning with `ptr::read(v_base)`. In the case A) the loop will iterate twice,
+    // first performing loop_body to take care of the last element that didn't fit into the unroll.
+    // After that the behavior is the same as for B) where we use the saved value as `right` to
+    // overwrite the duplicate. If this very last call to `is_less` panics the saved value will be
+    // copied back including all possible changes via interior mutability. If `is_less` does not
+    // panic and the code continues we overwrite the duplicate and do `right = right.add(1)`, this
+    // is safe to do with `&mut *gap.value` because `T` is the same as `[T; 1]` and generating a
+    // pointer one past the allocation is safe.
+    unsafe {
+        let mut loop_body = |state: &mut PartitionState<T>| {
+            let right_is_lt = is_less(&*state.right, pivot);
+            let left = v_base.add(state.num_lt);
+
+            ptr::copy(left, state.gap.pos, 1);
+            ptr::copy_nonoverlapping(state.right, left, 1);
+
+            state.gap.pos = state.right;
+            state.num_lt += right_is_lt as usize;
+
+            state.right = state.right.add(1);
+        };
+
+        // Ideally we could just use GapGuard in PartitionState, but the reference that is
+        // materialized with `&mut state` when calling `loop_body` would create a mutable reference
+        // to the parent struct that contains the gap value, invalidating the reference pointer
+        // created from a reference to the gap value in the cleanup loop. This is only an issue
+        // under Stacked Borrows, Tree Borrows accepts the intuitive code using GapGuard as valid.
+        let mut gap_value = ManuallyDrop::new(ptr::read(v_base));
+
+        let mut state = PartitionState {
+            num_lt: 0,
+            right: v_base.add(1),
+
+            gap: GapGuardRaw { pos: v_base, value: &mut *gap_value },
+        };
+
+        // Manual unrolling that works well on x86, Arm and with opt-level=s without murdering
+        // compile-times. Leaving this to the compiler yields ok to bad results.
+        let unroll_len = const { if mem::size_of::<T>() <= 16 { 2 } else { 1 } };
+
+        let unroll_end = v_base.add(len - (unroll_len - 1));
+        while state.right < unroll_end {
+            if unroll_len == 2 {
+                loop_body(&mut state);
+                loop_body(&mut state);
+            } else {
+                loop_body(&mut state);
+            }
+        }
+
+        // Single instantiate `loop_body` for both the unroll cleanup and cyclic permutation
+        // cleanup. Optimizes binary-size and compile-time.
+        let end = v_base.add(len);
+        loop {
+            let is_done = state.right == end;
+            state.right = if is_done { state.gap.value } else { state.right };
+
+            loop_body(&mut state);
+
+            if is_done {
+                mem::forget(state.gap);
+                break;
+            }
+        }
+
+        state.num_lt
+    }
+}
+
+struct GapGuard<T> {
+    pos: *mut T,
+    value: ManuallyDrop<T>,
+}
+
+impl<T> Drop for GapGuard<T> {
+    fn drop(&mut self) {
+        // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into
+        // `self.pos` sound.
+        unsafe {
+            ptr::copy_nonoverlapping(&*self.value, self.pos, 1);
+        }
+    }
+}
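To see the core of the branchless Lomuto idea without raw pointers, drop guards, unrolling, or the cyclic permutation, here is a safe-Rust caricature (illustration only, and deliberately weaker than the implementation above): the swap is unconditional, and only the boundary advance depends on the comparison, so there is no branch on the data.

```rust
fn lomuto_partition_simple(v: &mut [i32], pivot: i32) -> usize {
    let mut num_lt = 0;
    for i in 0..v.len() {
        // Compare first, then swap unconditionally; only the boundary
        // increment is data-dependent, which compiles to branchless code.
        let is_lt = v[i] < pivot;
        v.swap(num_lt, i);
        num_lt += is_lt as usize;
    }
    num_lt
}

fn main() {
    let mut v = [5, 1, 8, 0, 3, 9, 2];
    let num_lt = lomuto_partition_simple(&mut v, 4);
    assert_eq!(num_lt, 4);
    assert!(v[..num_lt].iter().all(|&x| x < 4));
    assert!(v[num_lt..].iter().all(|&x| x >= 4));
}
```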
+/// Ideally this wouldn't be needed and we could just use the regular GapGuard.
+/// See comment in [`partition_lomuto_branchless_cyclic`].
+struct GapGuardRaw<T> {
+    pos: *mut T,
+    value: *mut T,
+}
+
+impl<T> Drop for GapGuardRaw<T> {
+    fn drop(&mut self) {
+        // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into
+        // `self.pos` sound.
+        unsafe {
+            ptr::copy_nonoverlapping(self.value, self.pos, 1);
+        }
+    }
+}
diff --git a/core/src/str/converts.rs b/core/src/str/converts.rs
index b6ffb0a608d05..397759bd5cae7 100644
--- a/core/src/str/converts.rs
+++ b/core/src/str/converts.rs
@@ -222,7 +222,7 @@ pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
 #[rustc_const_unstable(feature = "str_from_raw_parts", issue = "119206")]
 pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str {
     // SAFETY: the caller must uphold the safety contract for `from_raw_parts`.
-    unsafe { &*ptr::from_raw_parts(ptr.cast(), len) }
+    unsafe { &*ptr::from_raw_parts(ptr, len) }
 }
 
 /// Creates an `&mut str` from a pointer and a length.
@@ -241,5 +241,5 @@ pub const unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str {
 #[rustc_const_unstable(feature = "const_str_from_raw_parts_mut", issue = "119206")]
 pub const unsafe fn from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut str {
     // SAFETY: the caller must uphold the safety contract for `from_raw_parts_mut`.
-    unsafe { &mut *ptr::from_raw_parts_mut(ptr.cast(), len) }
+    unsafe { &mut *ptr::from_raw_parts_mut(ptr, len) }
 }
diff --git a/core/src/str/count.rs b/core/src/str/count.rs
index 28567a7e753aa..b5d7aaf05d4bd 100644
--- a/core/src/str/count.rs
+++ b/core/src/str/count.rs
@@ -17,6 +17,7 @@
 //! Note: Because the term "leading byte" can sometimes be ambiguous (for
 //! example, it could also refer to the first byte of a slice), we'll often use
 //! the term "non-continuation byte" to refer to these bytes in the code.
+
 use core::intrinsics::unlikely;
 
 const USIZE_SIZE: usize = core::mem::size_of::<usize>();
@@ -24,7 +25,7 @@ const UNROLL_INNER: usize = 4;
 
 #[inline]
 pub(super) fn count_chars(s: &str) -> usize {
-    if s.len() < USIZE_SIZE * UNROLL_INNER {
+    if cfg!(feature = "optimize_for_size") || s.len() < USIZE_SIZE * UNROLL_INNER {
         // Avoid entering the optimized implementation for strings where the
         // difference is not likely to matter, or where it might even be slower.
        // That said, a ton of thought was not spent on the particular threshold
diff --git a/core/src/str/iter.rs b/core/src/str/iter.rs
index d61f04102e5e5..19627f28e64f8 100644
--- a/core/src/str/iter.rs
+++ b/core/src/str/iter.rs
@@ -1274,10 +1274,8 @@ pub struct SplitWhitespace<'a> {
 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
 #[derive(Clone, Debug)]
 pub struct SplitAsciiWhitespace<'a> {
-    pub(super) inner: Map<
-        Filter<slice::Split<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty<'a>>,
-        UnsafeBytesToStr<'a>,
-    >,
+    pub(super) inner:
+        Map<Filter<slice::Split<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
 }
 
 /// An iterator over the substrings of a string,
diff --git a/core/src/str/mod.rs b/core/src/str/mod.rs
index edda4d1b68703..683109380439c 100644
--- a/core/src/str/mod.rs
+++ b/core/src/str/mod.rs
@@ -732,7 +732,7 @@ impl str {
     /// ```
     #[inline]
     #[must_use]
-    #[stable(feature = "split_at_checked", since = "CURRENT_RUSTC_VERSION")]
+    #[stable(feature = "split_at_checked", since = "1.80.0")]
     pub fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> {
         // is_char_boundary checks that the index is in [0, .len()]
         if self.is_char_boundary(mid) {
@@ -772,7 +772,7 @@ impl str {
     /// ```
     #[inline]
     #[must_use]
-    #[stable(feature = "split_at_checked", since = "CURRENT_RUSTC_VERSION")]
+    #[stable(feature = "split_at_checked", since = "1.80.0")]
     pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> {
         // is_char_boundary checks that the index is in [0, .len()]
         if self.is_char_boundary(mid) {
@@ -983,7 +983,7 @@ impl str {
     #[cfg_attr(not(test), rustc_diagnostic_item = "str_split_whitespace")]
     #[inline]
     pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
-        SplitWhitespace { inner: self.split(char::is_whitespace).filter(|s| !s.is_empty()) }
+        SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
     }
 
     /// Splits a string slice by ASCII whitespace.
@@ -1032,13 +1032,8 @@ impl str {
     #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
     #[inline]
    pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
-        let inner = self
-            .as_bytes()
-            .split(u8::is_ascii_whitespace)
-            .filter(|s| !s.is_empty())
-            // SAFETY: the byte slice came from a string and was only split
-            // along character boundaries, so the resulting slices are strings.
-            .map(|bytes| unsafe { from_utf8_unchecked(bytes) });
+        let inner =
+            self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
 
         SplitAsciiWhitespace { inner }
     }
@@ -1090,11 +1085,7 @@ impl str {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn lines(&self) -> Lines<'_> {
-        Lines(self.split_inclusive('\n').map(|line| {
-            let Some(line) = line.strip_suffix('\n') else { return line };
-            let Some(line) = line.strip_suffix('\r') else { return line };
-            line
-        }))
+        Lines(self.split_inclusive('\n').map(LinesMap))
    }
 
     /// An iterator over the lines of a string.
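The `lines()` hunk above swaps the inline closure for the nameable `LinesMap` zero-sized type (defined in a later hunk of this diff) without changing behavior: each `\n`-terminated chunk loses its `\n` and then an optional `\r`. A quick check of the behavior that must be preserved:

```rust
fn main() {
    let text = "foo\r\nbar\n\nbaz";
    let lines: Vec<&str> = text.lines().collect();
    // Both "\r\n" and "\n" terminators are stripped, empty lines are kept,
    // and the final line does not need a trailing newline.
    assert_eq!(lines, ["foo", "bar", "", "baz"]);
}
```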
@@ -2546,8 +2537,8 @@ impl str { /// ``` #[must_use = "this returns the trimmed string as a new slice, \ without modifying the original"] - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii_start(&self) -> &str { // SAFETY: Removing ASCII characters from a `&str` does not invalidate @@ -2571,8 +2562,8 @@ impl str { /// ``` #[must_use = "this returns the trimmed string as a new slice, \ without modifying the original"] - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii_end(&self) -> &str { // SAFETY: Removing ASCII characters from a `&str` does not invalidate @@ -2597,8 +2588,8 @@ impl str { /// ``` #[must_use = "this returns the trimmed string as a new slice, \ without modifying the original"] - #[stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] - #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] + #[rustc_const_stable(feature = "byte_slice_trim_ascii", since = "1.80.0")] #[inline] pub const fn trim_ascii(&self) -> &str { // SAFETY: Removing ASCII characters from a `&str` does not invalidate @@ -2645,19 +2636,14 @@ impl str { #[stable(feature = "str_escape", since = "1.34.0")] pub fn escape_debug(&self) -> EscapeDebug<'_> { let mut chars = self.chars(); - let first = chars - .next() - .map(|first| first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)) - .into_iter() - .flatten(); - let inner = first.chain(chars.flat_map(|c| { - c.escape_debug_ext(EscapeDebugExtArgs { - escape_grapheme_extended: false, - escape_single_quote: true, - escape_double_quote: true, - }) - })); - EscapeDebug { inner } + EscapeDebug { + inner: chars + .next() + .map(|first| first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)) + .into_iter() + .flatten() + .chain(chars.flat_map(CharEscapeDebugContinue)), + } } /// Return an iterator that escapes each char in `self` with [`char::escape_default`]. @@ -2695,7 +2681,7 @@ impl str { without modifying the original"] #[stable(feature = "str_escape", since = "1.34.0")] pub fn escape_default(&self) -> EscapeDefault<'_> { - EscapeDefault { inner: self.chars().flat_map(char::escape_default) } + EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) } } /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`]. 
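The `escape_debug` rewrite above packs the old two-step construction into one expression while preserving the documented behavior: only the string's first character is escaped with `escape_grapheme_extended: true`. A quick stable-Rust check of that behavior, using U+0301 (COMBINING ACUTE ACCENT, a Grapheme_Extend character):

```rust
fn main() {
    let s = "\u{301}e\u{301}";
    // A leading combining mark has nothing to attach to, so it is escaped;
    // the same mark after 'e' is part of a grapheme and is left as-is.
    assert_eq!(s.escape_debug().to_string(), "\\u{301}e\u{301}");
}
```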
@@ -2733,7 +2719,7 @@ impl str { without modifying the original"] #[stable(feature = "str_escape", since = "1.34.0")] pub fn escape_unicode(&self) -> EscapeUnicode<'_> { - EscapeUnicode { inner: self.chars().flat_map(char::escape_unicode) } + EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) } } } @@ -2764,15 +2750,59 @@ impl Default for &mut str { } } -type LinesMap = impl (Fn(&str) -> &str) + Copy; -type CharEscapeDebugContinue = impl (FnMut(char) -> char::EscapeDebug) + Copy; -type CharEscapeUnicode = impl (Fn(char) -> char::EscapeUnicode) + Copy; -type CharEscapeDefault = impl (Fn(char) -> char::EscapeDefault) + Copy; -type IsWhitespace = impl (Fn(char) -> bool) + Copy; -type IsAsciiWhitespace = impl (Fn(&u8) -> bool) + Copy; -type IsNotEmpty = impl (Fn(&&str) -> bool) + Copy; -type BytesIsNotEmpty<'a> = impl (FnMut(&&'a [u8]) -> bool) + Copy; -type UnsafeBytesToStr<'a> = impl (FnMut(&'a [u8]) -> &'a str) + Copy; +impl_fn_for_zst! { + /// A nameable, cloneable fn type + #[derive(Clone)] + struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str { + let Some(line) = line.strip_suffix('\n') else { return line }; + let Some(line) = line.strip_suffix('\r') else { return line }; + line + }; + + #[derive(Clone)] + struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug { + c.escape_debug_ext(EscapeDebugExtArgs { + escape_grapheme_extended: false, + escape_single_quote: true, + escape_double_quote: true + }) + }; + + #[derive(Clone)] + struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode { + c.escape_unicode() + }; + #[derive(Clone)] + struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault { + c.escape_default() + }; + + #[derive(Clone)] + struct IsWhitespace impl Fn = |c: char| -> bool { + c.is_whitespace() + }; + + #[derive(Clone)] + struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool { + byte.is_ascii_whitespace() + }; + + #[derive(Clone)] + struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool { + !s.is_empty() + }; + + #[derive(Clone)] + struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool { + !s.is_empty() + }; + + #[derive(Clone)] + struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str { + // SAFETY: not safe + unsafe { from_utf8_unchecked(bytes) } + }; +} // This is required to make `impl From<&str> for Box` and `impl From for Box` not overlap. #[stable(feature = "rust1", since = "1.0.0")] diff --git a/core/src/str/pattern.rs b/core/src/str/pattern.rs index cc66da25795dd..8988229be2e57 100644 --- a/core/src/str/pattern.rs +++ b/core/src/str/pattern.rs @@ -342,7 +342,7 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> { /// /// `(&str)::Searcher` is not a `DoubleEndedSearcher` because /// the pattern `"aa"` in the haystack `"aaa"` matches as either -/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. +/// `"[aa]a"` or `"a[aa]"`, depending on which side it is searched. 
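The doc fix just above ("depending on which side it is searched") is easiest to see with the exact example the comment describes:

```rust
fn main() {
    let haystack = "aaa";
    // Searching from the front finds the match at byte 0 ("[aa]a") ...
    assert_eq!(haystack.find("aa"), Some(0));
    // ... while searching from the back finds the one at byte 1 ("a[aa]"),
    // so the two ends would disagree about which matches exist.
    assert_eq!(haystack.rfind("aa"), Some(1));
}
```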
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {} ///////////////////////////////////////////////////////////////////////////// diff --git a/core/src/str/traits.rs b/core/src/str/traits.rs index ba2d6f644962e..3de5546c4d4e3 100644 --- a/core/src/str/traits.rs +++ b/core/src/str/traits.rs @@ -4,6 +4,7 @@ use crate::cmp::Ordering; use crate::intrinsics::unchecked_sub; use crate::ops; use crate::ptr; +use crate::range; use crate::slice::SliceIndex; use crate::ub_checks::assert_unsafe_precondition; @@ -261,6 +262,108 @@ unsafe impl SliceIndex for ops::Range { } } +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex for range::Range { + type Output = str; + #[inline] + fn get(self, slice: &str) -> Option<&Self::Output> { + if self.start <= self.end + && slice.is_char_boundary(self.start) + && slice.is_char_boundary(self.end) + { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + // We also checked char boundaries, so this is valid UTF-8. + Some(unsafe { &*self.get_unchecked(slice) }) + } else { + None + } + } + #[inline] + fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { + if self.start <= self.end + && slice.is_char_boundary(self.start) + && slice.is_char_boundary(self.end) + { + // SAFETY: just checked that `start` and `end` are on a char boundary. + // We know the pointer is unique because we got it from `slice`. + Some(unsafe { &mut *self.get_unchecked_mut(slice) }) + } else { + None + } + } + #[inline] + unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { + let slice = slice as *const [u8]; + + assert_unsafe_precondition!( + // We'd like to check that the bounds are on char boundaries, + // but there's not really a way to do so without reading + // behind the pointer, which has aliasing implications. + // It's also not possible to move this check up to + // `str::get_unchecked` without adding a special function + // to `SliceIndex` just for this. + check_library_ub, + "str::get_unchecked requires that the range is within the string slice", + ( + start: usize = self.start, + end: usize = self.end, + len: usize = slice.len() + ) => end >= start && end <= len, + ); + + // SAFETY: the caller guarantees that `self` is in bounds of `slice` + // which satisfies all the conditions for `add`. + unsafe { + let new_len = unchecked_sub(self.end, self.start); + ptr::slice_from_raw_parts(slice.as_ptr().add(self.start), new_len) as *const str + } + } + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output { + let slice = slice as *mut [u8]; + + assert_unsafe_precondition!( + check_library_ub, + "str::get_unchecked_mut requires that the range is within the string slice", + ( + start: usize = self.start, + end: usize = self.end, + len: usize = slice.len() + ) => end >= start && end <= len, + ); + + // SAFETY: see comments for `get_unchecked`. 
+ unsafe { + let new_len = unchecked_sub(self.end, self.start); + ptr::slice_from_raw_parts_mut(slice.as_mut_ptr().add(self.start), new_len) as *mut str + } + } + #[inline] + fn index(self, slice: &str) -> &Self::Output { + let (start, end) = (self.start, self.end); + match self.get(slice) { + Some(s) => s, + None => super::slice_error_fail(slice, start, end), + } + } + #[inline] + fn index_mut(self, slice: &mut str) -> &mut Self::Output { + // is_char_boundary checks that the index is in [0, .len()] + // cannot reuse `get` as above, because of NLL trouble + if self.start <= self.end + && slice.is_char_boundary(self.start) + && slice.is_char_boundary(self.end) + { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + unsafe { &mut *self.get_unchecked_mut(slice) } + } else { + super::slice_error_fail(slice, self.start, self.end) + } + } +} + /// Implements substring slicing for arbitrary bounds. /// /// Returns a slice of the given string bounded by the byte indices @@ -453,6 +556,61 @@ unsafe impl SliceIndex for ops::RangeFrom { } } +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex for range::RangeFrom { + type Output = str; + #[inline] + fn get(self, slice: &str) -> Option<&Self::Output> { + if slice.is_char_boundary(self.start) { + // SAFETY: just checked that `start` is on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + Some(unsafe { &*self.get_unchecked(slice) }) + } else { + None + } + } + #[inline] + fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { + if slice.is_char_boundary(self.start) { + // SAFETY: just checked that `start` is on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + Some(unsafe { &mut *self.get_unchecked_mut(slice) }) + } else { + None + } + } + #[inline] + unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { + let len = (slice as *const [u8]).len(); + // SAFETY: the caller has to uphold the safety contract for `get_unchecked`. + unsafe { (self.start..len).get_unchecked(slice) } + } + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output { + let len = (slice as *mut [u8]).len(); + // SAFETY: the caller has to uphold the safety contract for `get_unchecked_mut`. + unsafe { (self.start..len).get_unchecked_mut(slice) } + } + #[inline] + fn index(self, slice: &str) -> &Self::Output { + let (start, end) = (self.start, slice.len()); + match self.get(slice) { + Some(s) => s, + None => super::slice_error_fail(slice, start, end), + } + } + #[inline] + fn index_mut(self, slice: &mut str) -> &mut Self::Output { + if slice.is_char_boundary(self.start) { + // SAFETY: just checked that `start` is on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + unsafe { &mut *self.get_unchecked_mut(slice) } + } else { + super::slice_error_fail(slice, self.start, slice.len()) + } + } +} + /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut /// self[begin ..= end]`. 
/// @@ -507,6 +665,43 @@ unsafe impl SliceIndex for ops::RangeInclusive { } } +#[unstable(feature = "new_range_api", issue = "125687")] +unsafe impl SliceIndex for range::RangeInclusive { + type Output = str; + #[inline] + fn get(self, slice: &str) -> Option<&Self::Output> { + if self.end == usize::MAX { None } else { self.into_slice_range().get(slice) } + } + #[inline] + fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { + if self.end == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + } + #[inline] + unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { + // SAFETY: the caller must uphold the safety contract for `get_unchecked`. + unsafe { self.into_slice_range().get_unchecked(slice) } + } + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output { + // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`. + unsafe { self.into_slice_range().get_unchecked_mut(slice) } + } + #[inline] + fn index(self, slice: &str) -> &Self::Output { + if self.end == usize::MAX { + str_index_overflow_fail(); + } + self.into_slice_range().index(slice) + } + #[inline] + fn index_mut(self, slice: &mut str) -> &mut Self::Output { + if self.end == usize::MAX { + str_index_overflow_fail(); + } + self.into_slice_range().index_mut(slice) + } +} + /// Implements substring slicing with syntax `&self[..= end]` or `&mut /// self[..= end]`. /// diff --git a/core/src/sync/atomic.rs b/core/src/sync/atomic.rs index 232ec589093d3..efc07f38f68e0 100644 --- a/core/src/sync/atomic.rs +++ b/core/src/sync/atomic.rs @@ -183,7 +183,7 @@ //! //! let spinlock_clone = Arc::clone(&spinlock); //! -//! let thread = thread::spawn(move|| { +//! let thread = thread::spawn(move || { //! spinlock_clone.store(0, Ordering::Release); //! }); //! @@ -443,8 +443,8 @@ impl AtomicBool { /// /// # Safety /// - /// * `ptr` must be aligned to `align_of::()` (note that on some platforms this can - /// be bigger than `align_of::()`). + /// * `ptr` must be aligned to `align_of::()` (note that this is always true, since + /// `align_of::() == 1`). /// * `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`. /// * You must adhere to the [Memory model for atomic accesses]. In particular, it is not /// allowed to mix atomic and non-atomic accesses, or atomic accesses of different sizes, @@ -1069,7 +1069,6 @@ impl AtomicBool { /// # Examples /// /// ``` - /// #![feature(atomic_bool_fetch_not)] /// use std::sync::atomic::{AtomicBool, Ordering}; /// /// let foo = AtomicBool::new(true); @@ -1081,7 +1080,7 @@ impl AtomicBool { /// assert_eq!(foo.load(Ordering::SeqCst), true); /// ``` #[inline] - #[unstable(feature = "atomic_bool_fetch_not", issue = "98485")] + #[stable(feature = "atomic_bool_fetch_not", since = "CURRENT_RUSTC_VERSION")] #[cfg(target_has_atomic = "8")] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub fn fetch_not(&self, order: Ordering) -> bool { @@ -1296,7 +1295,6 @@ impl AtomicPtr { #[cfg(target_has_atomic_equal_alignment = "ptr")] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut(v: &mut *mut T) -> &mut Self { - use crate::mem::align_of; let [] = [(); align_of::>() - align_of::<*mut ()>()]; // SAFETY: // - the mutable reference guarantees unique ownership. @@ -2092,10 +2090,10 @@ impl From<*mut T> for AtomicPtr { } #[allow(unused_macros)] // This macro ends up being unused on some architectures. -macro_rules! 
if_not_8_bit { - (u8, $($tt:tt)*) => { "" }; - (i8, $($tt:tt)*) => { "" }; - ($_:ident, $($tt:tt)*) => { $($tt)* }; +macro_rules! if_8_bit { + (u8, $( yes = [$($yes:tt)*], )? $( no = [$($no:tt)*], )? ) => { concat!("", $($($yes)*)?) }; + (i8, $( yes = [$($yes:tt)*], )? $( no = [$($no:tt)*], )? ) => { concat!("", $($($yes)*)?) }; + ($_:ident, $( yes = [$($yes:tt)*], )? $( no = [$($no:tt)*], )? ) => { concat!("", $($($no)*)?) }; } #[cfg(target_has_atomic_load_store)] @@ -2117,18 +2115,24 @@ macro_rules! atomic_int { $int_type:ident $atomic_type:ident) => { /// An integer type which can be safely shared between threads. /// - /// This type has the same size and bit validity as the underlying - /// integer type, [` + /// This type has the same + #[doc = if_8_bit!( + $int_type, + yes = ["size, alignment, and bit validity"], + no = ["size and bit validity"], + )] + /// as the underlying integer type, [` #[doc = $s_int_type] /// `]. - #[doc = if_not_8_bit! { + #[doc = if_8_bit! { $int_type, - concat!( + no = [ "However, the alignment of this type is always equal to its ", "size, even on targets where [`", $s_int_type, "`] has a ", "lesser alignment." - ) + ], }] + /// /// For more about the differences between atomic types and /// non-atomic types as well as information about the portability of /// this type, please see the [module-level documentation]. @@ -2221,9 +2225,19 @@ macro_rules! atomic_int { /// /// # Safety /// - #[doc = concat!(" * `ptr` must be aligned to \ - `align_of::<", stringify!($atomic_type), ">()` (note that on some platforms this \ - can be bigger than `align_of::<", stringify!($int_type), ">()`).")] + /// * `ptr` must be aligned to + #[doc = concat!(" `align_of::<", stringify!($atomic_type), ">()`")] + #[doc = if_8_bit!{ + $int_type, + yes = [ + " (note that this is always true, since `align_of::<", + stringify!($atomic_type), ">() == 1`)." + ], + no = [ + " (note that on some platforms this can be bigger than `align_of::<", + stringify!($int_type), ">()`)." + ], + }] /// * `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`. /// * You must adhere to the [Memory model for atomic accesses]. In particular, it is not /// allowed to mix atomic and non-atomic accesses, or atomic accesses of different sizes, @@ -2262,12 +2276,12 @@ macro_rules! atomic_int { #[doc = concat!("Get atomic access to a `&mut ", stringify!($int_type), "`.")] /// - #[doc = if_not_8_bit! { + #[doc = if_8_bit! { $int_type, - concat!( + no = [ "**Note:** This function is only available on targets where `", stringify!($int_type), "` has an alignment of ", $align, " bytes." - ) + ], }] /// /// # Examples @@ -2286,7 +2300,6 @@ macro_rules! atomic_int { #[$cfg_align] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut(v: &mut $int_type) -> &mut Self { - use crate::mem::align_of; let [] = [(); align_of::() - align_of::<$int_type>()]; // SAFETY: // - the mutable reference guarantees unique ownership. @@ -2354,7 +2367,6 @@ macro_rules! atomic_int { #[$cfg_align] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut_slice(v: &mut [$int_type]) -> &mut [Self] { - use crate::mem::align_of; let [] = [(); align_of::() - align_of::<$int_type>()]; // SAFETY: // - the mutable reference guarantees unique ownership. 
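Both `from_mut` bodies above keep the line `let [] = [(); align_of::<Self>() - align_of::<$int_type>()];` (the inner generic arguments are mangled in some hunks above), which is a compile-time alignment check rather than runtime code. A standalone sketch of the same trick with concrete types; `AtomicU32`/`u32` is an illustrative pairing that matches on targets where `from_mut` is available:

```rust
use core::mem::align_of;
use core::sync::atomic::AtomicU32;

// `[(); N]` is an array type with a const-expression length, and the
// pattern `[]` only matches an array of length 0. If the alignments were
// unequal, the subtraction would either underflow (a const-evaluation
// error) or produce a non-zero length that `[]` cannot match, so the
// program would fail to compile.
const _: () = {
    let [] = [(); align_of::<AtomicU32>() - align_of::<u32>()];
};

fn main() {
    println!("AtomicU32 alignment: {}", align_of::<AtomicU32>());
}
```

In the macro-expanded `from_mut` the check is evaluated per concrete atomic type, so a port to a target where the alignments diverge is caught at build time instead of becoming unsoundness.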
@@ -3753,7 +3765,7 @@ impl fmt::Debug for AtomicPtr { #[stable(feature = "atomic_pointer", since = "1.24.0")] impl fmt::Pointer for AtomicPtr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Pointer::fmt(&self.load(Ordering::SeqCst), f) + fmt::Pointer::fmt(&self.load(Ordering::Relaxed), f) } } diff --git a/core/src/task/wake.rs b/core/src/task/wake.rs index 3d21b09fa8a02..86a965f68e085 100644 --- a/core/src/task/wake.rs +++ b/core/src/task/wake.rs @@ -282,7 +282,7 @@ impl<'a> Context<'a> { pub const fn ext(&mut self) -> &mut dyn Any { // FIXME: this field makes Context extra-weird about unwind safety // can we justify AssertUnwindSafe if we stabilize this? do we care? - match &mut *self.ext { + match &mut self.ext.0 { ExtData::Some(data) => *data, ExtData::None(unit) => unit, } @@ -356,7 +356,7 @@ impl<'a> ContextBuilder<'a> { #[rustc_const_unstable(feature = "const_waker", issue = "102012")] #[unstable(feature = "context_ext", issue = "123392")] pub const fn from(cx: &'a mut Context<'_>) -> Self { - let ext = match &mut *cx.ext { + let ext = match &mut cx.ext.0 { ExtData::Some(ext) => ExtData::Some(*ext), ExtData::None(()) => ExtData::None(()), }; diff --git a/core/src/time.rs b/core/src/time.rs index 88fe29c999749..d66f558078ea8 100644 --- a/core/src/time.rs +++ b/core/src/time.rs @@ -348,7 +348,7 @@ impl Duration { #[inline] pub const fn from_weeks(weeks: u64) -> Duration { if weeks > u64::MAX / (SECS_PER_MINUTE * MINS_PER_HOUR * HOURS_PER_DAY * DAYS_PER_WEEK) { - panic!("overflow in Duration::from_days"); + panic!("overflow in Duration::from_weeks"); } Duration::from_secs(weeks * MINS_PER_HOUR * SECS_PER_MINUTE * HOURS_PER_DAY * DAYS_PER_WEEK) @@ -620,13 +620,14 @@ impl Duration { /// Basic usage: /// /// ``` - /// #![feature(duration_abs_diff)] /// use std::time::Duration; /// /// assert_eq!(Duration::new(100, 0).abs_diff(Duration::new(80, 0)), Duration::new(20, 0)); /// assert_eq!(Duration::new(100, 400_000_000).abs_diff(Duration::new(110, 0)), Duration::new(9, 600_000_000)); /// ``` - #[unstable(feature = "duration_abs_diff", issue = "117618")] + #[stable(feature = "duration_abs_diff", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "duration_abs_diff", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -842,7 +843,7 @@ impl Duration { /// Returns the number of seconds contained by this `Duration` as `f64`. /// - /// The returned value does include the fractional (nanosecond) part of the duration. + /// The returned value includes the fractional (nanosecond) part of the duration. /// /// # Examples /// ``` @@ -861,7 +862,7 @@ impl Duration { /// Returns the number of seconds contained by this `Duration` as `f32`. /// - /// The returned value does include the fractional (nanosecond) part of the duration. + /// The returned value includes the fractional (nanosecond) part of the duration. /// /// # Examples /// ``` @@ -880,7 +881,7 @@ impl Duration { /// Returns the number of milliseconds contained by this `Duration` as `f64`. /// - /// The returned value does include the fractional (nanosecond) part of the duration. + /// The returned value includes the fractional (nanosecond) part of the duration. /// /// # Examples /// ``` @@ -901,7 +902,7 @@ impl Duration { /// Returns the number of milliseconds contained by this `Duration` as `f32`. 
/// - /// The returned value does include the fractional (nanosecond) part of the duration. + /// The returned value includes the fractional (nanosecond) part of the duration. /// /// # Examples /// ``` @@ -1084,40 +1085,42 @@ impl Duration { /// /// # Examples /// ``` - /// #![feature(div_duration)] /// use std::time::Duration; /// /// let dur1 = Duration::new(2, 700_000_000); /// let dur2 = Duration::new(5, 400_000_000); /// assert_eq!(dur1.div_duration_f64(dur2), 0.5); /// ``` - #[unstable(feature = "div_duration", issue = "63139")] + #[stable(feature = "div_duration", since = "1.80.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] #[rustc_const_unstable(feature = "duration_consts_float", issue = "72440")] pub const fn div_duration_f64(self, rhs: Duration) -> f64 { - self.as_secs_f64() / rhs.as_secs_f64() + let self_nanos = (self.secs as f64) * (NANOS_PER_SEC as f64) + (self.nanos.0 as f64); + let rhs_nanos = (rhs.secs as f64) * (NANOS_PER_SEC as f64) + (rhs.nanos.0 as f64); + self_nanos / rhs_nanos } /// Divide `Duration` by `Duration` and return `f32`. /// /// # Examples /// ``` - /// #![feature(div_duration)] /// use std::time::Duration; /// /// let dur1 = Duration::new(2, 700_000_000); /// let dur2 = Duration::new(5, 400_000_000); /// assert_eq!(dur1.div_duration_f32(dur2), 0.5); /// ``` - #[unstable(feature = "div_duration", issue = "63139")] + #[stable(feature = "div_duration", since = "1.80.0")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] #[rustc_const_unstable(feature = "duration_consts_float", issue = "72440")] pub const fn div_duration_f32(self, rhs: Duration) -> f32 { - self.as_secs_f32() / rhs.as_secs_f32() + let self_nanos = (self.secs as f32) * (NANOS_PER_SEC as f32) + (self.nanos.0 as f32); + let rhs_nanos = (rhs.secs as f32) * (NANOS_PER_SEC as f32) + (rhs.nanos.0 as f32); + self_nanos / rhs_nanos } } diff --git a/core/src/unicode/mod.rs b/core/src/unicode/mod.rs index e1faa407d54c5..5ddd9f7476dd8 100644 --- a/core/src/unicode/mod.rs +++ b/core/src/unicode/mod.rs @@ -1,6 +1,19 @@ #![unstable(feature = "unicode_internals", issue = "none")] #![allow(missing_docs)] +// The `pub use` ones are for use in alloc, and are not re-exported in std. + +pub(crate) use unicode_data::alphabetic::lookup as Alphabetic; +pub use unicode_data::case_ignorable::lookup as Case_Ignorable; +pub use unicode_data::cased::lookup as Cased; +pub(crate) use unicode_data::cc::lookup as Cc; +pub use unicode_data::conversions; +pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend; +pub(crate) use unicode_data::lowercase::lookup as Lowercase; +pub(crate) use unicode_data::n::lookup as N; +pub(crate) use unicode_data::uppercase::lookup as Uppercase; +pub(crate) use unicode_data::white_space::lookup as White_Space; + pub(crate) mod printable; mod unicode_data; @@ -16,16 +29,3 @@ mod unicode_data; /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4). #[stable(feature = "unicode_version", since = "1.45.0")] pub const UNICODE_VERSION: (u8, u8, u8) = unicode_data::UNICODE_VERSION; - -// For use in alloc, not re-exported in std. 
-pub use unicode_data::{ - case_ignorable::lookup as Case_Ignorable, cased::lookup as Cased, conversions, -}; - -pub(crate) use unicode_data::alphabetic::lookup as Alphabetic; -pub(crate) use unicode_data::cc::lookup as Cc; -pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend; -pub(crate) use unicode_data::lowercase::lookup as Lowercase; -pub(crate) use unicode_data::n::lookup as N; -pub(crate) use unicode_data::uppercase::lookup as Uppercase; -pub(crate) use unicode_data::white_space::lookup as White_Space; diff --git a/core/tests/clone.rs b/core/tests/clone.rs index 64193e1155890..23efab2f1b598 100644 --- a/core/tests/clone.rs +++ b/core/tests/clone.rs @@ -1,3 +1,6 @@ +use core::clone::CloneToUninit; +use core::mem::MaybeUninit; + #[test] #[allow(suspicious_double_ref_op)] fn test_borrowed_clone() { @@ -14,3 +17,66 @@ fn test_clone_from() { b.clone_from(&a); assert_eq!(*b, 5); } + +#[test] +fn test_clone_to_uninit_slice_success() { + // Using `String`s to exercise allocation and Drop of the individual elements; + // if something is aliased or double-freed, at least Miri will catch that. + let a: [String; 3] = ["a", "b", "c"].map(String::from); + + let mut storage: MaybeUninit<[String; 3]> = MaybeUninit::uninit(); + let b: [String; 3] = unsafe { + a[..].clone_to_uninit(storage.as_mut_ptr() as *mut [String]); + storage.assume_init() + }; + + assert_eq!(a, b); +} + +#[test] +#[cfg(panic = "unwind")] +fn test_clone_to_uninit_slice_drops_on_panic() { + use core::sync::atomic::{AtomicUsize, Ordering::Relaxed}; + + /// A static counter is OK to use as long as _this one test_ isn't run several times in + /// multiple threads. + static COUNTER: AtomicUsize = AtomicUsize::new(0); + /// Counts how many instances are live, and panics if a fifth one is created + struct CountsDropsAndPanics {} + impl CountsDropsAndPanics { + fn new() -> Self { + COUNTER.fetch_add(1, Relaxed); + Self {} + } + } + impl Clone for CountsDropsAndPanics { + fn clone(&self) -> Self { + if COUNTER.load(Relaxed) == 4 { panic!("intentional panic") } else { Self::new() } + } + } + impl Drop for CountsDropsAndPanics { + fn drop(&mut self) { + COUNTER.fetch_sub(1, Relaxed); + } + } + + let a: [CountsDropsAndPanics; 3] = core::array::from_fn(|_| CountsDropsAndPanics::new()); + assert_eq!(COUNTER.load(Relaxed), 3); + + let panic_payload = std::panic::catch_unwind(|| { + let mut storage: MaybeUninit<[CountsDropsAndPanics; 3]> = MaybeUninit::uninit(); + // This should panic halfway through + unsafe { + a[..].clone_to_uninit(storage.as_mut_ptr() as *mut [CountsDropsAndPanics]); + } + }) + .unwrap_err(); + assert_eq!(panic_payload.downcast().unwrap(), Box::new("intentional panic")); + + // Check for lack of leak, which is what this test is looking for + assert_eq!(COUNTER.load(Relaxed), 3, "leaked during clone!"); + + // Might as well exercise the rest of the drops + drop(a); + assert_eq!(COUNTER.load(Relaxed), 0); +} diff --git a/core/tests/fmt/builders.rs b/core/tests/fmt/builders.rs index 487ce46be28d7..2bdc334b7c027 100644 --- a/core/tests/fmt/builders.rs +++ b/core/tests/fmt/builders.rs @@ -441,7 +441,7 @@ mod debug_map { } } - format!("{Foo:?}"); + let _ = format!("{Foo:?}"); } #[test] @@ -455,7 +455,7 @@ mod debug_map { } } - format!("{Foo:?}"); + let _ = format!("{Foo:?}"); } #[test] @@ -469,7 +469,7 @@ mod debug_map { } } - format!("{Foo:?}"); + let _ = format!("{Foo:?}"); } } diff --git a/core/tests/iter/adapters/chain.rs b/core/tests/iter/adapters/chain.rs index b2429588de12b..c93510df524cf 100644 --- 
a/core/tests/iter/adapters/chain.rs +++ b/core/tests/iter/adapters/chain.rs @@ -2,6 +2,14 @@ use super::*; use core::iter::*; use core::num::NonZero; +#[test] +fn test_chain() { + let xs = [0, 1, 2, 3, 4, 5]; + let ys = [30, 40, 50, 60]; + let expected = [0, 1, 2, 3, 4, 5, 30, 40, 50, 60]; + assert_eq!(Vec::from_iter(chain(xs, ys)), expected); +} + #[test] fn test_iterator_chain() { let xs = [0, 1, 2, 3, 4, 5]; diff --git a/core/tests/iter/adapters/filter.rs b/core/tests/iter/adapters/filter.rs index a2050d89d8564..167851e33336e 100644 --- a/core/tests/iter/adapters/filter.rs +++ b/core/tests/iter/adapters/filter.rs @@ -1,4 +1,5 @@ use core::iter::*; +use std::rc::Rc; #[test] fn test_iterator_filter_count() { @@ -50,3 +51,15 @@ fn test_double_ended_filter() { assert_eq!(it.next().unwrap(), &2); assert_eq!(it.next_back(), None); } + +#[test] +fn test_next_chunk_does_not_leak() { + let drop_witness: [_; 5] = std::array::from_fn(|_| Rc::new(())); + + let v = (0..5).map(|i| drop_witness[i].clone()).collect::>(); + let _ = v.into_iter().filter(|_| false).next_chunk::<1>(); + + for ref w in drop_witness { + assert_eq!(Rc::strong_count(w), 1); + } +} diff --git a/core/tests/iter/adapters/map_windows.rs b/core/tests/iter/adapters/map_windows.rs index 7fb2408f8acb7..6744eff3fa26f 100644 --- a/core/tests/iter/adapters/map_windows.rs +++ b/core/tests/iter/adapters/map_windows.rs @@ -3,6 +3,7 @@ use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; #[cfg(not(panic = "abort"))] mod drop_checks { //! These tests mainly make sure the elements are correctly dropped. + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst}; #[derive(Debug)] diff --git a/core/tests/lib.rs b/core/tests/lib.rs index 797108a8425de..83a615fcd8be3 100644 --- a/core/tests/lib.rs +++ b/core/tests/lib.rs @@ -8,6 +8,7 @@ #![feature(async_iterator)] #![feature(bigint_helper_methods)] #![feature(cell_update)] +#![feature(clone_to_uninit)] #![feature(const_align_offset)] #![feature(const_align_of_val_raw)] #![feature(const_black_box)] @@ -29,8 +30,6 @@ #![feature(core_private_bignum)] #![feature(core_private_diy_float)] #![feature(dec2flt)] -#![feature(div_duration)] -#![feature(duration_abs_diff)] #![feature(duration_consts_float)] #![feature(duration_constants)] #![feature(duration_constructors)] @@ -49,13 +48,11 @@ #![feature(is_sorted)] #![feature(layout_for_ptr)] #![feature(pattern)] -#![feature(sort_internals)] #![feature(slice_take)] #![feature(slice_from_ptr_range)] #![feature(slice_split_once)] #![feature(split_as_slice)] #![feature(maybe_uninit_fill)] -#![feature(maybe_uninit_uninit_array)] #![feature(maybe_uninit_write_slice)] #![feature(maybe_uninit_uninit_array_transpose)] #![feature(min_specialization)] @@ -76,6 +73,7 @@ #![feature(ip)] #![feature(iter_advance_by)] #![feature(iter_array_chunks)] +#![feature(iter_chain)] #![feature(iter_collect_into)] #![feature(iter_partition_in_place)] #![feature(iter_intersperse)] @@ -96,7 +94,6 @@ #![feature(pointer_is_aligned_to)] #![feature(portable_simd)] #![feature(ptr_metadata)] -#![feature(lazy_cell)] #![feature(unsized_tuple_coercion)] #![feature(const_option)] #![feature(const_option_ext)] @@ -112,9 +109,7 @@ #![feature(const_array_from_ref)] #![feature(const_slice_from_ref)] #![feature(waker_getters)] -#![feature(slice_flatten)] #![feature(error_generic_member_access)] -#![feature(error_in_core)] #![feature(trait_upcasting)] #![feature(is_ascii_octdigit)] #![feature(get_many_mut)] diff --git a/core/tests/mem.rs b/core/tests/mem.rs index 
e388800f400df..cc73391630760 100644 --- a/core/tests/mem.rs +++ b/core/tests/mem.rs @@ -83,12 +83,12 @@ fn align_of_val_raw_packed() { f: [u32], } let storage = [0u8; 4]; - let b: *const B = ptr::from_raw_parts(storage.as_ptr().cast(), 1); + let b: *const B = ptr::from_raw_parts(storage.as_ptr(), 1); assert_eq!(unsafe { align_of_val_raw(b) }, 1); const ALIGN_OF_VAL_RAW: usize = { let storage = [0u8; 4]; - let b: *const B = ptr::from_raw_parts(storage.as_ptr().cast(), 1); + let b: *const B = ptr::from_raw_parts(storage.as_ptr(), 1); unsafe { align_of_val_raw(b) } }; assert_eq!(ALIGN_OF_VAL_RAW, 1); diff --git a/core/tests/net/parser.rs b/core/tests/net/parser.rs index 36b87d7c1f5e0..e03959ac77c35 100644 --- a/core/tests/net/parser.rs +++ b/core/tests/net/parser.rs @@ -1,4 +1,5 @@ // FIXME: These tests are all excellent candidates for AFL fuzz testing + use core::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; use core::str::FromStr; diff --git a/core/tests/num/ieee754.rs b/core/tests/num/ieee754.rs index 48ab75b6f17a5..b0f6a7545aa93 100644 --- a/core/tests/num/ieee754.rs +++ b/core/tests/num/ieee754.rs @@ -27,6 +27,7 @@ //! standard. That is why they accept wildly diverse inputs or may seem to duplicate other tests. //! Please consider this carefully when adding, removing, or reorganizing these tests. They are //! here so that it is clear what tests are required by the standard and what can be changed. + use ::core::str::FromStr; // IEEE 754 for many tests is applied to specific bit patterns. diff --git a/core/tests/num/int_log.rs b/core/tests/num/int_log.rs index a1edb1a518632..2320a7acc35ac 100644 --- a/core/tests/num/int_log.rs +++ b/core/tests/num/int_log.rs @@ -24,15 +24,15 @@ fn checked_ilog() { #[cfg(not(miri))] // Miri is too slow for i in i16::MIN..=0 { - assert_eq!(i.checked_ilog(4), None); + assert_eq!(i.checked_ilog(4), None, "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=i16::MAX { - assert_eq!(i.checked_ilog(13), Some((i as f32).log(13.0) as u32)); + assert_eq!(i.checked_ilog(13), Some((i as f32).log(13.0) as u32), "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=u16::MAX { - assert_eq!(i.checked_ilog(13), Some((i as f32).log(13.0) as u32)); + assert_eq!(i.checked_ilog(13), Some((i as f32).log(13.0) as u32), "checking {i}"); } } @@ -49,30 +49,30 @@ fn checked_ilog2() { assert_eq!(0i16.checked_ilog2(), None); for i in 1..=u8::MAX { - assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32)); + assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32), "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=u16::MAX { // Guard against Android's imprecise f32::ilog2 implementation. if i != 8192 && i != 32768 { - assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32)); + assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32), "checking {i}"); } } for i in i8::MIN..=0 { - assert_eq!(i.checked_ilog2(), None); + assert_eq!(i.checked_ilog2(), None, "checking {i}"); } for i in 1..=i8::MAX { - assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32)); + assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32), "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in i16::MIN..=0 { - assert_eq!(i.checked_ilog2(), None); + assert_eq!(i.checked_ilog2(), None, "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=i16::MAX { // Guard against Android's imprecise f32::ilog2 implementation. 
if i != 8192 { - assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32)); + assert_eq!(i.checked_ilog2(), Some((i as f32).log2() as u32), "checking {i}"); } } } @@ -95,19 +95,19 @@ fn checked_ilog10() { #[cfg(not(miri))] // Miri is too slow for i in i16::MIN..=0 { - assert_eq!(i.checked_ilog10(), None); + assert_eq!(i.checked_ilog10(), None, "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=i16::MAX { - assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32)); + assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32), "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=u16::MAX { - assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32)); + assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32), "checking {i}"); } #[cfg(not(miri))] // Miri is too slow for i in 1..=100_000u32 { - assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32)); + assert_eq!(i.checked_ilog10(), Some((i as f32).log10() as u32), "checking {i}"); } } diff --git a/core/tests/num/mod.rs b/core/tests/num/mod.rs index 0fed854318d54..9d2912c4b22dc 100644 --- a/core/tests/num/mod.rs +++ b/core/tests/num/mod.rs @@ -729,7 +729,7 @@ assume_usize_width! { } macro_rules! test_float { - ($modname: ident, $fty: ty, $inf: expr, $neginf: expr, $nan: expr, $min: expr, $max: expr, $min_pos: expr) => { + ($modname: ident, $fty: ty, $inf: expr, $neginf: expr, $nan: expr, $min: expr, $max: expr, $min_pos: expr, $max_exp:expr) => { mod $modname { #[test] fn min() { @@ -880,6 +880,27 @@ macro_rules! test_float { assert!(($nan as $fty).midpoint(1.0).is_nan()); assert!((1.0 as $fty).midpoint($nan).is_nan()); assert!(($nan as $fty).midpoint($nan).is_nan()); + + // test if large differences in magnitude are still correctly computed. + // NOTE: that because of how small x and y are, x + y can never overflow + // so (x + y) / 2.0 is always correct + // in particular, `2.pow(i)` will never be at the max exponent, so it could + // be safely doubled, while j is significantly smaller. + for i in $max_exp.saturating_sub(64)..$max_exp { + for j in 0..64u8 { + let large = <$fty>::from(2.0f32).powi(i); + // a much smaller number, such that there is no chance of overflow to test + // potential double rounding in midpoint's implementation. 
+                    let small = <$fty>::from(2.0f32).powi($max_exp - 1)
+                        * <$fty>::EPSILON
+                        * <$fty>::from(j);
+
+                    let naive = (large + small) / 2.0;
+                    let midpoint = large.midpoint(small);
+
+                    assert_eq!(naive, midpoint);
+                }
+            }
         }
 
         #[test]
         fn rem_euclid() {
@@ -912,7 +933,8 @@ test_float!(
     f32::NAN,
     f32::MIN,
     f32::MAX,
-    f32::MIN_POSITIVE
+    f32::MIN_POSITIVE,
+    f32::MAX_EXP
 );
 test_float!(
     f64,
@@ -922,5 +944,6 @@ test_float!(
     f64::NAN,
     f64::MIN,
     f64::MAX,
-    f64::MIN_POSITIVE
+    f64::MIN_POSITIVE,
+    f64::MAX_EXP
 );
diff --git a/core/tests/option.rs b/core/tests/option.rs
index b1b9492f182e0..336a79a02ceeb 100644
--- a/core/tests/option.rs
+++ b/core/tests/option.rs
@@ -574,4 +574,13 @@ fn as_slice() {
     assert_eq!(Some(43).as_mut_slice(), &[43]);
     assert_eq!(None::<u32>.as_slice(), &[]);
     assert_eq!(None::<u32>.as_mut_slice(), &[]);
+
+    const A: &[u32] = Some(44).as_slice();
+    const B: &[u32] = None.as_slice();
+    const _: () = {
+        let [45] = Some(45).as_mut_slice() else { panic!() };
+        let []: &[u32] = None.as_mut_slice() else { panic!() };
+    };
+    assert_eq!(A, &[44]);
+    assert_eq!(B, &[]);
 }
diff --git a/core/tests/pin_macro.rs b/core/tests/pin_macro.rs
index 79c8c166c58d9..57485ef3974cc 100644
--- a/core/tests/pin_macro.rs
+++ b/core/tests/pin_macro.rs
@@ -1,4 +1,5 @@
 // edition:2021
+
 use core::{
     marker::PhantomPinned,
     mem::{drop as stuff, transmute},
diff --git a/core/tests/ptr.rs b/core/tests/ptr.rs
index 7b55c2bf8a813..e3830165eda61 100644
--- a/core/tests/ptr.rs
+++ b/core/tests/ptr.rs
@@ -965,7 +965,7 @@ fn thin_box() {
         fn value_ptr(&self) -> *const T {
             let (_, offset) = self.layout();
             let data_ptr = unsafe { self.ptr.cast::<u8>().as_ptr().add(offset) };
-            ptr::from_raw_parts(data_ptr.cast(), self.meta())
+            ptr::from_raw_parts(data_ptr, self.meta())
         }
 
         fn value_mut_ptr(&mut self) -> *mut T {
@@ -973,7 +973,7 @@ fn thin_box() {
             // FIXME: can this line be shared with the same in `value_ptr()`
             // without upsetting Stacked Borrows?
             let data_ptr = unsafe { self.ptr.cast::<u8>().as_ptr().add(offset) };
-            from_raw_parts_mut(data_ptr.cast(), self.meta())
+            from_raw_parts_mut(data_ptr, self.meta())
         }
     }
 
@@ -1171,3 +1171,15 @@ fn test_ptr_from_raw_parts_in_const() {
     assert_eq!(EMPTY_SLICE_PTR.addr(), 123);
     assert_eq!(EMPTY_SLICE_PTR.len(), 456);
 }
+
+#[test]
+fn test_ptr_metadata_in_const() {
+    use std::fmt::Debug;
+
+    const ARRAY_META: () = std::ptr::metadata::<[u16; 3]>(&[1, 2, 3]);
+    const SLICE_META: usize = std::ptr::metadata::<[u16]>(&[1, 2, 3]);
+    const DYN_META: DynMetadata<dyn Debug> = std::ptr::metadata::<dyn Debug>(&[0_u8; 42]);
+    assert_eq!(ARRAY_META, ());
+    assert_eq!(SLICE_META, 3);
+    assert_eq!(DYN_META.size_of(), 42);
+}
diff --git a/core/tests/slice.rs b/core/tests/slice.rs
index c91ac2fbb43b0..4cbbabb672ba0 100644
--- a/core/tests/slice.rs
+++ b/core/tests/slice.rs
@@ -1803,9 +1803,7 @@ fn brute_force_rotate_test_1() {
 #[test]
 #[cfg(not(target_arch = "wasm32"))]
 fn sort_unstable() {
-    use core::cmp::Ordering::{Equal, Greater, Less};
-    use core::slice::heapsort;
-    use rand::{seq::SliceRandom, Rng};
+    use rand::Rng;
 
     // Miri is too slow (but still need to `chain` to make the types match)
     let lens = if cfg!(miri) { (2..20).chain(0..0) } else { (2..25).chain(500..510) };
@@ -1839,31 +1837,10 @@ fn sort_unstable() {
             tmp.copy_from_slice(v);
             tmp.sort_unstable_by(|a, b| b.cmp(a));
             assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
-
-            // Test heapsort using `<` operator.
-            tmp.copy_from_slice(v);
-            heapsort(tmp, |a, b| a < b);
-            assert!(tmp.windows(2).all(|w| w[0] <= w[1]));
-
-            // Test heapsort using `>` operator.
- tmp.copy_from_slice(v); - heapsort(tmp, |a, b| a > b); - assert!(tmp.windows(2).all(|w| w[0] >= w[1])); } } } - // Sort using a completely random comparison function. - // This will reorder the elements *somehow*, but won't panic. - for i in 0..v.len() { - v[i] = i as i32; - } - v.sort_unstable_by(|_, _| *[Less, Equal, Greater].choose(&mut rng).unwrap()); - v.sort_unstable(); - for i in 0..v.len() { - assert_eq!(v[i], i as i32); - } - // Should not panic. [0i32; 0].sort_unstable(); [(); 10].sort_unstable(); diff --git a/panic_abort/src/lib.rs b/panic_abort/src/lib.rs index 353de8c5c5743..14ba4af2bb575 100644 --- a/panic_abort/src/lib.rs +++ b/panic_abort/src/lib.rs @@ -14,7 +14,7 @@ #![feature(std_internals)] #![feature(staged_api)] #![feature(rustc_attrs)] -#![feature(c_unwind)] +#![cfg_attr(bootstrap, feature(c_unwind))] #![allow(internal_features)] #[cfg(target_os = "android")] diff --git a/panic_unwind/Cargo.toml b/panic_unwind/Cargo.toml index dce2da3164440..f830808d19648 100644 --- a/panic_unwind/Cargo.toml +++ b/panic_unwind/Cargo.toml @@ -16,7 +16,7 @@ alloc = { path = "../alloc" } core = { path = "../core" } unwind = { path = "../unwind" } compiler_builtins = "0.1.0" -cfg-if = "1.0" +cfg-if = { version = "1.0", features = ['rustc-dep-of-std'] } [target.'cfg(not(all(windows, target_env = "msvc")))'.dependencies] libc = { version = "0.2", default-features = false } diff --git a/panic_unwind/src/lib.rs b/panic_unwind/src/lib.rs index b0245de501e7e..77abb9125f651 100644 --- a/panic_unwind/src/lib.rs +++ b/panic_unwind/src/lib.rs @@ -24,7 +24,7 @@ #![feature(rustc_attrs)] #![panic_runtime] #![feature(panic_runtime)] -#![feature(c_unwind)] +#![cfg_attr(bootstrap, feature(c_unwind))] // `real_imp` is unused with Miri, so silence warnings. #![cfg_attr(miri, allow(dead_code))] #![allow(internal_features)] @@ -36,18 +36,14 @@ use core::panic::PanicPayload; cfg_if::cfg_if! { if #[cfg(target_os = "emscripten")] { #[path = "emcc.rs"] - mod real_imp; + mod imp; } else if #[cfg(target_os = "hermit")] { #[path = "hermit.rs"] - mod real_imp; + mod imp; } else if #[cfg(target_os = "l4re")] { // L4Re is unix family but does not yet support unwinding. #[path = "dummy.rs"] - mod real_imp; - } else if #[cfg(all(target_env = "msvc", not(target_arch = "arm")))] { - // LLVM does not support unwinding on 32 bit ARM msvc (thumbv7a-pc-windows-msvc) - #[path = "seh.rs"] - mod real_imp; + mod imp; } else if #[cfg(any( all(target_family = "windows", target_env = "gnu"), target_os = "psp", @@ -58,7 +54,16 @@ cfg_if::cfg_if! { target_family = "wasm", ))] { #[path = "gcc.rs"] - mod real_imp; + mod imp; + } else if #[cfg(miri)] { + // Use the Miri runtime on Windows as miri doesn't support funclet based unwinding, + // only landingpad based unwinding. Also use the Miri runtime on unsupported platforms. + #[path = "miri.rs"] + mod imp; + } else if #[cfg(all(target_env = "msvc", not(target_arch = "arm")))] { + // LLVM does not support unwinding on 32 bit ARM msvc (thumbv7a-pc-windows-msvc) + #[path = "seh.rs"] + mod imp; } else { // Targets that don't support unwinding. // - os=none ("bare metal" targets) @@ -67,20 +72,7 @@ cfg_if::cfg_if! { // - nvptx64-nvidia-cuda // - arch=avr #[path = "dummy.rs"] - mod real_imp; - } -} - -cfg_if::cfg_if! { - if #[cfg(miri)] { - // Use the Miri runtime. - // We still need to also load the normal runtime above, as rustc expects certain lang - // items from there to be defined. - #[path = "miri.rs"] mod imp; - } else { - // Use the real runtime. 
- use real_imp as imp; } } diff --git a/panic_unwind/src/miri.rs b/panic_unwind/src/miri.rs index 4d21e846010e9..695adadd59b55 100644 --- a/panic_unwind/src/miri.rs +++ b/panic_unwind/src/miri.rs @@ -1,4 +1,5 @@ //! Unwinding panics for Miri. + use alloc::boxed::Box; use core::any::Any; diff --git a/portable-simd/crates/core_simd/examples/dot_product.rs b/portable-simd/crates/core_simd/examples/dot_product.rs index f047010a65c16..75d152ae7f0e3 100644 --- a/portable-simd/crates/core_simd/examples/dot_product.rs +++ b/portable-simd/crates/core_simd/examples/dot_product.rs @@ -1,6 +1,5 @@ -// Code taken from the `packed_simd` crate -// Run this code with `cargo test --example dot_product` -//use std::iter::zip; +//! Code taken from the `packed_simd` crate. +//! Run this code with `cargo test --example dot_product`. #![feature(array_chunks)] #![feature(slice_as_chunks)] diff --git a/portable-simd/crates/core_simd/src/ops/assign.rs b/portable-simd/crates/core_simd/src/ops/assign.rs index 0e87785025a38..d21d867de26d6 100644 --- a/portable-simd/crates/core_simd/src/ops/assign.rs +++ b/portable-simd/crates/core_simd/src/ops/assign.rs @@ -1,4 +1,5 @@ //! Assignment operators + use super::*; use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment diff --git a/portable-simd/crates/core_simd/src/ops/deref.rs b/portable-simd/crates/core_simd/src/ops/deref.rs index 89a60ba114146..0ff76cfba39bb 100644 --- a/portable-simd/crates/core_simd/src/ops/deref.rs +++ b/portable-simd/crates/core_simd/src/ops/deref.rs @@ -2,6 +2,7 @@ //! Ideally, Rust would take care of this itself, //! and method calls usually handle the LHS implicitly. //! But this is not the case with arithmetic ops. + use super::*; macro_rules! deref_lhs { diff --git a/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs b/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs index 0f1719206c9ce..cbffbc564cfed 100644 --- a/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -96,7 +96,7 @@ where fn cast(self) -> Self::CastPtr { // SimdElement currently requires zero-sized metadata, so this should never fail. // If this ever changes, `simd_cast_ptr` should produce a post-mono error. - use core::{mem::size_of, ptr::Pointee}; + use core::ptr::Pointee; assert_eq!(size_of::<::Metadata>(), 0); assert_eq!(size_of::<::Metadata>(), 0); diff --git a/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs b/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs index 7ba996d149c0c..6bc6ca3ac42dc 100644 --- a/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -93,7 +93,7 @@ where fn cast(self) -> Self::CastPtr { // SimdElement currently requires zero-sized metadata, so this should never fail. // If this ever changes, `simd_cast_ptr` should produce a post-mono error. 
-        use core::{mem::size_of, ptr::Pointee};
+        use core::ptr::Pointee;
         assert_eq!(size_of::<<T as Pointee>::Metadata>(), 0);
         assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);
diff --git a/portable-simd/crates/core_simd/src/swizzle_dyn.rs b/portable-simd/crates/core_simd/src/swizzle_dyn.rs
index 8a1079042f076..3b6388d0f2759 100644
--- a/portable-simd/crates/core_simd/src/swizzle_dyn.rs
+++ b/portable-simd/crates/core_simd/src/swizzle_dyn.rs
@@ -30,6 +30,8 @@ where
     use core::arch::arm::{uint8x8_t, vtbl1_u8};
     #[cfg(target_arch = "wasm32")]
     use core::arch::wasm32 as wasm;
+    #[cfg(target_arch = "wasm64")]
+    use core::arch::wasm64 as wasm;
     #[cfg(target_arch = "x86")]
     use core::arch::x86;
     #[cfg(target_arch = "x86_64")]
diff --git a/proc_macro/src/bridge/buffer.rs b/proc_macro/src/bridge/buffer.rs
index 48030f8d82dca..149767bf70521 100644
--- a/proc_macro/src/bridge/buffer.rs
+++ b/proc_macro/src/bridge/buffer.rs
@@ -119,7 +119,9 @@ impl Write for Buffer {
 }
 
 impl Drop for Buffer {
-    #[inline]
+    // HACK(nbdd0121): Hack to prevent LLVM < 17.0.4 from misoptimising,
+    // change to `#[inline]` if fixed.
+    #[inline(never)]
     fn drop(&mut self) {
         let b = self.take();
         (b.drop)(b);
diff --git a/proc_macro/src/bridge/fxhash.rs b/proc_macro/src/bridge/fxhash.rs
index f4e9054419721..9fb79eabd0556 100644
--- a/proc_macro/src/bridge/fxhash.rs
+++ b/proc_macro/src/bridge/fxhash.rs
@@ -7,7 +7,6 @@
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use std::hash::Hasher;
-use std::mem::size_of;
 use std::ops::BitXor;
 
 /// Type alias for a hashmap using the `fx` hash algorithm.
@@ -69,7 +68,7 @@ impl Hasher for FxHasher {
             hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize);
             bytes = &bytes[2..];
         }
-        if (size_of::<usize>() > 1) && bytes.len() >= 1 {
+        if (size_of::<usize>() > 1) && !bytes.is_empty() {
             hash.add_to_hash(bytes[0] as usize);
         }
         self.hash = hash.hash;
diff --git a/proc_macro/src/bridge/rpc.rs b/proc_macro/src/bridge/rpc.rs
index 6d75a5a627c82..202a8e04543b2 100644
--- a/proc_macro/src/bridge/rpc.rs
+++ b/proc_macro/src/bridge/rpc.rs
@@ -264,9 +264,9 @@ impl From<Box<dyn Any + Send>> for PanicMessage {
     }
 }
 
-impl Into<Box<dyn Any + Send>> for PanicMessage {
-    fn into(self) -> Box<dyn Any + Send> {
-        match self {
+impl From<PanicMessage> for Box<dyn Any + Send> {
+    fn from(val: PanicMessage) -> Self {
+        match val {
             PanicMessage::StaticStr(s) => Box::new(s),
             PanicMessage::String(s) => Box::new(s),
             PanicMessage::Unknown => {
diff --git a/proc_macro/src/escape.rs b/proc_macro/src/escape.rs
new file mode 100644
index 0000000000000..87a4d1d50fd48
--- /dev/null
+++ b/proc_macro/src/escape.rs
@@ -0,0 +1,57 @@
+#[derive(Copy, Clone)]
+pub(crate) struct EscapeOptions {
+    /// Produce \'.
+    pub escape_single_quote: bool,
+    /// Produce \".
+    pub escape_double_quote: bool,
+    /// Produce \x escapes for non-ASCII, and use \x rather than \u for ASCII
+    /// control characters.
+ pub escape_nonascii: bool, +} + +pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String { + let mut repr = String::new(); + + if opt.escape_nonascii { + for &byte in bytes { + escape_single_byte(byte, opt, &mut repr); + } + } else { + let mut chunks = bytes.utf8_chunks(); + while let Some(chunk) = chunks.next() { + for ch in chunk.valid().chars() { + escape_single_char(ch, opt, &mut repr); + } + for &byte in chunk.invalid() { + escape_single_byte(byte, opt, &mut repr); + } + } + } + + repr +} + +fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) { + if byte == b'\0' { + repr.push_str("\\0"); + } else if (byte == b'\'' && !opt.escape_single_quote) + || (byte == b'"' && !opt.escape_double_quote) + { + repr.push(byte as char); + } else { + // Escapes \t, \r, \n, \\, \', \", and uses \x## for non-ASCII and + // for ASCII control characters. + repr.extend(byte.escape_ascii().map(char::from)); + } +} + +fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) { + if (ch == '\'' && !opt.escape_single_quote) || (ch == '"' && !opt.escape_double_quote) { + repr.push(ch); + } else { + // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for + // non-printable characters and for Grapheme_Extend characters, which + // includes things like U+0300 "Combining Grave Accent". + repr.extend(ch.escape_debug()); + } +} diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index 3d7d36b27e53b..581d7e3efe373 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -43,10 +43,12 @@ pub mod bridge; mod diagnostic; +mod escape; #[unstable(feature = "proc_macro_diagnostic", issue = "54140")] pub use diagnostic::{Diagnostic, Level, MultiSpan}; +use crate::escape::{escape_bytes, EscapeOptions}; use std::ffi::CStr; use std::ops::{Range, RangeBounds}; use std::path::PathBuf; @@ -1356,40 +1358,61 @@ impl Literal { /// String literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn string(string: &str) -> Literal { - let quoted = format!("{:?}", string); - assert!(quoted.starts_with('"') && quoted.ends_with('"')); - let symbol = "ed[1..quoted.len() - 1]; - Literal::new(bridge::LitKind::Str, symbol, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: false, + }; + let repr = escape_bytes(string.as_bytes(), escape); + Literal::new(bridge::LitKind::Str, &repr, None) } /// Character literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn character(ch: char) -> Literal { - let quoted = format!("{:?}", ch); - assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); - let symbol = "ed[1..quoted.len() - 1]; - Literal::new(bridge::LitKind::Char, symbol, None) + let escape = EscapeOptions { + escape_single_quote: true, + escape_double_quote: false, + escape_nonascii: false, + }; + let repr = escape_bytes(ch.encode_utf8(&mut [0u8; 4]).as_bytes(), escape); + Literal::new(bridge::LitKind::Char, &repr, None) } /// Byte character literal. #[stable(feature = "proc_macro_byte_character", since = "1.79.0")] pub fn byte_character(byte: u8) -> Literal { - let string = [byte].escape_ascii().to_string(); - Literal::new(bridge::LitKind::Byte, &string, None) + let escape = EscapeOptions { + escape_single_quote: true, + escape_double_quote: false, + escape_nonascii: true, + }; + let repr = escape_bytes(&[byte], escape); + Literal::new(bridge::LitKind::Byte, &repr, None) } /// Byte string literal. 
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn byte_string(bytes: &[u8]) -> Literal { - let string = bytes.escape_ascii().to_string(); - Literal::new(bridge::LitKind::ByteStr, &string, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: true, + }; + let repr = escape_bytes(bytes, escape); + Literal::new(bridge::LitKind::ByteStr, &repr, None) } /// C string literal. #[stable(feature = "proc_macro_c_str_literals", since = "1.79.0")] pub fn c_string(string: &CStr) -> Literal { - let string = string.to_bytes().escape_ascii().to_string(); - Literal::new(bridge::LitKind::CStr, &string, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: false, + }; + let repr = escape_bytes(string.to_bytes(), escape); + Literal::new(bridge::LitKind::CStr, &repr, None) } /// Returns the span encompassing this literal. diff --git a/std/Cargo.toml b/std/Cargo.toml index e56f03808b311..b991b1cf22dd8 100644 --- a/std/Cargo.toml +++ b/std/Cargo.toml @@ -24,20 +24,20 @@ hashbrown = { version = "0.14", default-features = false, features = ['rustc-dep std_detect = { path = "../stdarch/crates/std_detect", default-features = false, features = ['rustc-dep-of-std'] } # Dependencies of the `backtrace` crate -rustc-demangle = { version = "0.1.21", features = ['rustc-dep-of-std'] } +rustc-demangle = { version = "0.1.24", features = ['rustc-dep-of-std'] } [target.'cfg(not(all(windows, target_env = "msvc", not(target_vendor = "uwp"))))'.dependencies] miniz_oxide = { version = "0.7.0", optional = true, default-features = false } -addr2line = { version = "0.21.0", optional = true, default-features = false } +addr2line = { version = "0.22.0", optional = true, default-features = false } [target.'cfg(not(all(windows, target_env = "msvc")))'.dependencies] libc = { version = "0.2.153", default-features = false, features = ['rustc-dep-of-std'], public = true } [target.'cfg(all(not(target_os = "aix"), not(all(windows, target_env = "msvc", not(target_vendor = "uwp")))))'.dependencies] -object = { version = "0.32.0", default-features = false, optional = true, features = ['read_core', 'elf', 'macho', 'pe', 'unaligned', 'archive'] } +object = { version = "0.36.0", default-features = false, optional = true, features = ['read_core', 'elf', 'macho', 'pe', 'unaligned', 'archive'] } [target.'cfg(target_os = "aix")'.dependencies] -object = { version = "0.32.0", default-features = false, optional = true, features = ['read_core', 'xcoff', 'unaligned', 'archive'] } +object = { version = "0.36.0", default-features = false, optional = true, features = ['read_core', 'xcoff', 'unaligned', 'archive'] } [dev-dependencies] rand = { version = "0.8.5", default-features = false, features = ["alloc"] } @@ -50,7 +50,7 @@ dlmalloc = { version = "0.2.4", features = ['rustc-dep-of-std'] } fortanix-sgx-abi = { version = "0.5.0", features = ['rustc-dep-of-std'], public = true } [target.'cfg(target_os = "hermit")'.dependencies] -hermit-abi = { version = "0.3.9", features = ['rustc-dep-of-std'], public = true } +hermit-abi = { version = "0.4.0", features = ['rustc-dep-of-std'], public = true } [target.'cfg(target_os = "wasi")'.dependencies] wasi = { version = "0.11.0", features = ['rustc-dep-of-std'], default-features = false } @@ -87,6 +87,10 @@ std_detect_file_io = ["std_detect/std_detect_file_io"] std_detect_dlsym_getauxval = ["std_detect/std_detect_dlsym_getauxval"] std_detect_env_override = 
["std_detect/std_detect_env_override"] +# Enable using raw-dylib for Windows imports. +# This will eventually be the default. +windows_raw_dylib = [] + [package.metadata.fortanix-sgx] # Maximum possible number of threads when testing threads = 125 @@ -100,9 +104,6 @@ test = true [lints.rust.unexpected_cfgs] level = "warn" -# x.py uses beta cargo, so `check-cfg` entries do not yet take effect -# for rust-lang/rust. But for users of `-Zbuild-std` it does. -# The unused warning is waiting for rust-lang/cargo#13925 to reach beta. check-cfg = [ 'cfg(bootstrap)', 'cfg(target_arch, values("xtensa"))', diff --git a/std/build.rs b/std/build.rs index 7d975df545ecf..c542ba81eedc1 100644 --- a/std/build.rs +++ b/std/build.rs @@ -7,6 +7,10 @@ fn main() { let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").expect("CARGO_CFG_TARGET_VENDOR was not set"); let target_env = env::var("CARGO_CFG_TARGET_ENV").expect("CARGO_CFG_TARGET_ENV was not set"); + let target_pointer_width: u32 = env::var("CARGO_CFG_TARGET_POINTER_WIDTH") + .expect("CARGO_CFG_TARGET_POINTER_WIDTH was not set") + .parse() + .unwrap(); println!("cargo:rustc-check-cfg=cfg(netbsd10)"); if target_os == "netbsd" && env::var("RUSTC_STD_NETBSD10").is_ok() { @@ -70,4 +74,64 @@ fn main() { println!("cargo:rustc-cfg=backtrace_in_libstd"); println!("cargo:rustc-env=STD_ENV_ARCH={}", env::var("CARGO_CFG_TARGET_ARCH").unwrap()); + + // Emit these on platforms that have no known ABI bugs, LLVM selection bugs, lowering bugs, + // missing symbols, or other problems, to determine when tests get run. + // If more broken platforms are found, please update the tracking issue at + // + // + // Some of these match arms are redundant; the goal is to separate reasons that the type is + // unreliable, even when multiple reasons might fail the same platform. + println!("cargo:rustc-check-cfg=cfg(reliable_f16)"); + println!("cargo:rustc-check-cfg=cfg(reliable_f128)"); + + let has_reliable_f16 = match (target_arch.as_str(), target_os.as_str()) { + // Selection failure until recent LLVM + // FIXME(llvm19): can probably be removed at the version bump + ("loongarch64", _) => false, + // Selection failure + ("s390x", _) => false, + // Unsupported + ("arm64ec", _) => false, + // MinGW ABI bugs + ("x86", "windows") => false, + // x86 has ABI bugs that show up with optimizations. This should be partially fixed with + // the compiler-builtins update. + ("x86" | "x86_64", _) => false, + // Missing `__gnu_h2f_ieee` and `__gnu_f2h_ieee` + ("powerpc" | "powerpc64", _) => false, + // Missing `__gnu_h2f_ieee` and `__gnu_f2h_ieee` + ("mips" | "mips32r6" | "mips64" | "mips64r6", _) => false, + // Missing `__extendhfsf` and `__truncsfhf` + ("riscv32" | "riscv64", _) => false, + // Most OSs are missing `__extendhfsf` and `__truncsfhf` + (_, "linux" | "macos") => true, + // Almost all OSs besides Linux and MacOS are missing symbols until compiler-builtins can + // be updated. will get some of these, the + // next CB update should get the rest. + _ => false, + }; + + let has_reliable_f128 = match (target_arch.as_str(), target_os.as_str()) { + // Unsupported + ("arm64ec", _) => false, + // ABI and precision bugs + // + ("powerpc" | "powerpc64", _) => false, + // Selection bug + ("nvptx64", _) => false, + // ABI unsupported + ("sparc", _) => false, + // 64-bit Linux is about the only platform to have f128 symbols by default + (_, "linux") if target_pointer_width == 64 => true, + // Same as for f16, except MacOS is also missing f128 symbols. 
+        _ => false,
+    };
+
+    if has_reliable_f16 {
+        println!("cargo:rustc-cfg=reliable_f16");
+    }
+    if has_reliable_f128 {
+        println!("cargo:rustc-cfg=reliable_f128");
+    }
 }
diff --git a/std/src/alloc.rs b/std/src/alloc.rs
index b98fbbf762fa2..dc4924cdf581d 100644
--- a/std/src/alloc.rs
+++ b/std/src/alloc.rs
@@ -73,7 +73,9 @@ pub use alloc_crate::alloc::*;
 /// work, such as to serve alignment requests greater than the alignment
 /// provided directly by the backing system allocator.
 ///
-/// This type implements the `GlobalAlloc` trait and Rust programs by default
+/// This type implements the [`GlobalAlloc`] trait. The default global
+/// allocator is currently unspecified; however, libraries such as `cdylib`s
+/// and `staticlib`s are guaranteed to use [`System`] by default, and as such
 /// work as if they had this definition:
 ///
 /// ```rust
diff --git a/std/src/backtrace.rs b/std/src/backtrace.rs
index 475b3e7eb9312..4d376753cb6d2 100644
--- a/std/src/backtrace.rs
+++ b/std/src/backtrace.rs
@@ -95,7 +95,7 @@ use crate::fmt;
 use crate::panic::UnwindSafe;
 use crate::sync::atomic::{AtomicU8, Ordering::Relaxed};
 use crate::sync::LazyLock;
-use crate::sys_common::backtrace::{lock, output_filename, set_image_base};
+use crate::sys::backtrace::{lock, output_filename, set_image_base};
 /// A captured OS thread stack backtrace.
 ///
@@ -428,39 +428,43 @@ impl fmt::Display for Backtrace {
     }
 }
-type LazyResolve = impl (FnOnce() -> Capture) + Send + Sync + UnwindSafe;
-
-fn lazy_resolve(mut capture: Capture) -> LazyResolve {
-    move || {
-        // Use the global backtrace lock to synchronize this as it's a
-        // requirement of the `backtrace` crate, and then actually resolve
-        // everything.
-        let _lock = lock();
-        for frame in capture.frames.iter_mut() {
-            let symbols = &mut frame.symbols;
-            let frame = match &frame.frame {
-                RawFrame::Actual(frame) => frame,
-                #[cfg(test)]
-                RawFrame::Fake => unimplemented!(),
-            };
-            unsafe {
-                backtrace_rs::resolve_frame_unsynchronized(frame, |symbol| {
-                    symbols.push(BacktraceSymbol {
-                        name: symbol.name().map(|m| m.as_bytes().to_vec()),
-                        filename: symbol.filename_raw().map(|b| match b {
-                            BytesOrWideString::Bytes(b) => BytesOrWide::Bytes(b.to_owned()),
-                            BytesOrWideString::Wide(b) => BytesOrWide::Wide(b.to_owned()),
-                        }),
-                        lineno: symbol.lineno(),
-                        colno: symbol.colno(),
+mod helper {
+    use super::*;
+    pub(super) type LazyResolve = impl (FnOnce() -> Capture) + Send + Sync + UnwindSafe;
+
+    pub(super) fn lazy_resolve(mut capture: Capture) -> LazyResolve {
+        move || {
+            // Use the global backtrace lock to synchronize this as it's a
+            // requirement of the `backtrace` crate, and then actually resolve
+            // everything.
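+            // `resolve_frame_unsynchronized` below relies on this lock being held.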
+ let _lock = lock(); + for frame in capture.frames.iter_mut() { + let symbols = &mut frame.symbols; + let frame = match &frame.frame { + RawFrame::Actual(frame) => frame, + #[cfg(test)] + RawFrame::Fake => unimplemented!(), + }; + unsafe { + backtrace_rs::resolve_frame_unsynchronized(frame, |symbol| { + symbols.push(BacktraceSymbol { + name: symbol.name().map(|m| m.as_bytes().to_vec()), + filename: symbol.filename_raw().map(|b| match b { + BytesOrWideString::Bytes(b) => BytesOrWide::Bytes(b.to_owned()), + BytesOrWideString::Wide(b) => BytesOrWide::Wide(b.to_owned()), + }), + lineno: symbol.lineno(), + colno: symbol.colno(), + }); }); - }); + } } - } - capture + capture + } } } +use helper::*; impl RawFrame { fn ip(&self) -> *mut c_void { diff --git a/std/src/collections/hash/map.rs b/std/src/collections/hash/map.rs index 5039f0b6bb289..1f6a3e904795a 100644 --- a/std/src/collections/hash/map.rs +++ b/std/src/collections/hash/map.rs @@ -1018,7 +1018,7 @@ where K: Borrow, Q: Hash + Eq, { - self.base.get_many_unchecked_mut(ks) + unsafe { self.base.get_many_unchecked_mut(ks) } } /// Returns `true` if the map contains a value for the specified key. @@ -1218,7 +1218,7 @@ where /// will cause the map to produce seemingly random results. Higher-level and /// more foolproof APIs like `entry` should be preferred when possible. /// - /// In particular, the hash used to initialized the raw entry must still be + /// In particular, the hash used to initialize the raw entry must still be /// consistent with the hash of the key that is ultimately stored in the entry. /// This is because implementations of HashMap may need to recompute hashes /// when resizing, at which point only the keys are available. diff --git a/std/src/env.rs b/std/src/env.rs index 6f8ac17f12c70..fc9b8cfd46d65 100644 --- a/std/src/env.rs +++ b/std/src/env.rs @@ -120,11 +120,8 @@ pub struct VarsOs { /// # Examples /// /// ``` -/// use std::env; -/// -/// // We will iterate through the references to the element returned by -/// // env::vars(); -/// for (key, value) in env::vars() { +/// // Print all environment variables. +/// for (key, value) in std::env::vars() { /// println!("{key}: {value}"); /// } /// ``` @@ -150,11 +147,8 @@ pub fn vars() -> Vars { /// # Examples /// /// ``` -/// use std::env; -/// -/// // We will iterate through the references to the element returned by -/// // env::vars_os(); -/// for (key, value) in env::vars_os() { +/// // Print all environment variables. +/// for (key, value) in std::env::vars_os() { /// println!("{key:?}: {value:?}"); /// } /// ``` @@ -318,22 +312,27 @@ impl Error for VarError { /// /// # Safety /// -/// Even though this function is currently not marked as `unsafe`, it needs to -/// be because invoking it can cause undefined behaviour. The function will be -/// marked `unsafe` in a future version of Rust. This is tracked in -/// [rust#27970](https://github.com/rust-lang/rust/issues/27970). -/// /// This function is safe to call in a single-threaded program. /// -/// In multi-threaded programs, you must ensure that are no other threads -/// concurrently writing or *reading*(!) from the environment through functions -/// other than the ones in this module. You are responsible for figuring out -/// how to achieve this, but we strongly suggest not using `set_var` or -/// `remove_var` in multi-threaded programs at all. -/// -/// Most C libraries, including libc itself do not advertise which functions -/// read from the environment. 
Even functions from the Rust standard library do -/// that, e.g. for DNS lookups from [`std::net::ToSocketAddrs`]. +/// This function is also always safe to call on Windows, in single-threaded +/// and multi-threaded programs. +/// +/// In multi-threaded programs on other operating systems, the only safe option is +/// to not use `set_var` or `remove_var` at all. +/// +/// The exact requirement is: you +/// must ensure that there are no other threads concurrently writing or +/// *reading*(!) the environment through functions or global variables other +/// than the ones in this module. The problem is that these operating systems +/// do not provide a thread-safe way to read the environment, and most C +/// libraries, including libc itself, do not advertise which functions read +/// from the environment. Even functions from the Rust standard library may +/// read the environment without going through this module, e.g. for DNS +/// lookups from [`std::net::ToSocketAddrs`]. No stable guarantee is made about +/// which functions may read from the environment in future versions of a +/// library. All this makes it not practically possible for you to guarantee +/// that no other thread will read the environment, so the only safe option is +/// to not use `set_var` or `remove_var` in multi-threaded programs at all. /// /// Discussion of this unsafety on Unix may be found in: /// @@ -353,16 +352,16 @@ impl Error for VarError { /// use std::env; /// /// let key = "KEY"; -/// env::set_var(key, "VALUE"); +/// unsafe { +/// env::set_var(key, "VALUE"); +/// } /// assert_eq!(env::var(key), Ok("VALUE".to_string())); /// ``` +#[rustc_deprecated_safe_2024] #[stable(feature = "env", since = "1.0.0")] -pub fn set_var, V: AsRef>(key: K, value: V) { - _set_var(key.as_ref(), value.as_ref()) -} - -fn _set_var(key: &OsStr, value: &OsStr) { - os_imp::setenv(key, value).unwrap_or_else(|e| { +pub unsafe fn set_var, V: AsRef>(key: K, value: V) { + let (key, value) = (key.as_ref(), value.as_ref()); + unsafe { os_imp::setenv(key, value) }.unwrap_or_else(|e| { panic!("failed to set environment variable `{key:?}` to `{value:?}`: {e}") }) } @@ -371,22 +370,27 @@ fn _set_var(key: &OsStr, value: &OsStr) { /// /// # Safety /// -/// Even though this function is currently not marked as `unsafe`, it needs to -/// be because invoking it can cause undefined behaviour. The function will be -/// marked `unsafe` in a future version of Rust. This is tracked in -/// [rust#27970](https://github.com/rust-lang/rust/issues/27970). -/// /// This function is safe to call in a single-threaded program. /// -/// In multi-threaded programs, you must ensure that are no other threads -/// concurrently writing or *reading*(!) from the environment through functions -/// other than the ones in this module. You are responsible for figuring out -/// how to achieve this, but we strongly suggest not using `set_var` or -/// `remove_var` in multi-threaded programs at all. -/// -/// Most C libraries, including libc itself do not advertise which functions -/// read from the environment. Even functions from the Rust standard library do -/// that, e.g. for DNS lookups from [`std::net::ToSocketAddrs`]. +/// This function is also always safe to call on Windows, in single-threaded +/// and multi-threaded programs. +/// +/// In multi-threaded programs on other operating systems, the only safe option is +/// to not use `set_var` or `remove_var` at all. 
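+///
+/// (Windows is exempt because the environment there is only accessed through
+/// OS APIs, which synchronize access internally.)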
+/// +/// The exact requirement is: you +/// must ensure that there are no other threads concurrently writing or +/// *reading*(!) the environment through functions or global variables other +/// than the ones in this module. The problem is that these operating systems +/// do not provide a thread-safe way to read the environment, and most C +/// libraries, including libc itself, do not advertise which functions read +/// from the environment. Even functions from the Rust standard library may +/// read the environment without going through this module, e.g. for DNS +/// lookups from [`std::net::ToSocketAddrs`]. No stable guarantee is made about +/// which functions may read from the environment in future versions of a +/// library. All this makes it not practically possible for you to guarantee +/// that no other thread will read the environment, so the only safe option is +/// to not use `set_var` or `remove_var` in multi-threaded programs at all. /// /// Discussion of this unsafety on Unix may be found in: /// @@ -403,23 +407,25 @@ fn _set_var(key: &OsStr, value: &OsStr) { /// /// # Examples /// -/// ``` +/// ```no_run /// use std::env; /// /// let key = "KEY"; -/// env::set_var(key, "VALUE"); +/// unsafe { +/// env::set_var(key, "VALUE"); +/// } /// assert_eq!(env::var(key), Ok("VALUE".to_string())); /// -/// env::remove_var(key); +/// unsafe { +/// env::remove_var(key); +/// } /// assert!(env::var(key).is_err()); /// ``` +#[rustc_deprecated_safe_2024] #[stable(feature = "env", since = "1.0.0")] -pub fn remove_var>(key: K) { - _remove_var(key.as_ref()) -} - -fn _remove_var(key: &OsStr) { - os_imp::unsetenv(key) +pub unsafe fn remove_var>(key: K) { + let key = key.as_ref(); + unsafe { os_imp::unsetenv(key) } .unwrap_or_else(|e| panic!("failed to remove environment variable `{key:?}`: {e}")) } diff --git a/std/src/error.rs b/std/src/error.rs index b240e4e2c45be..87aad8f764bd0 100644 --- a/std/src/error.rs +++ b/std/src/error.rs @@ -429,7 +429,7 @@ impl Report { /// 1: rust_out::main::_doctest_main_src_error_rs_1158_0 /// 2: rust_out::main /// 3: core::ops::function::FnOnce::call_once - /// 4: std::sys_common::backtrace::__rust_begin_short_backtrace + /// 4: std::sys::backtrace::__rust_begin_short_backtrace /// 5: std::rt::lang_start::{{closure}} /// 6: std::panicking::try /// 7: std::rt::lang_start_internal diff --git a/std/src/f128.rs b/std/src/f128.rs index 491235a872eaf..0591c6f517b44 100644 --- a/std/src/f128.rs +++ b/std/src/f128.rs @@ -32,4 +32,34 @@ impl f128 { pub fn powi(self, n: i32) -> f128 { unsafe { intrinsics::powif128(self, n) } } + + /// Computes the absolute value of `self`. + /// + /// This function always returns the precise result. + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// # #[cfg(reliable_f128)] { // FIXME(f16_f128): reliable_f128 + /// + /// let x = 3.5_f128; + /// let y = -3.5_f128; + /// + /// assert_eq!(x.abs(), x); + /// assert_eq!(y.abs(), -y); + /// + /// assert!(f128::NAN.abs().is_nan()); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[rustc_allow_incoherent_impl] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "method returns a new number and does not mutate the original value"] + pub fn abs(self) -> Self { + // FIXME(f16_f128): replace with `intrinsics::fabsf128` when available + // We don't do this now because LLVM has lowering bugs for f128 math. 
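+        // IEEE 754 binary128 keeps the sign in bit 127, so clearing that single
+        // bit yields the absolute value of every input, including NaNs and
+        // infinities, without any floating-point operation.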
+        Self::from_bits(self.to_bits() & !(1 << 127))
+    }
 }
diff --git a/std/src/f128/tests.rs b/std/src/f128/tests.rs
index b64c7f856a15f..0b3e485b0e735 100644
--- a/std/src/f128/tests.rs
+++ b/std/src/f128/tests.rs
@@ -1,29 +1,32 @@
-#![allow(dead_code)] // FIXME(f16_f128): remove once constants are used
+#![cfg(not(bootstrap))]
+// FIXME(f16_f128): only tested on platforms that have symbols and aren't buggy
+#![cfg(reliable_f128)]
+
+use crate::f128::consts;
+use crate::num::FpCategory as Fp;
+use crate::num::*;
 /// Smallest number
 const TINY_BITS: u128 = 0x1;
+
 /// Next smallest number
 const TINY_UP_BITS: u128 = 0x2;
+
 /// Exponent = 0b11...10, Significand 0b1111..10. Min val > 0
-const MAX_DOWN_BITS: u128 = 0x7ffeffffffffffffffffffffffffffff;
+const MAX_DOWN_BITS: u128 = 0x7ffefffffffffffffffffffffffffffe;
+
 /// Zeroed exponent, full significand
 const LARGEST_SUBNORMAL_BITS: u128 = 0x0000ffffffffffffffffffffffffffff;
+
 /// Exponent = 0b1, zeroed significand
 const SMALLEST_NORMAL_BITS: u128 = 0x00010000000000000000000000000000;
+
 /// First pattern over the mantissa
 const NAN_MASK1: u128 = 0x0000aaaaaaaaaaaaaaaaaaaaaaaaaaaa;
+
 /// Second pattern over the mantissa
 const NAN_MASK2: u128 = 0x00005555555555555555555555555555;
-/// Compare by value
-#[allow(unused_macros)]
-macro_rules! assert_f128_eq {
-    ($a:expr, $b:expr) => {
-        let (l, r): (&f128, &f128) = (&$a, &$b);
-        assert_eq!(*l, *r, "\na: {:#0130x}\nb: {:#0130x}", l.to_bits(), r.to_bits())
-    };
-}
-
 /// Compare by representation
 #[allow(unused_macros)]
 macro_rules! assert_f128_biteq {
@@ -31,10 +34,530 @@ macro_rules! assert_f128_biteq {
         let (l, r): (&f128, &f128) = (&$a, &$b);
         let lb = l.to_bits();
         let rb = r.to_bits();
-        assert_eq!(
-            lb, rb,
-            "float {:?} is not bitequal to {:?}.\na: {:#0130x}\nb: {:#0130x}",
-            *l, *r, lb, rb
-        );
+        assert_eq!(lb, rb, "float {l:?} is not bitequal to {r:?}.\na: {lb:#034x}\nb: {rb:#034x}");
     };
 }
+
+#[test]
+fn test_num_f128() {
+    test_num(10f128, 2f128);
+}
+
+// FIXME(f16_f128): add min and max tests when available
+
+#[test]
+fn test_nan() {
+    let nan: f128 = f128::NAN;
+    assert!(nan.is_nan());
+    assert!(!nan.is_infinite());
+    assert!(!nan.is_finite());
+    assert!(nan.is_sign_positive());
+    assert!(!nan.is_sign_negative());
+    assert!(!nan.is_normal());
+    assert_eq!(Fp::Nan, nan.classify());
+}
+
+#[test]
+fn test_infinity() {
+    let inf: f128 = f128::INFINITY;
+    assert!(inf.is_infinite());
+    assert!(!inf.is_finite());
+    assert!(inf.is_sign_positive());
+    assert!(!inf.is_sign_negative());
+    assert!(!inf.is_nan());
+    assert!(!inf.is_normal());
+    assert_eq!(Fp::Infinite, inf.classify());
+}
+
+#[test]
+fn test_neg_infinity() {
+    let neg_inf: f128 = f128::NEG_INFINITY;
+    assert!(neg_inf.is_infinite());
+    assert!(!neg_inf.is_finite());
+    assert!(!neg_inf.is_sign_positive());
+    assert!(neg_inf.is_sign_negative());
+    assert!(!neg_inf.is_nan());
+    assert!(!neg_inf.is_normal());
+    assert_eq!(Fp::Infinite, neg_inf.classify());
+}
+
+#[test]
+fn test_zero() {
+    let zero: f128 = 0.0f128;
+    assert_eq!(0.0, zero);
+    assert!(!zero.is_infinite());
+    assert!(zero.is_finite());
+    assert!(zero.is_sign_positive());
+    assert!(!zero.is_sign_negative());
+    assert!(!zero.is_nan());
+    assert!(!zero.is_normal());
+    assert_eq!(Fp::Zero, zero.classify());
+}
+
+#[test]
+fn test_neg_zero() {
+    let neg_zero: f128 = -0.0;
+    assert_eq!(0.0, neg_zero);
+    assert!(!neg_zero.is_infinite());
+    assert!(neg_zero.is_finite());
+    assert!(!neg_zero.is_sign_positive());
+    assert!(neg_zero.is_sign_negative());
+    assert!(!neg_zero.is_nan());
+
assert!(!neg_zero.is_normal()); + assert_eq!(Fp::Zero, neg_zero.classify()); +} + +#[test] +fn test_one() { + let one: f128 = 1.0f128; + assert_eq!(1.0, one); + assert!(!one.is_infinite()); + assert!(one.is_finite()); + assert!(one.is_sign_positive()); + assert!(!one.is_sign_negative()); + assert!(!one.is_nan()); + assert!(one.is_normal()); + assert_eq!(Fp::Normal, one.classify()); +} + +#[test] +fn test_is_nan() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert!(nan.is_nan()); + assert!(!0.0f128.is_nan()); + assert!(!5.3f128.is_nan()); + assert!(!(-10.732f128).is_nan()); + assert!(!inf.is_nan()); + assert!(!neg_inf.is_nan()); +} + +#[test] +fn test_is_infinite() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert!(!nan.is_infinite()); + assert!(inf.is_infinite()); + assert!(neg_inf.is_infinite()); + assert!(!0.0f128.is_infinite()); + assert!(!42.8f128.is_infinite()); + assert!(!(-109.2f128).is_infinite()); +} + +#[test] +fn test_is_finite() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert!(!nan.is_finite()); + assert!(!inf.is_finite()); + assert!(!neg_inf.is_finite()); + assert!(0.0f128.is_finite()); + assert!(42.8f128.is_finite()); + assert!((-109.2f128).is_finite()); +} + +#[test] +fn test_is_normal() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + let zero: f128 = 0.0f128; + let neg_zero: f128 = -0.0; + assert!(!nan.is_normal()); + assert!(!inf.is_normal()); + assert!(!neg_inf.is_normal()); + assert!(!zero.is_normal()); + assert!(!neg_zero.is_normal()); + assert!(1f128.is_normal()); + assert!(1e-4931f128.is_normal()); + assert!(!1e-4932f128.is_normal()); +} + +#[test] +fn test_classify() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + let zero: f128 = 0.0f128; + let neg_zero: f128 = -0.0; + assert_eq!(nan.classify(), Fp::Nan); + assert_eq!(inf.classify(), Fp::Infinite); + assert_eq!(neg_inf.classify(), Fp::Infinite); + assert_eq!(zero.classify(), Fp::Zero); + assert_eq!(neg_zero.classify(), Fp::Zero); + assert_eq!(1f128.classify(), Fp::Normal); + assert_eq!(1e-4931f128.classify(), Fp::Normal); + assert_eq!(1e-4932f128.classify(), Fp::Subnormal); +} + +// FIXME(f16_f128): add missing math functions when available + +#[test] +fn test_abs() { + assert_eq!(f128::INFINITY.abs(), f128::INFINITY); + assert_eq!(1f128.abs(), 1f128); + assert_eq!(0f128.abs(), 0f128); + assert_eq!((-0f128).abs(), 0f128); + assert_eq!((-1f128).abs(), 1f128); + assert_eq!(f128::NEG_INFINITY.abs(), f128::INFINITY); + assert_eq!((1f128 / f128::NEG_INFINITY).abs(), 0f128); + assert!(f128::NAN.abs().is_nan()); +} + +#[test] +fn test_is_sign_positive() { + assert!(f128::INFINITY.is_sign_positive()); + assert!(1f128.is_sign_positive()); + assert!(0f128.is_sign_positive()); + assert!(!(-0f128).is_sign_positive()); + assert!(!(-1f128).is_sign_positive()); + assert!(!f128::NEG_INFINITY.is_sign_positive()); + assert!(!(1f128 / f128::NEG_INFINITY).is_sign_positive()); + assert!(f128::NAN.is_sign_positive()); + assert!(!(-f128::NAN).is_sign_positive()); +} + +#[test] +fn test_is_sign_negative() { + assert!(!f128::INFINITY.is_sign_negative()); + assert!(!1f128.is_sign_negative()); + assert!(!0f128.is_sign_negative()); + assert!((-0f128).is_sign_negative()); + assert!((-1f128).is_sign_negative()); + 
assert!(f128::NEG_INFINITY.is_sign_negative()); + assert!((1f128 / f128::NEG_INFINITY).is_sign_negative()); + assert!(!f128::NAN.is_sign_negative()); + assert!((-f128::NAN).is_sign_negative()); +} + +#[test] +fn test_next_up() { + let tiny = f128::from_bits(TINY_BITS); + let tiny_up = f128::from_bits(TINY_UP_BITS); + let max_down = f128::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f128::from_bits(SMALLEST_NORMAL_BITS); + assert_f128_biteq!(f128::NEG_INFINITY.next_up(), f128::MIN); + assert_f128_biteq!(f128::MIN.next_up(), -max_down); + assert_f128_biteq!((-1.0 - f128::EPSILON).next_up(), -1.0); + assert_f128_biteq!((-smallest_normal).next_up(), -largest_subnormal); + assert_f128_biteq!((-tiny_up).next_up(), -tiny); + assert_f128_biteq!((-tiny).next_up(), -0.0f128); + assert_f128_biteq!((-0.0f128).next_up(), tiny); + assert_f128_biteq!(0.0f128.next_up(), tiny); + assert_f128_biteq!(tiny.next_up(), tiny_up); + assert_f128_biteq!(largest_subnormal.next_up(), smallest_normal); + assert_f128_biteq!(1.0f128.next_up(), 1.0 + f128::EPSILON); + assert_f128_biteq!(f128::MAX.next_up(), f128::INFINITY); + assert_f128_biteq!(f128::INFINITY.next_up(), f128::INFINITY); + + // Check that NaNs roundtrip. + let nan0 = f128::NAN; + let nan1 = f128::from_bits(f128::NAN.to_bits() ^ 0x002a_aaaa); + let nan2 = f128::from_bits(f128::NAN.to_bits() ^ 0x0055_5555); + assert_f128_biteq!(nan0.next_up(), nan0); + assert_f128_biteq!(nan1.next_up(), nan1); + assert_f128_biteq!(nan2.next_up(), nan2); +} + +#[test] +fn test_next_down() { + let tiny = f128::from_bits(TINY_BITS); + let tiny_up = f128::from_bits(TINY_UP_BITS); + let max_down = f128::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f128::from_bits(SMALLEST_NORMAL_BITS); + assert_f128_biteq!(f128::NEG_INFINITY.next_down(), f128::NEG_INFINITY); + assert_f128_biteq!(f128::MIN.next_down(), f128::NEG_INFINITY); + assert_f128_biteq!((-max_down).next_down(), f128::MIN); + assert_f128_biteq!((-1.0f128).next_down(), -1.0 - f128::EPSILON); + assert_f128_biteq!((-largest_subnormal).next_down(), -smallest_normal); + assert_f128_biteq!((-tiny).next_down(), -tiny_up); + assert_f128_biteq!((-0.0f128).next_down(), -tiny); + assert_f128_biteq!((0.0f128).next_down(), -tiny); + assert_f128_biteq!(tiny.next_down(), 0.0f128); + assert_f128_biteq!(tiny_up.next_down(), tiny); + assert_f128_biteq!(smallest_normal.next_down(), largest_subnormal); + assert_f128_biteq!((1.0 + f128::EPSILON).next_down(), 1.0f128); + assert_f128_biteq!(f128::MAX.next_down(), max_down); + assert_f128_biteq!(f128::INFINITY.next_down(), f128::MAX); + + // Check that NaNs roundtrip. 
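+    // `next_down` leaves NaN operands unchanged; XORing `f128::NAN` with the
+    // patterns below builds NaNs with distinct payloads, so the payload is
+    // checked to survive bit-for-bit as well.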
+ let nan0 = f128::NAN; + let nan1 = f128::from_bits(f128::NAN.to_bits() ^ 0x002a_aaaa); + let nan2 = f128::from_bits(f128::NAN.to_bits() ^ 0x0055_5555); + assert_f128_biteq!(nan0.next_down(), nan0); + assert_f128_biteq!(nan1.next_down(), nan1); + assert_f128_biteq!(nan2.next_down(), nan2); +} + +#[test] +fn test_recip() { + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert_eq!(1.0f128.recip(), 1.0); + assert_eq!(2.0f128.recip(), 0.5); + assert_eq!((-0.4f128).recip(), -2.5); + assert_eq!(0.0f128.recip(), inf); + assert!(nan.recip().is_nan()); + assert_eq!(inf.recip(), 0.0); + assert_eq!(neg_inf.recip(), 0.0); +} + +#[test] +fn test_to_degrees() { + let pi: f128 = consts::PI; + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert_eq!(0.0f128.to_degrees(), 0.0); + assert_approx_eq!((-5.8f128).to_degrees(), -332.315521); + assert_eq!(pi.to_degrees(), 180.0); + assert!(nan.to_degrees().is_nan()); + assert_eq!(inf.to_degrees(), inf); + assert_eq!(neg_inf.to_degrees(), neg_inf); + assert_eq!(1_f128.to_degrees(), 57.2957795130823208767981548141051703); +} + +#[test] +fn test_to_radians() { + let pi: f128 = consts::PI; + let nan: f128 = f128::NAN; + let inf: f128 = f128::INFINITY; + let neg_inf: f128 = f128::NEG_INFINITY; + assert_eq!(0.0f128.to_radians(), 0.0); + assert_approx_eq!(154.6f128.to_radians(), 2.698279); + assert_approx_eq!((-332.31f128).to_radians(), -5.799903); + // check approx rather than exact because round trip for pi doesn't fall on an exactly + // representable value (unlike `f32` and `f64`). + assert_approx_eq!(180.0f128.to_radians(), pi); + assert!(nan.to_radians().is_nan()); + assert_eq!(inf.to_radians(), inf); + assert_eq!(neg_inf.to_radians(), neg_inf); +} + +#[test] +fn test_real_consts() { + // FIXME(f16_f128): add math tests when available + use super::consts; + + let pi: f128 = consts::PI; + let frac_pi_2: f128 = consts::FRAC_PI_2; + let frac_pi_3: f128 = consts::FRAC_PI_3; + let frac_pi_4: f128 = consts::FRAC_PI_4; + let frac_pi_6: f128 = consts::FRAC_PI_6; + let frac_pi_8: f128 = consts::FRAC_PI_8; + let frac_1_pi: f128 = consts::FRAC_1_PI; + let frac_2_pi: f128 = consts::FRAC_2_PI; + // let frac_2_sqrtpi: f128 = consts::FRAC_2_SQRT_PI; + // let sqrt2: f128 = consts::SQRT_2; + // let frac_1_sqrt2: f128 = consts::FRAC_1_SQRT_2; + // let e: f128 = consts::E; + // let log2_e: f128 = consts::LOG2_E; + // let log10_e: f128 = consts::LOG10_E; + // let ln_2: f128 = consts::LN_2; + // let ln_10: f128 = consts::LN_10; + + assert_approx_eq!(frac_pi_2, pi / 2f128); + assert_approx_eq!(frac_pi_3, pi / 3f128); + assert_approx_eq!(frac_pi_4, pi / 4f128); + assert_approx_eq!(frac_pi_6, pi / 6f128); + assert_approx_eq!(frac_pi_8, pi / 8f128); + assert_approx_eq!(frac_1_pi, 1f128 / pi); + assert_approx_eq!(frac_2_pi, 2f128 / pi); + // assert_approx_eq!(frac_2_sqrtpi, 2f128 / pi.sqrt()); + // assert_approx_eq!(sqrt2, 2f128.sqrt()); + // assert_approx_eq!(frac_1_sqrt2, 1f128 / 2f128.sqrt()); + // assert_approx_eq!(log2_e, e.log2()); + // assert_approx_eq!(log10_e, e.log10()); + // assert_approx_eq!(ln_2, 2f128.ln()); + // assert_approx_eq!(ln_10, 10f128.ln()); +} + +#[test] +fn test_float_bits_conv() { + assert_eq!((1f128).to_bits(), 0x3fff0000000000000000000000000000); + assert_eq!((12.5f128).to_bits(), 0x40029000000000000000000000000000); + assert_eq!((1337f128).to_bits(), 0x40094e40000000000000000000000000); + assert_eq!((-14.25f128).to_bits(), 
0xc002c800000000000000000000000000);
+    assert_approx_eq!(f128::from_bits(0x3fff0000000000000000000000000000), 1.0);
+    assert_approx_eq!(f128::from_bits(0x40029000000000000000000000000000), 12.5);
+    assert_approx_eq!(f128::from_bits(0x40094e40000000000000000000000000), 1337.0);
+    assert_approx_eq!(f128::from_bits(0xc002c800000000000000000000000000), -14.25);
+
+    // Check that NaNs roundtrip their bits regardless of signaling-ness
+    // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobber all the mantissa bits
+    let masked_nan1 = f128::NAN.to_bits() ^ NAN_MASK1;
+    let masked_nan2 = f128::NAN.to_bits() ^ NAN_MASK2;
+    assert!(f128::from_bits(masked_nan1).is_nan());
+    assert!(f128::from_bits(masked_nan2).is_nan());
+
+    assert_eq!(f128::from_bits(masked_nan1).to_bits(), masked_nan1);
+    assert_eq!(f128::from_bits(masked_nan2).to_bits(), masked_nan2);
+}
+
+#[test]
+#[should_panic]
+fn test_clamp_min_greater_than_max() {
+    let _ = 1.0f128.clamp(3.0, 1.0);
+}
+
+#[test]
+#[should_panic]
+fn test_clamp_min_is_nan() {
+    let _ = 1.0f128.clamp(f128::NAN, 1.0);
+}
+
+#[test]
+#[should_panic]
+fn test_clamp_max_is_nan() {
+    let _ = 1.0f128.clamp(3.0, f128::NAN);
+}
+
+#[test]
+fn test_total_cmp() {
+    use core::cmp::Ordering;
+
+    fn quiet_bit_mask() -> u128 {
+        1 << (f128::MANTISSA_DIGITS - 2)
+    }
+
+    // FIXME(f16_f128): test subnormals when powf is available
+    // fn min_subnorm() -> f128 {
+    //     f128::MIN_POSITIVE / f128::powf(2.0, f128::MANTISSA_DIGITS as f128 - 1.0)
+    // }
+
+    // fn max_subnorm() -> f128 {
+    //     f128::MIN_POSITIVE - min_subnorm()
+    // }
+
+    fn q_nan() -> f128 {
+        f128::from_bits(f128::NAN.to_bits() | quiet_bit_mask())
+    }
+
+    fn s_nan() -> f128 {
+        f128::from_bits((f128::NAN.to_bits() & !quiet_bit_mask()) + 42)
+    }
+
+    assert_eq!(Ordering::Equal, (-q_nan()).total_cmp(&-q_nan()));
+    assert_eq!(Ordering::Equal, (-s_nan()).total_cmp(&-s_nan()));
+    assert_eq!(Ordering::Equal, (-f128::INFINITY).total_cmp(&-f128::INFINITY));
+    assert_eq!(Ordering::Equal, (-f128::MAX).total_cmp(&-f128::MAX));
+    assert_eq!(Ordering::Equal, (-2.5_f128).total_cmp(&-2.5));
+    assert_eq!(Ordering::Equal, (-1.0_f128).total_cmp(&-1.0));
+    assert_eq!(Ordering::Equal, (-1.5_f128).total_cmp(&-1.5));
+    assert_eq!(Ordering::Equal, (-0.5_f128).total_cmp(&-0.5));
+    assert_eq!(Ordering::Equal, (-f128::MIN_POSITIVE).total_cmp(&-f128::MIN_POSITIVE));
+    // assert_eq!(Ordering::Equal, (-max_subnorm()).total_cmp(&-max_subnorm()));
+    // assert_eq!(Ordering::Equal, (-min_subnorm()).total_cmp(&-min_subnorm()));
+    assert_eq!(Ordering::Equal, (-0.0_f128).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, 0.0_f128.total_cmp(&0.0));
+    // assert_eq!(Ordering::Equal, min_subnorm().total_cmp(&min_subnorm()));
+    // assert_eq!(Ordering::Equal, max_subnorm().total_cmp(&max_subnorm()));
+    assert_eq!(Ordering::Equal, f128::MIN_POSITIVE.total_cmp(&f128::MIN_POSITIVE));
+    assert_eq!(Ordering::Equal, 0.5_f128.total_cmp(&0.5));
+    assert_eq!(Ordering::Equal, 1.0_f128.total_cmp(&1.0));
+    assert_eq!(Ordering::Equal, 1.5_f128.total_cmp(&1.5));
+    assert_eq!(Ordering::Equal, 2.5_f128.total_cmp(&2.5));
+    assert_eq!(Ordering::Equal, f128::MAX.total_cmp(&f128::MAX));
+    assert_eq!(Ordering::Equal, f128::INFINITY.total_cmp(&f128::INFINITY));
+    assert_eq!(Ordering::Equal, s_nan().total_cmp(&s_nan()));
+    assert_eq!(Ordering::Equal, q_nan().total_cmp(&q_nan()));
+
+    assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-s_nan()));
+    assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f128::INFINITY));
+    assert_eq!(Ordering::Less,
(-f128::INFINITY).total_cmp(&-f128::MAX)); + assert_eq!(Ordering::Less, (-f128::MAX).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-2.5_f128).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-1.5_f128).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-1.0_f128).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-0.5_f128).total_cmp(&-f128::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-f128::MIN_POSITIVE).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Less, (-max_subnorm()).total_cmp(&-min_subnorm())); + // assert_eq!(Ordering::Less, (-min_subnorm()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-0.0_f128).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, 0.0_f128.total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, min_subnorm().total_cmp(&max_subnorm())); + // assert_eq!(Ordering::Less, max_subnorm().total_cmp(&f128::MIN_POSITIVE)); + assert_eq!(Ordering::Less, f128::MIN_POSITIVE.total_cmp(&0.5)); + assert_eq!(Ordering::Less, 0.5_f128.total_cmp(&1.0)); + assert_eq!(Ordering::Less, 1.0_f128.total_cmp(&1.5)); + assert_eq!(Ordering::Less, 1.5_f128.total_cmp(&2.5)); + assert_eq!(Ordering::Less, 2.5_f128.total_cmp(&f128::MAX)); + assert_eq!(Ordering::Less, f128::MAX.total_cmp(&f128::INFINITY)); + assert_eq!(Ordering::Less, f128::INFINITY.total_cmp(&s_nan())); + assert_eq!(Ordering::Less, s_nan().total_cmp(&q_nan())); + + assert_eq!(Ordering::Greater, (-s_nan()).total_cmp(&-q_nan())); + assert_eq!(Ordering::Greater, (-f128::INFINITY).total_cmp(&-s_nan())); + assert_eq!(Ordering::Greater, (-f128::MAX).total_cmp(&-f128::INFINITY)); + assert_eq!(Ordering::Greater, (-2.5_f128).total_cmp(&-f128::MAX)); + assert_eq!(Ordering::Greater, (-1.5_f128).total_cmp(&-2.5)); + assert_eq!(Ordering::Greater, (-1.0_f128).total_cmp(&-1.5)); + assert_eq!(Ordering::Greater, (-0.5_f128).total_cmp(&-1.0)); + assert_eq!(Ordering::Greater, (-f128::MIN_POSITIVE).total_cmp(&-0.5)); + // assert_eq!(Ordering::Greater, (-max_subnorm()).total_cmp(&-f128::MIN_POSITIVE)); + // assert_eq!(Ordering::Greater, (-min_subnorm()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Greater, (-0.0_f128).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Greater, 0.0_f128.total_cmp(&-0.0)); + // assert_eq!(Ordering::Greater, min_subnorm().total_cmp(&0.0)); + // assert_eq!(Ordering::Greater, max_subnorm().total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Greater, f128::MIN_POSITIVE.total_cmp(&max_subnorm())); + assert_eq!(Ordering::Greater, 0.5_f128.total_cmp(&f128::MIN_POSITIVE)); + assert_eq!(Ordering::Greater, 1.0_f128.total_cmp(&0.5)); + assert_eq!(Ordering::Greater, 1.5_f128.total_cmp(&1.0)); + assert_eq!(Ordering::Greater, 2.5_f128.total_cmp(&1.5)); + assert_eq!(Ordering::Greater, f128::MAX.total_cmp(&2.5)); + assert_eq!(Ordering::Greater, f128::INFINITY.total_cmp(&f128::MAX)); + assert_eq!(Ordering::Greater, s_nan().total_cmp(&f128::INFINITY)); + assert_eq!(Ordering::Greater, q_nan().total_cmp(&s_nan())); + + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-s_nan())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f128::INFINITY)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f128::MAX)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f128::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-max_subnorm())); + // 
assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&max_subnorm())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f128::MIN_POSITIVE)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&0.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&1.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&1.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&2.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f128::MAX)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f128::INFINITY)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&s_nan())); + + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f128::INFINITY)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f128::MAX)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f128::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&max_subnorm())); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f128::MIN_POSITIVE)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&0.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&1.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&1.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&2.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f128::MAX)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f128::INFINITY)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&s_nan())); +} diff --git a/std/src/f16.rs b/std/src/f16.rs index 1cb655ffabd84..d48518622999a 100644 --- a/std/src/f16.rs +++ b/std/src/f16.rs @@ -32,4 +32,33 @@ impl f16 { pub fn powi(self, n: i32) -> f16 { unsafe { intrinsics::powif16(self, n) } } + + /// Computes the absolute value of `self`. + /// + /// This function always returns the precise result. 
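+    /// Only the sign bit is cleared, so no rounding can occur for any input.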
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(f16)]
+    /// # #[cfg(reliable_f16)] {
+    ///
+    /// let x = 3.5_f16;
+    /// let y = -3.5_f16;
+    ///
+    /// assert_eq!(x.abs(), x);
+    /// assert_eq!(y.abs(), -y);
+    ///
+    /// assert!(f16::NAN.abs().is_nan());
+    /// # }
+    /// ```
+    #[inline]
+    #[cfg(not(bootstrap))]
+    #[rustc_allow_incoherent_impl]
+    #[unstable(feature = "f16", issue = "116909")]
+    #[must_use = "method returns a new number and does not mutate the original value"]
+    pub fn abs(self) -> Self {
+        // FIXME(f16_f128): replace with `intrinsics::fabsf16` when available
+        Self::from_bits(self.to_bits() & !(1 << 15))
+    }
 }
diff --git a/std/src/f16/tests.rs b/std/src/f16/tests.rs
index d65c43eca4bb8..26658a0be87bc 100644
--- a/std/src/f16/tests.rs
+++ b/std/src/f16/tests.rs
@@ -1,35 +1,38 @@
-#![allow(dead_code)] // FIXME(f16_f128): remove once constants are used
+#![cfg(not(bootstrap))]
+// FIXME(f16_f128): only tested on platforms that have symbols and aren't buggy
+#![cfg(reliable_f16)]
+
+use crate::f16::consts;
+use crate::num::FpCategory as Fp;
+use crate::num::*;
 // We run out of precision pretty quickly with f16
-const F16_APPROX_L1: f16 = 0.001;
+// const F16_APPROX_L1: f16 = 0.001;
 const F16_APPROX_L2: f16 = 0.01;
-const F16_APPROX_L3: f16 = 0.1;
+// const F16_APPROX_L3: f16 = 0.1;
 const F16_APPROX_L4: f16 = 0.5;
 /// Smallest number
 const TINY_BITS: u16 = 0x1;
+
 /// Next smallest number
 const TINY_UP_BITS: u16 = 0x2;
+
 /// Exponent = 0b11...10, Significand 0b1111..10. Min val > 0
 const MAX_DOWN_BITS: u16 = 0x7bfe;
+
 /// Zeroed exponent, full significand
 const LARGEST_SUBNORMAL_BITS: u16 = 0x03ff;
+
 /// Exponent = 0b1, zeroed significand
 const SMALLEST_NORMAL_BITS: u16 = 0x0400;
+
 /// First pattern over the mantissa
 const NAN_MASK1: u16 = 0x02aa;
+
 /// Second pattern over the mantissa
 const NAN_MASK2: u16 = 0x0155;
-/// Compare by value
-#[allow(unused_macros)]
-macro_rules! assert_f16_eq {
-    ($a:expr, $b:expr) => {
-        let (l, r): (&f16, &f16) = (&$a, &$b);
-        assert_eq!(*l, *r, "\na: {:#018x}\nb: {:#018x}", l.to_bits(), r.to_bits())
-    };
-}
-
 /// Compare by representation
 #[allow(unused_macros)]
 macro_rules! assert_f16_biteq {
@@ -37,10 +40,527 @@ macro_rules!
assert_f16_biteq { let (l, r): (&f16, &f16) = (&$a, &$b); let lb = l.to_bits(); let rb = r.to_bits(); - assert_eq!( - lb, rb, - "float {:?} is not bitequal to {:?}.\na: {:#018x}\nb: {:#018x}", - *l, *r, lb, rb - ); + assert_eq!(lb, rb, "float {l:?} ({lb:#04x}) is not bitequal to {r:?} ({rb:#04x})"); }; } + +#[test] +fn test_num_f16() { + test_num(10f16, 2f16); +} + +// FIXME(f16_f128): add min and max tests when available + +#[test] +fn test_nan() { + let nan: f16 = f16::NAN; + assert!(nan.is_nan()); + assert!(!nan.is_infinite()); + assert!(!nan.is_finite()); + assert!(nan.is_sign_positive()); + assert!(!nan.is_sign_negative()); + assert!(!nan.is_normal()); + assert_eq!(Fp::Nan, nan.classify()); +} + +#[test] +fn test_infinity() { + let inf: f16 = f16::INFINITY; + assert!(inf.is_infinite()); + assert!(!inf.is_finite()); + assert!(inf.is_sign_positive()); + assert!(!inf.is_sign_negative()); + assert!(!inf.is_nan()); + assert!(!inf.is_normal()); + assert_eq!(Fp::Infinite, inf.classify()); +} + +#[test] +fn test_neg_infinity() { + let neg_inf: f16 = f16::NEG_INFINITY; + assert!(neg_inf.is_infinite()); + assert!(!neg_inf.is_finite()); + assert!(!neg_inf.is_sign_positive()); + assert!(neg_inf.is_sign_negative()); + assert!(!neg_inf.is_nan()); + assert!(!neg_inf.is_normal()); + assert_eq!(Fp::Infinite, neg_inf.classify()); +} + +#[test] +fn test_zero() { + let zero: f16 = 0.0f16; + assert_eq!(0.0, zero); + assert!(!zero.is_infinite()); + assert!(zero.is_finite()); + assert!(zero.is_sign_positive()); + assert!(!zero.is_sign_negative()); + assert!(!zero.is_nan()); + assert!(!zero.is_normal()); + assert_eq!(Fp::Zero, zero.classify()); +} + +#[test] +fn test_neg_zero() { + let neg_zero: f16 = -0.0; + assert_eq!(0.0, neg_zero); + assert!(!neg_zero.is_infinite()); + assert!(neg_zero.is_finite()); + assert!(!neg_zero.is_sign_positive()); + assert!(neg_zero.is_sign_negative()); + assert!(!neg_zero.is_nan()); + assert!(!neg_zero.is_normal()); + assert_eq!(Fp::Zero, neg_zero.classify()); +} + +#[test] +fn test_one() { + let one: f16 = 1.0f16; + assert_eq!(1.0, one); + assert!(!one.is_infinite()); + assert!(one.is_finite()); + assert!(one.is_sign_positive()); + assert!(!one.is_sign_negative()); + assert!(!one.is_nan()); + assert!(one.is_normal()); + assert_eq!(Fp::Normal, one.classify()); +} + +#[test] +fn test_is_nan() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert!(nan.is_nan()); + assert!(!0.0f16.is_nan()); + assert!(!5.3f16.is_nan()); + assert!(!(-10.732f16).is_nan()); + assert!(!inf.is_nan()); + assert!(!neg_inf.is_nan()); +} + +#[test] +fn test_is_infinite() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert!(!nan.is_infinite()); + assert!(inf.is_infinite()); + assert!(neg_inf.is_infinite()); + assert!(!0.0f16.is_infinite()); + assert!(!42.8f16.is_infinite()); + assert!(!(-109.2f16).is_infinite()); +} + +#[test] +fn test_is_finite() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert!(!nan.is_finite()); + assert!(!inf.is_finite()); + assert!(!neg_inf.is_finite()); + assert!(0.0f16.is_finite()); + assert!(42.8f16.is_finite()); + assert!((-109.2f16).is_finite()); +} + +#[test] +fn test_is_normal() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + let zero: f16 = 0.0f16; + let neg_zero: f16 = -0.0; + assert!(!nan.is_normal()); + assert!(!inf.is_normal()); + 
assert!(!neg_inf.is_normal()); + assert!(!zero.is_normal()); + assert!(!neg_zero.is_normal()); + assert!(1f16.is_normal()); + assert!(1e-4f16.is_normal()); + assert!(!1e-5f16.is_normal()); +} + +#[test] +fn test_classify() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + let zero: f16 = 0.0f16; + let neg_zero: f16 = -0.0; + assert_eq!(nan.classify(), Fp::Nan); + assert_eq!(inf.classify(), Fp::Infinite); + assert_eq!(neg_inf.classify(), Fp::Infinite); + assert_eq!(zero.classify(), Fp::Zero); + assert_eq!(neg_zero.classify(), Fp::Zero); + assert_eq!(1f16.classify(), Fp::Normal); + assert_eq!(1e-4f16.classify(), Fp::Normal); + assert_eq!(1e-5f16.classify(), Fp::Subnormal); +} + +// FIXME(f16_f128): add missing math functions when available + +#[test] +fn test_abs() { + assert_eq!(f16::INFINITY.abs(), f16::INFINITY); + assert_eq!(1f16.abs(), 1f16); + assert_eq!(0f16.abs(), 0f16); + assert_eq!((-0f16).abs(), 0f16); + assert_eq!((-1f16).abs(), 1f16); + assert_eq!(f16::NEG_INFINITY.abs(), f16::INFINITY); + assert_eq!((1f16 / f16::NEG_INFINITY).abs(), 0f16); + assert!(f16::NAN.abs().is_nan()); +} + +#[test] +fn test_is_sign_positive() { + assert!(f16::INFINITY.is_sign_positive()); + assert!(1f16.is_sign_positive()); + assert!(0f16.is_sign_positive()); + assert!(!(-0f16).is_sign_positive()); + assert!(!(-1f16).is_sign_positive()); + assert!(!f16::NEG_INFINITY.is_sign_positive()); + assert!(!(1f16 / f16::NEG_INFINITY).is_sign_positive()); + assert!(f16::NAN.is_sign_positive()); + assert!(!(-f16::NAN).is_sign_positive()); +} + +#[test] +fn test_is_sign_negative() { + assert!(!f16::INFINITY.is_sign_negative()); + assert!(!1f16.is_sign_negative()); + assert!(!0f16.is_sign_negative()); + assert!((-0f16).is_sign_negative()); + assert!((-1f16).is_sign_negative()); + assert!(f16::NEG_INFINITY.is_sign_negative()); + assert!((1f16 / f16::NEG_INFINITY).is_sign_negative()); + assert!(!f16::NAN.is_sign_negative()); + assert!((-f16::NAN).is_sign_negative()); +} + +#[test] +fn test_next_up() { + let tiny = f16::from_bits(TINY_BITS); + let tiny_up = f16::from_bits(TINY_UP_BITS); + let max_down = f16::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f16::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f16::from_bits(SMALLEST_NORMAL_BITS); + assert_f16_biteq!(f16::NEG_INFINITY.next_up(), f16::MIN); + assert_f16_biteq!(f16::MIN.next_up(), -max_down); + assert_f16_biteq!((-1.0 - f16::EPSILON).next_up(), -1.0); + assert_f16_biteq!((-smallest_normal).next_up(), -largest_subnormal); + assert_f16_biteq!((-tiny_up).next_up(), -tiny); + assert_f16_biteq!((-tiny).next_up(), -0.0f16); + assert_f16_biteq!((-0.0f16).next_up(), tiny); + assert_f16_biteq!(0.0f16.next_up(), tiny); + assert_f16_biteq!(tiny.next_up(), tiny_up); + assert_f16_biteq!(largest_subnormal.next_up(), smallest_normal); + assert_f16_biteq!(1.0f16.next_up(), 1.0 + f16::EPSILON); + assert_f16_biteq!(f16::MAX.next_up(), f16::INFINITY); + assert_f16_biteq!(f16::INFINITY.next_up(), f16::INFINITY); + + // Check that NaNs roundtrip. 
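+    // `next_up` must return NaN inputs unchanged; NAN_MASK1 and NAN_MASK2 flip
+    // different mantissa bits to produce NaNs with distinct payloads for the
+    // bit-equality checks below.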
+ let nan0 = f16::NAN; + let nan1 = f16::from_bits(f16::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f16::from_bits(f16::NAN.to_bits() ^ NAN_MASK2); + assert_f16_biteq!(nan0.next_up(), nan0); + assert_f16_biteq!(nan1.next_up(), nan1); + assert_f16_biteq!(nan2.next_up(), nan2); +} + +#[test] +fn test_next_down() { + let tiny = f16::from_bits(TINY_BITS); + let tiny_up = f16::from_bits(TINY_UP_BITS); + let max_down = f16::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f16::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f16::from_bits(SMALLEST_NORMAL_BITS); + assert_f16_biteq!(f16::NEG_INFINITY.next_down(), f16::NEG_INFINITY); + assert_f16_biteq!(f16::MIN.next_down(), f16::NEG_INFINITY); + assert_f16_biteq!((-max_down).next_down(), f16::MIN); + assert_f16_biteq!((-1.0f16).next_down(), -1.0 - f16::EPSILON); + assert_f16_biteq!((-largest_subnormal).next_down(), -smallest_normal); + assert_f16_biteq!((-tiny).next_down(), -tiny_up); + assert_f16_biteq!((-0.0f16).next_down(), -tiny); + assert_f16_biteq!((0.0f16).next_down(), -tiny); + assert_f16_biteq!(tiny.next_down(), 0.0f16); + assert_f16_biteq!(tiny_up.next_down(), tiny); + assert_f16_biteq!(smallest_normal.next_down(), largest_subnormal); + assert_f16_biteq!((1.0 + f16::EPSILON).next_down(), 1.0f16); + assert_f16_biteq!(f16::MAX.next_down(), max_down); + assert_f16_biteq!(f16::INFINITY.next_down(), f16::MAX); + + // Check that NaNs roundtrip. + let nan0 = f16::NAN; + let nan1 = f16::from_bits(f16::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f16::from_bits(f16::NAN.to_bits() ^ NAN_MASK2); + assert_f16_biteq!(nan0.next_down(), nan0); + assert_f16_biteq!(nan1.next_down(), nan1); + assert_f16_biteq!(nan2.next_down(), nan2); +} + +#[test] +fn test_recip() { + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert_eq!(1.0f16.recip(), 1.0); + assert_eq!(2.0f16.recip(), 0.5); + assert_eq!((-0.4f16).recip(), -2.5); + assert_eq!(0.0f16.recip(), inf); + assert!(nan.recip().is_nan()); + assert_eq!(inf.recip(), 0.0); + assert_eq!(neg_inf.recip(), 0.0); +} + +#[test] +fn test_to_degrees() { + let pi: f16 = consts::PI; + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert_eq!(0.0f16.to_degrees(), 0.0); + assert_approx_eq!((-5.8f16).to_degrees(), -332.315521); + assert_approx_eq!(pi.to_degrees(), 180.0, F16_APPROX_L4); + assert!(nan.to_degrees().is_nan()); + assert_eq!(inf.to_degrees(), inf); + assert_eq!(neg_inf.to_degrees(), neg_inf); + assert_eq!(1_f16.to_degrees(), 57.2957795130823208767981548141051703); +} + +#[test] +fn test_to_radians() { + let pi: f16 = consts::PI; + let nan: f16 = f16::NAN; + let inf: f16 = f16::INFINITY; + let neg_inf: f16 = f16::NEG_INFINITY; + assert_eq!(0.0f16.to_radians(), 0.0); + assert_approx_eq!(154.6f16.to_radians(), 2.698279); + assert_approx_eq!((-332.31f16).to_radians(), -5.799903); + assert_approx_eq!(180.0f16.to_radians(), pi, F16_APPROX_L2); + assert!(nan.to_radians().is_nan()); + assert_eq!(inf.to_radians(), inf); + assert_eq!(neg_inf.to_radians(), neg_inf); +} + +#[test] +fn test_real_consts() { + // FIXME(f16_f128): add math tests when available + use super::consts; + + let pi: f16 = consts::PI; + let frac_pi_2: f16 = consts::FRAC_PI_2; + let frac_pi_3: f16 = consts::FRAC_PI_3; + let frac_pi_4: f16 = consts::FRAC_PI_4; + let frac_pi_6: f16 = consts::FRAC_PI_6; + let frac_pi_8: f16 = consts::FRAC_PI_8; + let frac_1_pi: f16 = consts::FRAC_1_PI; + let frac_2_pi: f16 = consts::FRAC_2_PI; + // let 
frac_2_sqrtpi: f16 = consts::FRAC_2_SQRT_PI; + // let sqrt2: f16 = consts::SQRT_2; + // let frac_1_sqrt2: f16 = consts::FRAC_1_SQRT_2; + // let e: f16 = consts::E; + // let log2_e: f16 = consts::LOG2_E; + // let log10_e: f16 = consts::LOG10_E; + // let ln_2: f16 = consts::LN_2; + // let ln_10: f16 = consts::LN_10; + + assert_approx_eq!(frac_pi_2, pi / 2f16); + assert_approx_eq!(frac_pi_3, pi / 3f16); + assert_approx_eq!(frac_pi_4, pi / 4f16); + assert_approx_eq!(frac_pi_6, pi / 6f16); + assert_approx_eq!(frac_pi_8, pi / 8f16); + assert_approx_eq!(frac_1_pi, 1f16 / pi); + assert_approx_eq!(frac_2_pi, 2f16 / pi); + // assert_approx_eq!(frac_2_sqrtpi, 2f16 / pi.sqrt()); + // assert_approx_eq!(sqrt2, 2f16.sqrt()); + // assert_approx_eq!(frac_1_sqrt2, 1f16 / 2f16.sqrt()); + // assert_approx_eq!(log2_e, e.log2()); + // assert_approx_eq!(log10_e, e.log10()); + // assert_approx_eq!(ln_2, 2f16.ln()); + // assert_approx_eq!(ln_10, 10f16.ln()); +} + +#[test] +fn test_float_bits_conv() { + assert_eq!((1f16).to_bits(), 0x3c00); + assert_eq!((12.5f16).to_bits(), 0x4a40); + assert_eq!((1337f16).to_bits(), 0x6539); + assert_eq!((-14.25f16).to_bits(), 0xcb20); + assert_approx_eq!(f16::from_bits(0x3c00), 1.0); + assert_approx_eq!(f16::from_bits(0x4a40), 12.5); + assert_approx_eq!(f16::from_bits(0x6539), 1337.0); + assert_approx_eq!(f16::from_bits(0xcb20), -14.25); + + // Check that NaNs roundtrip their bits regardless of signaling-ness + let masked_nan1 = f16::NAN.to_bits() ^ NAN_MASK1; + let masked_nan2 = f16::NAN.to_bits() ^ NAN_MASK2; + assert!(f16::from_bits(masked_nan1).is_nan()); + assert!(f16::from_bits(masked_nan2).is_nan()); + + assert_eq!(f16::from_bits(masked_nan1).to_bits(), masked_nan1); + assert_eq!(f16::from_bits(masked_nan2).to_bits(), masked_nan2); +} + +#[test] +#[should_panic] +fn test_clamp_min_greater_than_max() { + let _ = 1.0f16.clamp(3.0, 1.0); +} + +#[test] +#[should_panic] +fn test_clamp_min_is_nan() { + let _ = 1.0f16.clamp(f16::NAN, 1.0); +} + +#[test] +#[should_panic] +fn test_clamp_max_is_nan() { + let _ = 1.0f16.clamp(3.0, f16::NAN); +} + +#[test] +fn test_total_cmp() { + use core::cmp::Ordering; + + fn quiet_bit_mask() -> u16 { + 1 << (f16::MANTISSA_DIGITS - 2) + } + + // FIXME(f16_f128): test subnormals when powf is available + // fn min_subnorm() -> f16 { + // f16::MIN_POSITIVE / f16::powf(2.0, f16::MANTISSA_DIGITS as f16 - 1.0) + // } + + // fn max_subnorm() -> f16 { + // f16::MIN_POSITIVE - min_subnorm() + // } + + fn q_nan() -> f16 { + f16::from_bits(f16::NAN.to_bits() | quiet_bit_mask()) + } + + fn s_nan() -> f16 { + f16::from_bits((f16::NAN.to_bits() & !quiet_bit_mask()) + 42) + } + + assert_eq!(Ordering::Equal, (-q_nan()).total_cmp(&-q_nan())); + assert_eq!(Ordering::Equal, (-s_nan()).total_cmp(&-s_nan())); + assert_eq!(Ordering::Equal, (-f16::INFINITY).total_cmp(&-f16::INFINITY)); + assert_eq!(Ordering::Equal, (-f16::MAX).total_cmp(&-f16::MAX)); + assert_eq!(Ordering::Equal, (-2.5_f16).total_cmp(&-2.5)); + assert_eq!(Ordering::Equal, (-1.0_f16).total_cmp(&-1.0)); + assert_eq!(Ordering::Equal, (-1.5_f16).total_cmp(&-1.5)); + assert_eq!(Ordering::Equal, (-0.5_f16).total_cmp(&-0.5)); + assert_eq!(Ordering::Equal, (-f16::MIN_POSITIVE).total_cmp(&-f16::MIN_POSITIVE)); + // assert_eq!(Ordering::Equal, (-max_subnorm()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Equal, (-min_subnorm()).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Equal, (-0.0_f16).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0_f16.total_cmp(&0.0)); + // assert_eq!(Ordering::Equal, 
min_subnorm().total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Equal, max_subnorm().total_cmp(&max_subnorm())); + assert_eq!(Ordering::Equal, f16::MIN_POSITIVE.total_cmp(&f16::MIN_POSITIVE)); + assert_eq!(Ordering::Equal, 0.5_f16.total_cmp(&0.5)); + assert_eq!(Ordering::Equal, 1.0_f16.total_cmp(&1.0)); + assert_eq!(Ordering::Equal, 1.5_f16.total_cmp(&1.5)); + assert_eq!(Ordering::Equal, 2.5_f16.total_cmp(&2.5)); + assert_eq!(Ordering::Equal, f16::MAX.total_cmp(&f16::MAX)); + assert_eq!(Ordering::Equal, f16::INFINITY.total_cmp(&f16::INFINITY)); + assert_eq!(Ordering::Equal, s_nan().total_cmp(&s_nan())); + assert_eq!(Ordering::Equal, q_nan().total_cmp(&q_nan())); + + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-s_nan())); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f16::INFINITY)); + assert_eq!(Ordering::Less, (-f16::INFINITY).total_cmp(&-f16::MAX)); + assert_eq!(Ordering::Less, (-f16::MAX).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-2.5_f16).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-1.5_f16).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-1.0_f16).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-0.5_f16).total_cmp(&-f16::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-f16::MIN_POSITIVE).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Less, (-max_subnorm()).total_cmp(&-min_subnorm())); + // assert_eq!(Ordering::Less, (-min_subnorm()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-0.0_f16).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, 0.0_f16.total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, min_subnorm().total_cmp(&max_subnorm())); + // assert_eq!(Ordering::Less, max_subnorm().total_cmp(&f16::MIN_POSITIVE)); + assert_eq!(Ordering::Less, f16::MIN_POSITIVE.total_cmp(&0.5)); + assert_eq!(Ordering::Less, 0.5_f16.total_cmp(&1.0)); + assert_eq!(Ordering::Less, 1.0_f16.total_cmp(&1.5)); + assert_eq!(Ordering::Less, 1.5_f16.total_cmp(&2.5)); + assert_eq!(Ordering::Less, 2.5_f16.total_cmp(&f16::MAX)); + assert_eq!(Ordering::Less, f16::MAX.total_cmp(&f16::INFINITY)); + assert_eq!(Ordering::Less, f16::INFINITY.total_cmp(&s_nan())); + assert_eq!(Ordering::Less, s_nan().total_cmp(&q_nan())); + + assert_eq!(Ordering::Greater, (-s_nan()).total_cmp(&-q_nan())); + assert_eq!(Ordering::Greater, (-f16::INFINITY).total_cmp(&-s_nan())); + assert_eq!(Ordering::Greater, (-f16::MAX).total_cmp(&-f16::INFINITY)); + assert_eq!(Ordering::Greater, (-2.5_f16).total_cmp(&-f16::MAX)); + assert_eq!(Ordering::Greater, (-1.5_f16).total_cmp(&-2.5)); + assert_eq!(Ordering::Greater, (-1.0_f16).total_cmp(&-1.5)); + assert_eq!(Ordering::Greater, (-0.5_f16).total_cmp(&-1.0)); + assert_eq!(Ordering::Greater, (-f16::MIN_POSITIVE).total_cmp(&-0.5)); + // assert_eq!(Ordering::Greater, (-max_subnorm()).total_cmp(&-f16::MIN_POSITIVE)); + // assert_eq!(Ordering::Greater, (-min_subnorm()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Greater, (-0.0_f16).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Greater, 0.0_f16.total_cmp(&-0.0)); + // assert_eq!(Ordering::Greater, min_subnorm().total_cmp(&0.0)); + // assert_eq!(Ordering::Greater, max_subnorm().total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Greater, f16::MIN_POSITIVE.total_cmp(&max_subnorm())); + assert_eq!(Ordering::Greater, 0.5_f16.total_cmp(&f16::MIN_POSITIVE)); + assert_eq!(Ordering::Greater, 1.0_f16.total_cmp(&0.5)); + assert_eq!(Ordering::Greater, 1.5_f16.total_cmp(&1.0)); + assert_eq!(Ordering::Greater, 2.5_f16.total_cmp(&1.5)); + assert_eq!(Ordering::Greater, f16::MAX.total_cmp(&2.5)); + 
assert_eq!(Ordering::Greater, f16::INFINITY.total_cmp(&f16::MAX)); + assert_eq!(Ordering::Greater, s_nan().total_cmp(&f16::INFINITY)); + assert_eq!(Ordering::Greater, q_nan().total_cmp(&s_nan())); + + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-s_nan())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f16::INFINITY)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f16::MAX)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-f16::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&max_subnorm())); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f16::MIN_POSITIVE)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&0.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&1.0)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&1.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&2.5)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f16::MAX)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&f16::INFINITY)); + assert_eq!(Ordering::Less, (-q_nan()).total_cmp(&s_nan())); + + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f16::INFINITY)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f16::MAX)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-2.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-1.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-1.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-0.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-f16::MIN_POSITIVE)); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-max_subnorm())); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-min_subnorm())); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&-0.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&0.0)); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&min_subnorm())); + // assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&max_subnorm())); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f16::MIN_POSITIVE)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&0.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&1.0)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&1.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&2.5)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f16::MAX)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&f16::INFINITY)); + assert_eq!(Ordering::Less, (-s_nan()).total_cmp(&s_nan())); +} diff --git a/std/src/f32/tests.rs b/std/src/f32/tests.rs index 9ca4e8f2f45fe..63e65698374c8 100644 --- a/std/src/f32/tests.rs +++ b/std/src/f32/tests.rs @@ -2,6 +2,45 @@ use crate::f32::consts; use crate::num::FpCategory as Fp; use crate::num::*; +/// Smallest number +#[allow(dead_code)] // unused on x86 +const TINY_BITS: u32 = 0x1; + +/// Next smallest number +#[allow(dead_code)] // unused on x86 +const TINY_UP_BITS: u32 = 0x2; + +/// Exponent = 0b11...10, Significand 0b1111..10.
Min val > 0 +#[allow(dead_code)] // unused on x86 +const MAX_DOWN_BITS: u32 = 0x7f7f_fffe; + +/// Zeroed exponent, full significand +#[allow(dead_code)] // unused on x86 +const LARGEST_SUBNORMAL_BITS: u32 = 0x007f_ffff; + +/// Exponent = 0b1, zeroed significand +#[allow(dead_code)] // unused on x86 +const SMALLEST_NORMAL_BITS: u32 = 0x0080_0000; + +/// First pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK1: u32 = 0x002a_aaaa; + +/// Second pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK2: u32 = 0x0055_5555; + +#[allow(unused_macros)] +macro_rules! assert_f32_biteq { + ($left : expr, $right : expr) => { + let l: &f32 = &$left; + let r: &f32 = &$right; + let lb = l.to_bits(); + let rb = r.to_bits(); + assert_eq!(lb, rb, "float {l} ({lb:#010x}) is not bitequal to {r} ({rb:#010x})"); + }; +} + #[test] fn test_num_f32() { test_num(10f32, 2f32); @@ -315,27 +354,16 @@ fn test_is_sign_negative() { assert!((-f32::NAN).is_sign_negative()); } -#[allow(unused_macros)] -macro_rules! assert_f32_biteq { - ($left : expr, $right : expr) => { - let l: &f32 = &$left; - let r: &f32 = &$right; - let lb = l.to_bits(); - let rb = r.to_bits(); - assert_eq!(lb, rb, "float {} ({:#x}) is not equal to {} ({:#x})", *l, lb, *r, rb); - }; -} - // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. #[cfg(not(target_arch = "x86"))] #[test] fn test_next_up() { - let tiny = f32::from_bits(1); - let tiny_up = f32::from_bits(2); - let max_down = f32::from_bits(0x7f7f_fffe); - let largest_subnormal = f32::from_bits(0x007f_ffff); - let smallest_normal = f32::from_bits(0x0080_0000); + let tiny = f32::from_bits(TINY_BITS); + let tiny_up = f32::from_bits(TINY_UP_BITS); + let max_down = f32::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f32::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f32::from_bits(SMALLEST_NORMAL_BITS); assert_f32_biteq!(f32::NEG_INFINITY.next_up(), f32::MIN); assert_f32_biteq!(f32::MIN.next_up(), -max_down); assert_f32_biteq!((-1.0 - f32::EPSILON).next_up(), -1.0); @@ -352,8 +380,8 @@ fn test_next_up() { // Check that NaNs roundtrip. let nan0 = f32::NAN; - let nan1 = f32::from_bits(f32::NAN.to_bits() ^ 0x002a_aaaa); - let nan2 = f32::from_bits(f32::NAN.to_bits() ^ 0x0055_5555); + let nan1 = f32::from_bits(f32::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f32::from_bits(f32::NAN.to_bits() ^ NAN_MASK2); assert_f32_biteq!(nan0.next_up(), nan0); assert_f32_biteq!(nan1.next_up(), nan1); assert_f32_biteq!(nan2.next_up(), nan2); @@ -364,11 +392,11 @@ fn test_next_up() { #[cfg(not(target_arch = "x86"))] #[test] fn test_next_down() { - let tiny = f32::from_bits(1); - let tiny_up = f32::from_bits(2); - let max_down = f32::from_bits(0x7f7f_fffe); - let largest_subnormal = f32::from_bits(0x007f_ffff); - let smallest_normal = f32::from_bits(0x0080_0000); + let tiny = f32::from_bits(TINY_BITS); + let tiny_up = f32::from_bits(TINY_UP_BITS); + let max_down = f32::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f32::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f32::from_bits(SMALLEST_NORMAL_BITS); assert_f32_biteq!(f32::NEG_INFINITY.next_down(), f32::NEG_INFINITY); assert_f32_biteq!(f32::MIN.next_down(), f32::NEG_INFINITY); assert_f32_biteq!((-max_down).next_down(), f32::MIN); @@ -386,8 +414,8 @@ fn test_next_down() { // Check that NaNs roundtrip.
let nan0 = f32::NAN; - let nan1 = f32::from_bits(f32::NAN.to_bits() ^ 0x002a_aaaa); - let nan2 = f32::from_bits(f32::NAN.to_bits() ^ 0x0055_5555); + let nan1 = f32::from_bits(f32::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f32::from_bits(f32::NAN.to_bits() ^ NAN_MASK2); assert_f32_biteq!(nan0.next_down(), nan0); assert_f32_biteq!(nan1.next_down(), nan1); assert_f32_biteq!(nan2.next_down(), nan2); @@ -734,8 +762,8 @@ fn test_float_bits_conv() { // Check that NaNs roundtrip their bits regardless of signaling-ness // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits - let masked_nan1 = f32::NAN.to_bits() ^ 0x002A_AAAA; - let masked_nan2 = f32::NAN.to_bits() ^ 0x0055_5555; + let masked_nan1 = f32::NAN.to_bits() ^ NAN_MASK1; + let masked_nan2 = f32::NAN.to_bits() ^ NAN_MASK2; assert!(f32::from_bits(masked_nan1).is_nan()); assert!(f32::from_bits(masked_nan2).is_nan()); diff --git a/std/src/f64.rs b/std/src/f64.rs index f8c66a3e71752..1ca2b32e241c9 100644 --- a/std/src/f64.rs +++ b/std/src/f64.rs @@ -520,7 +520,7 @@ impl f64 { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn ln(self) -> f64 { - crate::sys::log_wrapper(self, |n| unsafe { intrinsics::logf64(n) }) + unsafe { intrinsics::logf64(self) } } /// Returns the logarithm of the number with respect to an arbitrary base. @@ -574,7 +574,7 @@ impl f64 { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn log2(self) -> f64 { - crate::sys::log_wrapper(self, crate::sys::log2f64) + crate::sys::log2f64(self) } /// Returns the base 10 logarithm of the number. @@ -599,7 +599,7 @@ impl f64 { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn log10(self) -> f64 { - crate::sys::log_wrapper(self, |n| unsafe { intrinsics::log10f64(n) }) + unsafe { intrinsics::log10f64(self) } } /// The positive difference of two numbers. diff --git a/std/src/f64/tests.rs b/std/src/f64/tests.rs index f88d01593b5e4..d9e17fd601d2d 100644 --- a/std/src/f64/tests.rs +++ b/std/src/f64/tests.rs @@ -2,6 +2,45 @@ use crate::f64::consts; use crate::num::FpCategory as Fp; use crate::num::*; +/// Smallest number +#[allow(dead_code)] // unused on x86 +const TINY_BITS: u64 = 0x1; + +/// Next smallest number +#[allow(dead_code)] // unused on x86 +const TINY_UP_BITS: u64 = 0x2; + +/// Exponent = 0b11...10, Significand 0b1111..10. Min val > 0 +#[allow(dead_code)] // unused on x86 +const MAX_DOWN_BITS: u64 = 0x7fef_ffff_ffff_fffe; + +/// Zeroed exponent, full significand +#[allow(dead_code)] // unused on x86 +const LARGEST_SUBNORMAL_BITS: u64 = 0x000f_ffff_ffff_ffff; + +/// Exponent = 0b1, zeroed significand +#[allow(dead_code)] // unused on x86 +const SMALLEST_NORMAL_BITS: u64 = 0x0010_0000_0000_0000; + +/// First pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK1: u64 = 0x000a_aaaa_aaaa_aaaa; + +/// Second pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK2: u64 = 0x0005_5555_5555_5555; + +#[allow(unused_macros)] +macro_rules! assert_f64_biteq { + ($left : expr, $right : expr) => { + let l: &f64 = &$left; + let r: &f64 = &$right; + let lb = l.to_bits(); + let rb = r.to_bits(); + assert_eq!(lb, rb, "float {l} ({lb:#018x}) is not bitequal to {r} ({rb:#018x})"); + }; +} + #[test] fn test_num_f64() { test_num(10f64, 2f64); @@ -305,27 +344,16 @@ fn test_is_sign_negative() { assert!((-f64::NAN).is_sign_negative()); } -#[allow(unused_macros)] -macro_rules!
assert_f64_biteq { - ($left : expr, $right : expr) => { - let l: &f64 = &$left; - let r: &f64 = &$right; - let lb = l.to_bits(); - let rb = r.to_bits(); - assert_eq!(lb, rb, "float {} ({:#x}) is not equal to {} ({:#x})", *l, lb, *r, rb); - }; -} - // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. #[cfg(not(target_arch = "x86"))] #[test] fn test_next_up() { - let tiny = f64::from_bits(1); - let tiny_up = f64::from_bits(2); - let max_down = f64::from_bits(0x7fef_ffff_ffff_fffe); - let largest_subnormal = f64::from_bits(0x000f_ffff_ffff_ffff); - let smallest_normal = f64::from_bits(0x0010_0000_0000_0000); + let tiny = f64::from_bits(TINY_BITS); + let tiny_up = f64::from_bits(TINY_UP_BITS); + let max_down = f64::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f64::from_bits(SMALLEST_NORMAL_BITS); assert_f64_biteq!(f64::NEG_INFINITY.next_up(), f64::MIN); assert_f64_biteq!(f64::MIN.next_up(), -max_down); assert_f64_biteq!((-1.0 - f64::EPSILON).next_up(), -1.0); @@ -341,8 +369,8 @@ fn test_next_up() { assert_f64_biteq!(f64::INFINITY.next_up(), f64::INFINITY); let nan0 = f64::NAN; - let nan1 = f64::from_bits(f64::NAN.to_bits() ^ 0x000a_aaaa_aaaa_aaaa); - let nan2 = f64::from_bits(f64::NAN.to_bits() ^ 0x0005_5555_5555_5555); + let nan1 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK2); assert_f64_biteq!(nan0.next_up(), nan0); assert_f64_biteq!(nan1.next_up(), nan1); assert_f64_biteq!(nan2.next_up(), nan2); @@ -353,11 +381,11 @@ fn test_next_up() { #[cfg(not(target_arch = "x86"))] #[test] fn test_next_down() { - let tiny = f64::from_bits(1); - let tiny_up = f64::from_bits(2); - let max_down = f64::from_bits(0x7fef_ffff_ffff_fffe); - let largest_subnormal = f64::from_bits(0x000f_ffff_ffff_ffff); - let smallest_normal = f64::from_bits(0x0010_0000_0000_0000); + let tiny = f64::from_bits(TINY_BITS); + let tiny_up = f64::from_bits(TINY_UP_BITS); + let max_down = f64::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f64::from_bits(SMALLEST_NORMAL_BITS); assert_f64_biteq!(f64::NEG_INFINITY.next_down(), f64::NEG_INFINITY); assert_f64_biteq!(f64::MIN.next_down(), f64::NEG_INFINITY); assert_f64_biteq!((-max_down).next_down(), f64::MIN); @@ -374,8 +402,8 @@ fn test_next_down() { assert_f64_biteq!(f64::INFINITY.next_down(), f64::MAX); let nan0 = f64::NAN; - let nan1 = f64::from_bits(f64::NAN.to_bits() ^ 0x000a_aaaa_aaaa_aaaa); - let nan2 = f64::from_bits(f64::NAN.to_bits() ^ 0x0005_5555_5555_5555); + let nan1 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK2); assert_f64_biteq!(nan0.next_down(), nan0); assert_f64_biteq!(nan1.next_down(), nan1); assert_f64_biteq!(nan2.next_down(), nan2); @@ -715,9 +743,8 @@ fn test_float_bits_conv() { assert_approx_eq!(f64::from_bits(0xc02c800000000000), -14.25); // Check that NaNs roundtrip their bits regardless of signaling-ness - // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits - let masked_nan1 = f64::NAN.to_bits() ^ 0x000A_AAAA_AAAA_AAAA; - let masked_nan2 = f64::NAN.to_bits() ^ 0x0005_5555_5555_5555; + let masked_nan1 = f64::NAN.to_bits() ^ NAN_MASK1; + let masked_nan2 = f64::NAN.to_bits() ^ NAN_MASK2; assert!(f64::from_bits(masked_nan1).is_nan()); 
assert!(f64::from_bits(masked_nan2).is_nan()); diff --git a/std/src/ffi/os_str.rs b/std/src/ffi/os_str.rs index 9dd3d7d3fa16a..f9dba08da4c3c 100644 --- a/std/src/ffi/os_str.rs +++ b/std/src/ffi/os_str.rs @@ -184,7 +184,7 @@ impl OsString { #[inline] #[stable(feature = "os_str_bytes", since = "1.74.0")] pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec) -> Self { - OsString { inner: Buf::from_encoded_bytes_unchecked(bytes) } + OsString { inner: unsafe { Buf::from_encoded_bytes_unchecked(bytes) } } } /// Converts to an [`OsStr`] slice. @@ -533,10 +533,39 @@ impl OsString { unsafe { Box::from_raw(rw) } } - /// Part of a hack to make PathBuf::push/pop more efficient. + /// Consumes and leaks the `OsString`, returning a mutable reference to the contents, + /// `&'a mut OsStr`. + /// + /// The caller has free choice over the returned lifetime, including `'static`. + /// Indeed, this function is ideally used for data that lives for the remainder of + /// the program’s life, as dropping the returned reference will cause a memory leak. + /// + /// It does not reallocate or shrink the `OsString`, so the leaked allocation may include + /// unused capacity that is not part of the returned slice. If you want to discard excess + /// capacity, call [`into_boxed_os_str`], and then [`Box::leak`] instead. + /// However, keep in mind that trimming the capacity may result in a reallocation and copy. + /// + /// [`into_boxed_os_str`]: Self::into_boxed_os_str + #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")] + #[inline] + pub fn leak<'a>(self) -> &'a mut OsStr { + OsStr::from_inner_mut(self.inner.leak()) + } + + /// Provides plumbing to core `Vec::truncate`. + /// A better-behaved alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// A better-behaved alternative to allowing outer types + /// full mutable access to the core `Vec`.
#[inline] - pub(crate) fn as_mut_vec_for_path_buf(&mut self) -> &mut Vec { - self.inner.as_mut_vec_for_path_buf() + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); } } @@ -784,7 +813,7 @@ impl OsStr { #[inline] #[stable(feature = "os_str_bytes", since = "1.74.0")] pub unsafe fn from_encoded_bytes_unchecked(bytes: &[u8]) -> &Self { - Self::from_inner(Slice::from_encoded_bytes_unchecked(bytes)) + Self::from_inner(unsafe { Slice::from_encoded_bytes_unchecked(bytes) }) } #[inline] diff --git a/std/src/ffi/os_str/tests.rs b/std/src/ffi/os_str/tests.rs index b020e05eaab20..5b39b9e34d8c7 100644 --- a/std/src/ffi/os_str/tests.rs +++ b/std/src/ffi/os_str/tests.rs @@ -23,6 +23,15 @@ fn test_os_string_clear() { assert_eq!(0, os_string.inner.as_inner().len()); } +#[test] +fn test_os_string_leak() { + let os_string = OsString::from("have a cake"); + let (len, cap) = (os_string.len(), os_string.capacity()); + let leaked = os_string.leak(); + assert_eq!(leaked.as_encoded_bytes(), b"have a cake"); + unsafe { drop(String::from_raw_parts(leaked as *mut OsStr as _, len, cap)) } +} + #[test] fn test_os_string_capacity() { let os_string = OsString::with_capacity(0); diff --git a/std/src/fs.rs b/std/src/fs.rs index 77e94365b08ec..6413b3515ecec 100644 --- a/std/src/fs.rs +++ b/std/src/fs.rs @@ -767,11 +767,33 @@ fn buffer_capacity_required(mut file: &File) -> Option { #[stable(feature = "rust1", since = "1.0.0")] impl Read for &File { + /// Read some bytes from the file. + /// + /// See [`Read::read`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `read` function on Unix and + /// the `NtReadFile` function on Windows. Note that this [may change in + /// the future][changes]. + /// + /// [changes]: io#platform-specific-behavior #[inline] fn read(&mut self, buf: &mut [u8]) -> io::Result { self.inner.read(buf) } + /// Like `read`, except that it reads into a slice of buffers. + /// + /// See [`Read::read_vectored`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `readv` function on Unix and + /// falls back to the `read` implementation on Windows. Note that this + /// [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior #[inline] fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { self.inner.read_vectored(bufs) @@ -782,6 +804,16 @@ impl Read for &File { self.inner.read_buf(cursor) } + /// Determines if `File` has an efficient `read_vectored` implementation. + /// + /// See [`Read::is_read_vectored`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently returns `true` on Unix and `false` on Windows. + /// Note that this [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior #[inline] fn is_read_vectored(&self) -> bool { self.inner.is_read_vectored() @@ -803,19 +835,63 @@ impl Read for &File { } #[stable(feature = "rust1", since = "1.0.0")] impl Write for &File { + /// Write some bytes to the file. + /// + /// See [`Write::write`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `write` function on Unix and + /// the `NtWriteFile` function on Windows. Note that this [may change in + /// the future][changes].
+ /// + /// [changes]: io#platform-specific-behavior fn write(&mut self, buf: &[u8]) -> io::Result { self.inner.write(buf) } + /// Like `write`, except that it writes from a slice of buffers. + /// + /// See [`Write::write_vectored`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `writev` function on Unix + /// and falls back to the `write` implementation on Windows. Note that this + /// [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { self.inner.write_vectored(bufs) } + /// Determines if `File` has an efficient `write_vectored` implementation. + /// + /// See [`Write::is_write_vectored`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently returns `true` on Unix and `false` on Windows. + /// Note that this [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior #[inline] fn is_write_vectored(&self) -> bool { self.inner.is_write_vectored() } + /// Flushes the file, ensuring that all intermediately buffered contents + /// reach their destination. + /// + /// See [`Write::flush`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// Since a `File` structure doesn't contain any buffers, this function is + /// currently a no-op on Unix and Windows. Note that this [may change in + /// the future][changes]. + /// + /// [changes]: io#platform-specific-behavior #[inline] fn flush(&mut self) -> io::Result<()> { self.inner.flush() @@ -2226,7 +2302,7 @@ pub fn read_link>(path: P) -> io::Result { /// /// This function currently corresponds to the `realpath` function on Unix /// and the `CreateFile` and `GetFinalPathNameByHandle` functions on Windows. -/// Note that, this [may change in the future][changes]. +/// Note that this [may change in the future][changes]. /// /// On Windows, this converts the path to use [extended length path][path] /// syntax, which allows your program to use longer path names, but means you @@ -2310,6 +2386,9 @@ pub fn create_dir>(path: P) -> io::Result<()> { /// If this function returns an error, some of the parent components might have /// been created already. /// +/// If the empty path is passed to this function, it always succeeds without +/// creating any directories. +/// /// # Platform-specific behavior /// /// This function currently corresponds to multiple calls to the `mkdir` @@ -2663,18 +2742,15 @@ impl AsInnerMut for DirBuilder { /// # Examples /// /// ```no_run -/// #![feature(fs_try_exists)] /// use std::fs; /// -/// assert!(!fs::try_exists("does_not_exist.txt").expect("Can't check existence of file does_not_exist.txt")); -/// assert!(fs::try_exists("/root/secret_file.txt").is_err()); +/// assert!(!fs::exists("does_not_exist.txt").expect("Can't check existence of file does_not_exist.txt")); +/// assert!(fs::exists("/root/secret_file.txt").is_err()); /// ``` /// /// [`Path::exists`]: crate::path::Path::exists -// FIXME: stabilization should modify documentation of `exists()` to recommend this method -// instead.
-#[unstable(feature = "fs_try_exists", issue = "83186")] +#[stable(feature = "fs_try_exists", since = "CURRENT_RUSTC_VERSION")] #[inline] -pub fn try_exists>(path: P) -> io::Result { - fs_imp::try_exists(path.as_ref()) +pub fn exists>(path: P) -> io::Result { + fs_imp::exists(path.as_ref()) } diff --git a/std/src/fs/tests.rs b/std/src/fs/tests.rs index dfa05671ab0f1..5ca631399aa4a 100644 --- a/std/src/fs/tests.rs +++ b/std/src/fs/tests.rs @@ -406,7 +406,7 @@ fn file_test_read_buf() { let filename = &tmpdir.join("test"); check!(fs::write(filename, &[1, 2, 3, 4])); - let mut buf: [MaybeUninit; 128] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; 128] = [MaybeUninit::uninit(); 128]; let mut buf = BorrowedBuf::from(buf.as_mut_slice()); let mut file = check!(File::open(filename)); check!(file.read_buf(buf.unfilled())); @@ -1431,7 +1431,7 @@ fn metadata_access_times() { assert_eq!(check!(a.modified()), check!(a.modified())); assert_eq!(check!(b.accessed()), check!(b.modified())); - if cfg!(target_os = "macos") || cfg!(target_os = "windows") { + if cfg!(target_vendor = "apple") || cfg!(target_os = "windows") { check!(a.created()); check!(b.created()); } @@ -1638,16 +1638,8 @@ fn rename_directory() { #[test] fn test_file_times() { - #[cfg(target_os = "ios")] - use crate::os::ios::fs::FileTimesExt; - #[cfg(target_os = "macos")] - use crate::os::macos::fs::FileTimesExt; - #[cfg(target_os = "tvos")] - use crate::os::tvos::fs::FileTimesExt; - #[cfg(target_os = "visionos")] - use crate::os::visionos::fs::FileTimesExt; - #[cfg(target_os = "watchos")] - use crate::os::watchos::fs::FileTimesExt; + #[cfg(target_vendor = "apple")] + use crate::os::darwin::fs::FileTimesExt; #[cfg(windows)] use crate::os::windows::fs::FileTimesExt; @@ -1693,16 +1685,7 @@ fn test_file_times() { #[test] #[cfg(target_vendor = "apple")] fn test_file_times_pre_epoch_with_nanos() { - #[cfg(target_os = "ios")] - use crate::os::ios::fs::FileTimesExt; - #[cfg(target_os = "macos")] - use crate::os::macos::fs::FileTimesExt; - #[cfg(target_os = "tvos")] - use crate::os::tvos::fs::FileTimesExt; - #[cfg(target_os = "visionos")] - use crate::os::visionos::fs::FileTimesExt; - #[cfg(target_os = "watchos")] - use crate::os::watchos::fs::FileTimesExt; + use crate::os::darwin::fs::FileTimesExt; let tmp = tmpdir(); let file = File::create(tmp.join("foo")).unwrap(); diff --git a/std/src/hash/random.rs b/std/src/hash/random.rs index a1ccbb25369bf..0adf91e14ac6e 100644 --- a/std/src/hash/random.rs +++ b/std/src/hash/random.rs @@ -6,6 +6,7 @@ //! outside this crate. //! //! [`collections`]: crate::collections + #[allow(deprecated)] use super::{BuildHasher, Hasher, SipHasher13}; use crate::cell::Cell; diff --git a/std/src/io/buffered/bufreader/buffer.rs b/std/src/io/buffered/bufreader/buffer.rs index e9e29d60ca282..796137c0123e7 100644 --- a/std/src/io/buffered/bufreader/buffer.rs +++ b/std/src/io/buffered/bufreader/buffer.rs @@ -1,13 +1,14 @@ -///! An encapsulation of `BufReader`'s buffer management logic. -/// -/// This module factors out the basic functionality of `BufReader` in order to protect two core -/// invariants: -/// * `filled` bytes of `buf` are always initialized -/// * `pos` is always <= `filled` -/// Since this module encapsulates the buffer management logic, we can ensure that the range -/// `pos..filled` is always a valid index into the initialized region of the buffer. 
This means -/// that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so -/// without encountering any runtime bounds checks. +//! An encapsulation of `BufReader`'s buffer management logic. +//! +//! This module factors out the basic functionality of `BufReader` in order to protect two core +//! invariants: +//! * `filled` bytes of `buf` are always initialized +//! * `pos` is always <= `filled` +//! Since this module encapsulates the buffer management logic, we can ensure that the range +//! `pos..filled` is always a valid index into the initialized region of the buffer. This means +//! that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so +//! without encountering any runtime bounds checks. + use crate::cmp; use crate::io::{self, BorrowedBuf, Read}; use crate::mem::MaybeUninit; diff --git a/std/src/io/buffered/bufwriter.rs b/std/src/io/buffered/bufwriter.rs index 2d13230ffbabd..a8680e9b6ead1 100644 --- a/std/src/io/buffered/bufwriter.rs +++ b/std/src/io/buffered/bufwriter.rs @@ -3,7 +3,7 @@ use crate::fmt; use crate::io::{ self, ErrorKind, IntoInnerError, IoSlice, Seek, SeekFrom, Write, DEFAULT_BUF_SIZE, }; -use crate::mem; +use crate::mem::{self, ManuallyDrop}; use crate::ptr; /// Wraps a writer and buffers its output. @@ -164,13 +164,13 @@ impl BufWriter { /// assert_eq!(&buffered_data.unwrap(), b"ata"); /// ``` #[stable(feature = "bufwriter_into_parts", since = "1.56.0")] - pub fn into_parts(mut self) -> (W, Result, WriterPanicked>) { - let buf = mem::take(&mut self.buf); - let buf = if !self.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) }; + pub fn into_parts(self) -> (W, Result, WriterPanicked>) { + let mut this = ManuallyDrop::new(self); + let buf = mem::take(&mut this.buf); + let buf = if !this.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) }; - // SAFETY: forget(self) prevents double dropping inner - let inner = unsafe { ptr::read(&self.inner) }; - mem::forget(self); + // SAFETY: double-drops are prevented by putting `this` in a ManuallyDrop that is never dropped + let inner = unsafe { ptr::read(&this.inner) }; (inner, buf) } @@ -433,9 +433,11 @@ impl BufWriter { let old_len = self.buf.len(); let buf_len = buf.len(); let src = buf.as_ptr(); - let dst = self.buf.as_mut_ptr().add(old_len); - ptr::copy_nonoverlapping(src, dst, buf_len); - self.buf.set_len(old_len + buf_len); + unsafe { + let dst = self.buf.as_mut_ptr().add(old_len); + ptr::copy_nonoverlapping(src, dst, buf_len); + self.buf.set_len(old_len + buf_len); + } } #[inline] diff --git a/std/src/io/buffered/tests.rs b/std/src/io/buffered/tests.rs index ee0db30e22c2e..ab66deaf31d22 100644 --- a/std/src/io/buffered/tests.rs +++ b/std/src/io/buffered/tests.rs @@ -1067,3 +1067,13 @@ fn bufreader_full_initialize() { // But we initialized the whole buffer! assert_eq!(reader.initialized(), reader.capacity()); } + +/// This is a regression test for https://github.com/rust-lang/rust/issues/127584. 
+#[test] +fn bufwriter_aliasing() { + use crate::io::{BufWriter, Cursor}; + let mut v = vec![0; 1024]; + let c = Cursor::new(&mut v); + let w = BufWriter::new(Box::new(c)); + let _ = w.into_parts(); +} diff --git a/std/src/io/cursor.rs b/std/src/io/cursor.rs index a1a8b2a3505c7..2ed64a40495ef 100644 --- a/std/src/io/cursor.rs +++ b/std/src/io/cursor.rs @@ -482,7 +482,7 @@ where A: Allocator, { debug_assert!(vec.capacity() >= pos + buf.len()); - vec.as_mut_ptr().add(pos).copy_from(buf.as_ptr(), buf.len()); + unsafe { vec.as_mut_ptr().add(pos).copy_from(buf.as_ptr(), buf.len()) }; pos + buf.len() } diff --git a/std/src/io/error/repr_bitpacked.rs b/std/src/io/error/repr_bitpacked.rs index 6f8d5e3777568..fbb74967df3f1 100644 --- a/std/src/io/error/repr_bitpacked.rs +++ b/std/src/io/error/repr_bitpacked.rs @@ -28,7 +28,7 @@ //! //! # Layout //! Tagged values are 64 bits, with the 2 least significant bits used for the -//! tag. This means there are there are 4 "variants": +//! tag. This means there are 4 "variants": //! //! - **Tag 0b00**: The first variant is equivalent to //! `ErrorData::SimpleMessage`, and holds a `&'static SimpleMessage` directly. @@ -104,7 +104,6 @@ use super::{Custom, ErrorData, ErrorKind, RawOsError, SimpleMessage}; use core::marker::PhantomData; -use core::mem::{align_of, size_of}; use core::ptr::{self, NonNull}; // The 2 least-significant bits are used as tag. @@ -268,11 +267,14 @@ where // Using this rather than unwrap meaningfully improves the code // for callers which only care about one variant (usually // `Custom`) - core::hint::unreachable_unchecked(); + unsafe { core::hint::unreachable_unchecked() }; }); ErrorData::Simple(kind) } - TAG_SIMPLE_MESSAGE => ErrorData::SimpleMessage(&*ptr.cast::().as_ptr()), + TAG_SIMPLE_MESSAGE => { + // SAFETY: per tag + unsafe { ErrorData::SimpleMessage(&*ptr.cast::().as_ptr()) } + } TAG_CUSTOM => { // It would be correct for us to use `ptr::byte_sub` here (see the // comment above the `wrapping_add` call in `new_custom` for why), diff --git a/std/src/io/mod.rs b/std/src/io/mod.rs index f55ec1588f91d..1345a30361e28 100644 --- a/std/src/io/mod.rs +++ b/std/src/io/mod.rs @@ -382,11 +382,11 @@ pub(crate) unsafe fn append_to_string(buf: &mut String, f: F) -> Result) -> Result, { - let mut g = Guard { len: buf.len(), buf: buf.as_mut_vec() }; + let mut g = Guard { len: buf.len(), buf: unsafe { buf.as_mut_vec() } }; let ret = f(g.buf); // SAFETY: the caller promises to only append data to `buf` - let appended = g.buf.get_unchecked(g.len..); + let appended = unsafe { g.buf.get_unchecked(g.len..) 
}; if str::from_utf8(appended).is_err() { ret.and_then(|_| Err(Error::INVALID_UTF8)) } else { @@ -1256,8 +1256,6 @@ impl<'a> IoSliceMut<'a> { /// # Examples /// /// ``` - /// #![feature(io_slice_advance)] - /// /// use std::io::IoSliceMut; /// use std::ops::Deref; /// @@ -1268,7 +1266,7 @@ impl<'a> IoSliceMut<'a> { /// buf.advance(3); /// assert_eq!(buf.deref(), [1; 5].as_ref()); /// ``` - #[unstable(feature = "io_slice_advance", issue = "62726")] + #[stable(feature = "io_slice_advance", since = "CURRENT_RUSTC_VERSION")] #[inline] pub fn advance(&mut self, n: usize) { self.0.advance(n) @@ -1290,8 +1288,6 @@ impl<'a> IoSliceMut<'a> { /// # Examples /// /// ``` - /// #![feature(io_slice_advance)] - /// /// use std::io::IoSliceMut; /// use std::ops::Deref; /// @@ -1309,7 +1305,7 @@ impl<'a> IoSliceMut<'a> { /// assert_eq!(bufs[0].deref(), [2; 14].as_ref()); /// assert_eq!(bufs[1].deref(), [3; 8].as_ref()); /// ``` - #[unstable(feature = "io_slice_advance", issue = "62726")] + #[stable(feature = "io_slice_advance", since = "CURRENT_RUSTC_VERSION")] #[inline] pub fn advance_slices(bufs: &mut &mut [IoSliceMut<'a>], n: usize) { // Number of buffers to remove. @@ -1400,8 +1396,6 @@ impl<'a> IoSlice<'a> { /// # Examples /// /// ``` - /// #![feature(io_slice_advance)] - /// /// use std::io::IoSlice; /// use std::ops::Deref; /// @@ -1412,7 +1406,7 @@ impl<'a> IoSlice<'a> { /// buf.advance(3); /// assert_eq!(buf.deref(), [1; 5].as_ref()); /// ``` - #[unstable(feature = "io_slice_advance", issue = "62726")] + #[stable(feature = "io_slice_advance", since = "CURRENT_RUSTC_VERSION")] #[inline] pub fn advance(&mut self, n: usize) { self.0.advance(n) @@ -1434,8 +1428,6 @@ impl<'a> IoSlice<'a> { /// # Examples /// /// ``` - /// #![feature(io_slice_advance)] - /// /// use std::io::IoSlice; /// use std::ops::Deref; /// @@ -1452,7 +1444,7 @@ impl<'a> IoSlice<'a> { /// IoSlice::advance_slices(&mut bufs, 10); /// assert_eq!(bufs[0].deref(), [2; 14].as_ref()); /// assert_eq!(bufs[1].deref(), [3; 8].as_ref()); - #[unstable(feature = "io_slice_advance", issue = "62726")] + #[stable(feature = "io_slice_advance", since = "CURRENT_RUSTC_VERSION")] #[inline] pub fn advance_slices(bufs: &mut &mut [IoSlice<'a>], n: usize) { // Number of buffers to remove. @@ -2058,7 +2050,7 @@ pub trait Seek { /// ``` /// /// [`BufReader`]: crate::io::BufReader - #[stable(feature = "seek_seek_relative", since = "CURRENT_RUSTC_VERSION")] + #[stable(feature = "seek_seek_relative", since = "1.80.0")] fn seek_relative(&mut self, offset: i64) -> Result<()> { self.seek(SeekFrom::Current(offset))?; Ok(()) diff --git a/std/src/io/stdio.rs b/std/src/io/stdio.rs index c8968b74b12d1..9aee2bb5e1c5c 100644 --- a/std/src/io/stdio.rs +++ b/std/src/io/stdio.rs @@ -1190,9 +1190,8 @@ pub trait IsTerminal: crate::sealed::Sealed { /// /// - If you run this example by piping some text to it, e.g. `echo "foo" | path/to/executable` /// it will print: `Hello foo`. - /// - If you instead run the example interactively by running the executable directly, it will - /// panic with the message "Expected input to be piped to the process". - /// + /// - If you instead run the example interactively by running `path/to/executable` directly, it will + /// prompt for input. 
/// /// [changes]: io#platform-specific-behavior /// [`Stdin`]: crate::io::Stdin diff --git a/std/src/lib.rs b/std/src/lib.rs index 4a18db3d5a3fc..f0a73a308a4a4 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -252,6 +252,7 @@ #![allow(internal_features)] #![deny(rustc::existing_doc_keyword)] #![deny(fuzzy_provenance_casts)] +#![deny(unsafe_op_in_unsafe_fn)] #![allow(rustdoc::redundant_explicit_links)] // Ensure that std can be linked against panic_abort despite compiled with `-C panic=unwind` #![deny(ffi_unwind_calls)] @@ -266,6 +267,7 @@ )] #![cfg_attr(any(windows, target_os = "uefi"), feature(round_char_boundary))] #![cfg_attr(target_family = "wasm", feature(stdarch_wasm_atomic_wait))] +#![cfg_attr(target_arch = "wasm64", feature(simd_wasm64))] #![cfg_attr( all(any(target_arch = "x86_64", target_arch = "x86"), target_os = "uefi"), feature(stdarch_x86_has_cpuid) @@ -273,18 +275,17 @@ // // Language features: // tidy-alphabetical-start +#![cfg_attr(bootstrap, feature(c_unwind))] #![feature(alloc_error_handler)] #![feature(allocator_internals)] #![feature(allow_internal_unsafe)] #![feature(allow_internal_unstable)] #![feature(asm_experimental_arch)] -#![feature(c_unwind)] #![feature(cfg_sanitizer_cfi)] #![feature(cfg_target_thread_local)] #![feature(cfi_encoding)] #![feature(concat_idents)] #![feature(const_mut_refs)] -#![feature(const_trait_impl)] #![feature(decl_macro)] #![feature(deprecated_suggestion)] #![feature(doc_cfg)] @@ -324,7 +325,6 @@ #![feature(core_io_borrowed_buf)] #![feature(duration_constants)] #![feature(error_generic_member_access)] -#![feature(error_in_core)] #![feature(error_iter)] #![feature(exact_size_is_empty)] #![feature(exclusive_wrapper)] @@ -336,13 +336,10 @@ #![feature(fmt_internals)] #![feature(hasher_prefixfree_extras)] #![feature(hashmap_internals)] -#![feature(hint_assert_unchecked)] #![feature(ip)] #![feature(maybe_uninit_slice)] -#![feature(maybe_uninit_uninit_array)] #![feature(maybe_uninit_write_slice)] #![feature(panic_can_unwind)] -#![feature(panic_info_message)] #![feature(panic_internals)] #![feature(pointer_is_aligned_to)] #![feature(portable_simd)] @@ -395,7 +392,6 @@ #![feature(edition_panic)] #![feature(format_args_nl)] #![feature(get_many_mut)] -#![feature(lazy_cell)] #![feature(log_syntax)] #![feature(test)] #![feature(trace_macros)] @@ -410,7 +406,6 @@ #![feature(const_ip)] #![feature(const_ipv4)] #![feature(const_ipv6)] -#![feature(const_maybe_uninit_uninit_array)] #![feature(const_waker)] #![feature(thread_local_internals)] // tidy-alphabetical-end @@ -475,7 +470,6 @@ pub mod rt; // The Rust prelude pub mod prelude; -// Public module declarations and re-exports #[stable(feature = "rust1", since = "1.0.0")] pub use alloc_crate::borrow; #[stable(feature = "rust1", since = "1.0.0")] @@ -670,7 +664,7 @@ pub mod alloc; mod panicking; #[path = "../../backtrace/src/lib.rs"] -#[allow(dead_code, unused_attributes, fuzzy_provenance_casts)] +#[allow(dead_code, unused_attributes, fuzzy_provenance_casts, unsafe_op_in_unsafe_fn)] mod backtrace_rs; // Re-export macros defined in core. diff --git a/std/src/macros.rs b/std/src/macros.rs index 58df83bd79d23..972b6015932db 100644 --- a/std/src/macros.rs +++ b/std/src/macros.rs @@ -373,10 +373,17 @@ macro_rules! dbg { }; } +/// Verify that floats are within a tolerance of each other, 1.0e-6 by default. #[cfg(test)] macro_rules! 
assert_approx_eq { - ($a:expr, $b:expr) => {{ + ($a:expr, $b:expr) => {{ assert_approx_eq!($a, $b, 1.0e-6) }}; + ($a:expr, $b:expr, $lim:expr) => {{ let (a, b) = (&$a, &$b); - assert!((*a - *b).abs() < 1.0e-6, "{} is not approximately equal to {}", *a, *b); + let diff = (*a - *b).abs(); + assert!( + diff < $lim, + "{a:?} is not approximately equal to {b:?} (threshold {lim:?}, actual {diff:?})", + lim = $lim + ); }}; } diff --git a/std/src/net/mod.rs b/std/src/net/mod.rs index bcab15db35b5c..858776f14466a 100644 --- a/std/src/net/mod.rs +++ b/std/src/net/mod.rs @@ -27,7 +27,7 @@ use crate::io::{self, ErrorKind}; pub use self::ip_addr::{IpAddr, Ipv4Addr, Ipv6Addr, Ipv6MulticastScope}; #[stable(feature = "rust1", since = "1.0.0")] pub use self::socket_addr::{SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs}; -#[unstable(feature = "tcplistener_into_incoming", issue = "88339")] +#[unstable(feature = "tcplistener_into_incoming", issue = "88373")] pub use self::tcp::IntoIncoming; #[stable(feature = "rust1", since = "1.0.0")] pub use self::tcp::{Incoming, TcpListener, TcpStream}; diff --git a/std/src/net/tcp.rs b/std/src/net/tcp.rs index 9667d5f920e43..6336354239b02 100644 --- a/std/src/net/tcp.rs +++ b/std/src/net/tcp.rs @@ -105,7 +105,7 @@ pub struct Incoming<'a> { /// /// [`accept`]: TcpListener::accept #[derive(Debug)] -#[unstable(feature = "tcplistener_into_incoming", issue = "88339")] +#[unstable(feature = "tcplistener_into_incoming", issue = "88373")] pub struct IntoIncoming { listener: TcpListener, } @@ -894,7 +894,7 @@ impl TcpListener { /// } /// ``` #[must_use = "`self` will be dropped if the result is not used"] - #[unstable(feature = "tcplistener_into_incoming", issue = "88339")] + #[unstable(feature = "tcplistener_into_incoming", issue = "88373")] pub fn into_incoming(self) -> IntoIncoming { IntoIncoming { listener: self } } @@ -1033,7 +1033,7 @@ impl<'a> Iterator for Incoming<'a> { #[stable(feature = "tcp_listener_incoming_fused_iterator", since = "1.64.0")] impl FusedIterator for Incoming<'_> {} -#[unstable(feature = "tcplistener_into_incoming", issue = "88339")] +#[unstable(feature = "tcplistener_into_incoming", issue = "88373")] impl Iterator for IntoIncoming { type Item = io::Result; fn next(&mut self) -> Option> { @@ -1041,7 +1041,7 @@ impl Iterator for IntoIncoming { } } -#[unstable(feature = "tcplistener_into_incoming", issue = "88339")] +#[unstable(feature = "tcplistener_into_incoming", issue = "88373")] impl FusedIterator for IntoIncoming {} impl AsInner for TcpListener { diff --git a/std/src/net/tcp/tests.rs b/std/src/net/tcp/tests.rs index ec8b62f968754..3ad046733a634 100644 --- a/std/src/net/tcp/tests.rs +++ b/std/src/net/tcp/tests.rs @@ -301,7 +301,7 @@ fn read_buf() { }); let mut s = t!(srv.accept()).0; - let mut buf: [MaybeUninit; 128] = MaybeUninit::uninit_array(); + let mut buf: [MaybeUninit; 128] = [MaybeUninit::uninit(); 128]; let mut buf = BorrowedBuf::from(buf.as_mut_slice()); t!(s.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); diff --git a/std/src/os/macos/fs.rs b/std/src/os/darwin/fs.rs similarity index 98% rename from std/src/os/macos/fs.rs rename to std/src/os/darwin/fs.rs index 573426d1a8646..2032cca311a15 100644 --- a/std/src/os/macos/fs.rs +++ b/std/src/os/darwin/fs.rs @@ -1,4 +1,4 @@ -#![stable(feature = "metadata_ext", since = "1.1.0")] +#![allow(dead_code)] use crate::fs::{self, Metadata}; use crate::sealed::Sealed; @@ -6,7 +6,7 @@ use crate::sys_common::{AsInner, AsInnerMut, IntoInner}; use crate::time::SystemTime; 
#[allow(deprecated)] -use crate::os::macos::raw; +use super::raw; /// OS-specific extensions to [`fs::Metadata`]. /// @@ -70,6 +70,7 @@ pub trait MetadataExt { fn st_gen(&self) -> u32; #[stable(feature = "metadata_ext2", since = "1.8.0")] fn st_lspare(&self) -> u32; + #[cfg(target_os = "macos")] #[stable(feature = "metadata_ext2", since = "1.8.0")] fn st_qspare(&self) -> [u64; 2]; } @@ -143,6 +144,7 @@ impl MetadataExt for Metadata { fn st_lspare(&self) -> u32 { self.as_inner().as_inner().st_lspare as u32 } + #[cfg(target_os = "macos")] fn st_qspare(&self) -> [u64; 2] { let qspare = self.as_inner().as_inner().st_qspare; [qspare[0] as u64, qspare[1] as u64] diff --git a/std/src/os/darwin/mod.rs b/std/src/os/darwin/mod.rs new file mode 100644 index 0000000000000..03401fe8895b9 --- /dev/null +++ b/std/src/os/darwin/mod.rs @@ -0,0 +1,20 @@ +//! Platform-specific extensions to `std` for Darwin / Apple platforms. +//! +//! This is available on the following operating systems: +//! - macOS +//! - iOS +//! - tvOS +//! - watchOS +//! - visionOS +//! +//! Note: This module is called "Darwin" as that's the name of the underlying +//! core OS of the above operating systems, but it should not be confused with +//! the `-darwin` suffix in the `x86_64-apple-darwin` and +//! `aarch64-apple-darwin` target names, which are mostly named that way for +//! legacy reasons. + +pub(crate) mod fs; +// deprecated, but used for public reexport under `std::os::unix::raw`, as +// well as `std::os::macos`/`std::os::ios`, because those modules precede the +// decision to remove these. +pub(super) mod raw; diff --git a/std/src/os/ios/raw.rs b/std/src/os/darwin/raw.rs similarity index 87% rename from std/src/os/ios/raw.rs rename to std/src/os/darwin/raw.rs index af12aeebe5d0c..047727f45325f 100644 --- a/std/src/os/ios/raw.rs +++ b/std/src/os/darwin/raw.rs @@ -1,15 +1,4 @@ -//! iOS-specific raw type definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] -#![deprecated( - since = "1.8.0", - note = "these type aliases are no longer supported by \ - the standard library, the `libc` crate on \ - crates.io should be used instead for the correct \ - definitions" -)] -#![allow(deprecated)] - +//! Apple-specific raw type definitions use crate::os::raw::c_long; #[stable(feature = "raw_ext", since = "1.1.0")] @@ -35,6 +24,7 @@ pub type pthread_t = usize; #[repr(C)] #[derive(Clone)] #[stable(feature = "raw_ext", since = "1.1.0")] +#[allow(dead_code)] pub struct stat { #[stable(feature = "raw_ext", since = "1.1.0")] pub st_dev: i32, diff --git a/std/src/os/fd/owned.rs b/std/src/os/fd/owned.rs index 8c7fc4cb2e453..a1f83029d2727 100644 --- a/std/src/os/fd/owned.rs +++ b/std/src/os/fd/owned.rs @@ -175,6 +175,11 @@ impl Drop for OwnedFd { // the file descriptor was closed or not, and if we retried (for // something like EINTR), we might close another valid file descriptor // opened after we closed ours. + // However, this is usually justified, as some of the major Unices + // do make sure to always close the FD, even when `close()` is interrupted, + // and the scenario is rare to begin with. 
+ // Helpful link to an epic discussion by POSIX workgroup: + // http://austingroupbugs.net/view.php?id=529 #[cfg(not(target_os = "hermit"))] { #[cfg(unix)] diff --git a/std/src/os/fortanix_sgx/mod.rs b/std/src/os/fortanix_sgx/mod.rs index 39a42f4e17fec..b31dc06f8dfbd 100644 --- a/std/src/os/fortanix_sgx/mod.rs +++ b/std/src/os/fortanix_sgx/mod.rs @@ -28,7 +28,6 @@ pub mod usercalls { pub use crate::sys::abi::usercalls::raw::{do_usercall, Usercalls as UsercallNrs}; pub use crate::sys::abi::usercalls::raw::{Register, RegisterArgument, ReturnValue}; - // fortanix-sgx-abi re-exports pub use crate::sys::abi::usercalls::raw::Error; pub use crate::sys::abi::usercalls::raw::{ ByteBuffer, Cancel, FifoDescriptor, Return, Usercall, diff --git a/std/src/os/hermit/io/mod.rs b/std/src/os/hermit/io/mod.rs index 524dfae0d63ae..df93f63a003cf 100644 --- a/std/src/os/hermit/io/mod.rs +++ b/std/src/os/hermit/io/mod.rs @@ -1,13 +1,4 @@ -#![stable(feature = "os_fd", since = "1.66.0")] +#![stable(feature = "rust1", since = "1.0.0")] -mod net; -#[path = "../../fd/owned.rs"] -mod owned; -#[path = "../../fd/raw.rs"] -mod raw; - -// Export the types and traits for the public API. -#[stable(feature = "os_fd", since = "1.66.0")] -pub use owned::*; -#[stable(feature = "os_fd", since = "1.66.0")] -pub use raw::*; +#[stable(feature = "rust1", since = "1.0.0")] +pub use crate::os::fd::*; diff --git a/std/src/os/ios/fs.rs b/std/src/os/ios/fs.rs deleted file mode 100644 index e5df4de0b7f71..0000000000000 --- a/std/src/os/ios/fs.rs +++ /dev/null @@ -1,160 +0,0 @@ -#![stable(feature = "metadata_ext", since = "1.1.0")] - -use crate::fs::{self, Metadata}; -use crate::sealed::Sealed; -use crate::sys_common::{AsInner, AsInnerMut, IntoInner}; -use crate::time::SystemTime; - -#[allow(deprecated)] -use super::raw; - -/// OS-specific extensions to [`fs::Metadata`]. -/// -/// [`fs::Metadata`]: crate::fs::Metadata -#[stable(feature = "metadata_ext", since = "1.1.0")] -pub trait MetadataExt { - /// Gain a reference to the underlying `stat` structure which contains - /// the raw information returned by the OS. - /// - /// The contents of the returned `stat` are **not** consistent across - /// Unix platforms. The `os::unix::fs::MetadataExt` trait contains the - /// cross-Unix abstractions contained within the raw stat. 
- #[stable(feature = "metadata_ext", since = "1.1.0")] - #[deprecated( - since = "1.8.0", - note = "deprecated in favor of the accessor \ - methods of this trait" - )] - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat; - - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_dev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ino(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mode(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_nlink(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_uid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_rdev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_size(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blksize(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blocks(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_flags(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gen(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_lspare(&self) -> u32; -} - -#[stable(feature = "metadata_ext", since = "1.1.0")] -impl MetadataExt for Metadata { - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat { - unsafe { &*(self.as_inner().as_inner() as *const libc::stat as *const raw::stat) } - } - fn st_dev(&self) -> u64 { - self.as_inner().as_inner().st_dev as u64 - } - fn st_ino(&self) -> u64 { - self.as_inner().as_inner().st_ino as u64 - } - fn st_mode(&self) -> u32 { - self.as_inner().as_inner().st_mode as u32 - } - fn st_nlink(&self) -> u64 { - self.as_inner().as_inner().st_nlink as u64 - } - fn st_uid(&self) -> u32 { - self.as_inner().as_inner().st_uid as u32 - } - fn st_gid(&self) -> u32 { - self.as_inner().as_inner().st_gid as u32 - } - fn st_rdev(&self) -> u64 { - self.as_inner().as_inner().st_rdev as u64 - } - fn st_size(&self) -> u64 { - self.as_inner().as_inner().st_size as u64 - } - fn st_atime(&self) -> i64 { - self.as_inner().as_inner().st_atime as i64 - } - fn st_atime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_atime_nsec as i64 - } - fn st_mtime(&self) -> i64 { - self.as_inner().as_inner().st_mtime as i64 - } - fn st_mtime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_mtime_nsec as i64 - } - fn st_ctime(&self) -> i64 { - self.as_inner().as_inner().st_ctime as i64 - } - fn st_ctime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_ctime_nsec as i64 - } - fn st_birthtime(&self) -> i64 { - self.as_inner().as_inner().st_birthtime as i64 - } - fn st_birthtime_nsec(&self) -> i64 { - 
self.as_inner().as_inner().st_birthtime_nsec as i64 - } - fn st_blksize(&self) -> u64 { - self.as_inner().as_inner().st_blksize as u64 - } - fn st_blocks(&self) -> u64 { - self.as_inner().as_inner().st_blocks as u64 - } - fn st_gen(&self) -> u32 { - self.as_inner().as_inner().st_gen as u32 - } - fn st_flags(&self) -> u32 { - self.as_inner().as_inner().st_flags as u32 - } - fn st_lspare(&self) -> u32 { - self.as_inner().as_inner().st_lspare as u32 - } -} - -/// OS-specific extensions to [`fs::FileTimes`]. -#[stable(feature = "file_set_times", since = "1.75.0")] -pub trait FileTimesExt: Sealed { - /// Set the creation time of a file. - #[stable(feature = "file_set_times", since = "1.75.0")] - fn set_created(self, t: SystemTime) -> Self; -} - -#[stable(feature = "file_set_times", since = "1.75.0")] -impl FileTimesExt for fs::FileTimes { - fn set_created(mut self, t: SystemTime) -> Self { - self.as_inner_mut().set_created(t.into_inner()); - self - } -} diff --git a/std/src/os/ios/mod.rs b/std/src/os/ios/mod.rs index fdefa1f6b21c4..5e130d77b7bfd 100644 --- a/std/src/os/ios/mod.rs +++ b/std/src/os/ios/mod.rs @@ -2,5 +2,29 @@ #![stable(feature = "raw_ext", since = "1.1.0")] -pub mod fs; -pub mod raw; +#[stable(feature = "metadata_ext", since = "1.1.0")] +pub mod fs { + #[doc(inline)] + #[stable(feature = "file_set_times", since = "1.75.0")] + pub use crate::os::darwin::fs::FileTimesExt; + + #[doc(inline)] + #[stable(feature = "metadata_ext", since = "1.1.0")] + pub use crate::os::darwin::fs::MetadataExt; +} + +/// iOS-specific raw type definitions +#[stable(feature = "raw_ext", since = "1.1.0")] +#[deprecated( + since = "1.8.0", + note = "these type aliases are no longer supported by \ + the standard library, the `libc` crate on \ + crates.io should be used instead for the correct \ + definitions" +)] +#[allow(deprecated)] +pub mod raw { + #[doc(inline)] + #[stable(feature = "raw_ext", since = "1.1.0")] + pub use crate::os::darwin::raw::*; +} diff --git a/std/src/os/linux/process.rs b/std/src/os/linux/process.rs index 2ba67a6dd1aa9..9195909479729 100644 --- a/std/src/os/linux/process.rs +++ b/std/src/os/linux/process.rs @@ -6,20 +6,20 @@ use crate::io::Result; use crate::os::unix::io::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; -use crate::process; +use crate::process::{self, ExitStatus}; use crate::sealed::Sealed; #[cfg(not(doc))] -use crate::sys::fd::FileDesc; +use crate::sys::{fd::FileDesc, linux::pidfd::PidFd as InnerPidFd}; use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; #[cfg(doc)] -struct FileDesc; +struct InnerPidFd; /// This type represents a file descriptor that refers to a process. /// /// A `PidFd` can be obtained by setting the corresponding option on [`Command`] /// with [`create_pidfd`]. Subsequently, the created pidfd can be retrieved -/// from the [`Child`] by calling [`pidfd`] or [`take_pidfd`]. +/// from the [`Child`] by calling [`pidfd`] or [`into_pidfd`]. /// /// Example: /// ```no_run @@ -33,7 +33,7 @@ struct FileDesc; /// .expect("Failed to spawn child"); /// /// let pidfd = child -/// .take_pidfd() +/// .into_pidfd() /// .expect("Failed to retrieve pidfd"); /// /// // The file descriptor will be closed when `pidfd` is dropped. 
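The hunks that follow replace `take_pidfd` with a consuming `into_pidfd` and add `kill`/`wait`/`try_wait` directly on `PidFd`. A minimal sketch of how the reworked API is intended to be used, assuming a nightly toolchain (the `linux_pidfd` feature is unstable) and a kernel with pidfd support (Linux 5.3 or newer):

```rust
// Sketch only: `linux_pidfd` is unstable, so this needs a nightly compiler,
// and the running kernel must support pidfds (Linux 5.3+).
#![feature(linux_pidfd)]
use std::os::linux::process::{ChildExt, CommandExt};
use std::process::Command;

fn main() -> std::io::Result<()> {
    let child = Command::new("sleep")
        .arg("1")
        .create_pidfd(true) // request a pidfd at spawn time, race-free
        .spawn()?;

    // `into_pidfd` consumes the `Child`, so every later operation goes
    // through the pidfd and cannot be confused by pid reuse.
    let pidfd = child.into_pidfd().expect("pidfd requested and kernel supports it");
    let status = pidfd.wait()?; // unlike Child::wait, does not close stdin
    println!("child exited with: {status}");
    Ok(())
}
```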
@@ -44,28 +44,63 @@ struct FileDesc;
 /// [`create_pidfd`]: CommandExt::create_pidfd
 /// [`Child`]: process::Child
 /// [`pidfd`]: fn@ChildExt::pidfd
-/// [`take_pidfd`]: ChildExt::take_pidfd
+/// [`into_pidfd`]: ChildExt::into_pidfd
 /// [`pidfd_open(2)`]: https://man7.org/linux/man-pages/man2/pidfd_open.2.html
 #[derive(Debug)]
+#[repr(transparent)]
 pub struct PidFd {
-    inner: FileDesc,
+    inner: InnerPidFd,
 }
 
-impl AsInner<FileDesc> for PidFd {
+impl PidFd {
+    /// Forces the child process to exit.
+    ///
+    /// Unlike [`Child::kill`] it is possible to attempt to kill
+    /// reaped children since PidFd does not suffer from pid recycling
+    /// races. But doing so will return an error.
+    ///
+    /// [`Child::kill`]: process::Child::kill
+    pub fn kill(&self) -> Result<()> {
+        self.inner.kill()
+    }
+
+    /// Waits for the child to exit completely, returning the status that it exited with.
+    ///
+    /// Unlike [`Child::wait`] it does not ensure that the stdin handle is closed.
+    /// Additionally it will not return an `ExitStatus` if the child
+    /// has already been reaped. Instead an error will be returned.
+    ///
+    /// [`Child::wait`]: process::Child::wait
+    pub fn wait(&self) -> Result<ExitStatus> {
+        self.inner.wait().map(FromInner::from_inner)
+    }
+
+    /// Attempts to collect the exit status of the child if it has already exited.
+    ///
+    /// Unlike [`Child::try_wait`] this method will return an error
+    /// if the child has already been reaped.
+    ///
+    /// [`Child::try_wait`]: process::Child::try_wait
+    pub fn try_wait(&self) -> Result<Option<ExitStatus>> {
+        Ok(self.inner.try_wait()?.map(FromInner::from_inner))
+    }
+}
+
+impl AsInner<InnerPidFd> for PidFd {
     #[inline]
-    fn as_inner(&self) -> &FileDesc {
+    fn as_inner(&self) -> &InnerPidFd {
         &self.inner
     }
 }
 
-impl FromInner<FileDesc> for PidFd {
-    fn from_inner(inner: FileDesc) -> PidFd {
+impl FromInner<InnerPidFd> for PidFd {
+    fn from_inner(inner: InnerPidFd) -> PidFd {
         PidFd { inner }
     }
 }
 
-impl IntoInner<FileDesc> for PidFd {
-    fn into_inner(self) -> FileDesc {
+impl IntoInner<InnerPidFd> for PidFd {
+    fn into_inner(self) -> InnerPidFd {
         self.inner
     }
 }
@@ -73,37 +108,37 @@ impl IntoInner<FileDesc> for PidFd {
 impl AsRawFd for PidFd {
     #[inline]
     fn as_raw_fd(&self) -> RawFd {
-        self.as_inner().as_raw_fd()
+        self.as_inner().as_inner().as_raw_fd()
     }
 }
 
 impl FromRawFd for PidFd {
     unsafe fn from_raw_fd(fd: RawFd) -> Self {
-        Self::from_inner(FileDesc::from_raw_fd(fd))
+        Self::from_inner(InnerPidFd::from_raw_fd(fd))
     }
 }
 
 impl IntoRawFd for PidFd {
     fn into_raw_fd(self) -> RawFd {
-        self.into_inner().into_raw_fd()
+        self.into_inner().into_inner().into_raw_fd()
     }
 }
 
 impl AsFd for PidFd {
     fn as_fd(&self) -> BorrowedFd<'_> {
-        self.as_inner().as_fd()
+        self.as_inner().as_inner().as_fd()
     }
 }
 
 impl From<OwnedFd> for PidFd {
     fn from(fd: OwnedFd) -> Self {
-        Self::from_inner(FileDesc::from_inner(fd))
+        Self::from_inner(InnerPidFd::from_inner(FileDesc::from_inner(fd)))
     }
 }
 
 impl From<PidFd> for OwnedFd {
     fn from(pid_fd: PidFd) -> Self {
-        pid_fd.into_inner().into_inner()
+        pid_fd.into_inner().into_inner().into_inner()
     }
 }
 
@@ -124,18 +159,26 @@ pub trait ChildExt: Sealed {
     /// [`Child`]: process::Child
     fn pidfd(&self) -> Result<&PidFd>;
 
-    /// Takes ownership of the [`PidFd`] created for this [`Child`], if available.
+    /// Returns the [`PidFd`] created for this [`Child`], if available.
+    /// Otherwise `self` is returned.
     ///
     /// A pidfd will only be available if its creation was requested with
     /// [`create_pidfd`] when the corresponding [`Command`] was created.
     ///
+    /// Taking ownership of the PidFd consumes the Child to avoid pid reuse
+    /// races. Use [`pidfd`] and [`BorrowedFd::try_clone_to_owned`] if
+    /// you don't want to disassemble the Child yet.
+    ///
     /// Even if requested, a pidfd may not be available due to an older
     /// version of Linux being in use, or if some other error occurred.
     ///
     /// [`Command`]: process::Command
     /// [`create_pidfd`]: CommandExt::create_pidfd
+    /// [`pidfd`]: ChildExt::pidfd
     /// [`Child`]: process::Child
-    fn take_pidfd(&mut self) -> Result<PidFd>;
+    fn into_pidfd(self) -> crate::result::Result<PidFd, Self>
+    where
+        Self: Sized;
 }
 
 /// OS-specific extensions for [`Command`]
@@ -146,7 +189,7 @@ pub trait CommandExt: Sealed {
     /// spawned by this [`Command`].
     /// By default, no pidfd will be created.
     ///
-    /// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`].
+    /// The pidfd can be retrieved from the child with [`pidfd`] or [`into_pidfd`].
     ///
     /// A pidfd will only be created if it is possible to do so
     /// in a guaranteed race-free manner. Otherwise, [`pidfd`] will return an error.
@@ -160,7 +203,7 @@
     /// [`Command`]: process::Command
     /// [`Child`]: process::Child
     /// [`pidfd`]: fn@ChildExt::pidfd
-    /// [`take_pidfd`]: ChildExt::take_pidfd
+    /// [`into_pidfd`]: ChildExt::into_pidfd
     fn create_pidfd(&mut self, val: bool) -> &mut process::Command;
 }
diff --git a/std/src/os/linux/raw.rs b/std/src/os/linux/raw.rs
index c29dd62bc06f0..d53674d3c5f2c 100644
--- a/std/src/os/linux/raw.rs
+++ b/std/src/os/linux/raw.rs
@@ -244,7 +244,11 @@ mod arch {
     pub use libc::{blkcnt_t, blksize_t, ino_t, nlink_t, off_t, stat, time_t};
 }
 
-#[cfg(target_arch = "aarch64")]
+#[cfg(any(
+    target_arch = "aarch64",
+    // Arm64EC is Windows-only, but docs are always built as Linux, so re-use AArch64 for Arm64EC.
+    all(doc, target_arch = "arm64ec")
+))]
 mod arch {
     use crate::os::raw::{c_int, c_long};
diff --git a/std/src/os/macos/mod.rs b/std/src/os/macos/mod.rs
index 791d703b142cf..3638406b1807d 100644
--- a/std/src/os/macos/mod.rs
+++ b/std/src/os/macos/mod.rs
@@ -2,5 +2,29 @@
 
 #![stable(feature = "raw_ext", since = "1.1.0")]
 
-pub mod fs;
-pub mod raw;
+#[stable(feature = "metadata_ext", since = "1.1.0")]
+pub mod fs {
+    #[doc(inline)]
+    #[stable(feature = "file_set_times", since = "1.75.0")]
+    pub use crate::os::darwin::fs::FileTimesExt;
+
+    #[doc(inline)]
+    #[stable(feature = "metadata_ext", since = "1.1.0")]
+    pub use crate::os::darwin::fs::MetadataExt;
+}
+
+/// macOS-specific raw type definitions
+#[stable(feature = "raw_ext", since = "1.1.0")]
+#[deprecated(
+    since = "1.8.0",
+    note = "these type aliases are no longer supported by \
+            the standard library, the `libc` crate on \
+            crates.io should be used instead for the correct \
+            definitions"
+)]
+#[allow(deprecated)]
+pub mod raw {
+    #[doc(inline)]
+    #[stable(feature = "raw_ext", since = "1.1.0")]
+    pub use crate::os::darwin::raw::*;
+}
diff --git a/std/src/os/macos/raw.rs b/std/src/os/macos/raw.rs
deleted file mode 100644
index 0b21f6ee5e498..0000000000000
--- a/std/src/os/macos/raw.rs
+++ /dev/null
@@ -1,83 +0,0 @@
-//!
macOS-specific raw type definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] -#![deprecated( - since = "1.8.0", - note = "these type aliases are no longer supported by \ - the standard library, the `libc` crate on \ - crates.io should be used instead for the correct \ - definitions" -)] -#![allow(deprecated)] - -use crate::os::raw::c_long; - -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blkcnt_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blksize_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type dev_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type ino_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type mode_t = u32; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type nlink_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type off_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type time_t = i64; - -#[stable(feature = "pthread_t", since = "1.8.0")] -pub type pthread_t = usize; - -#[repr(C)] -#[derive(Clone)] -#[stable(feature = "raw_ext", since = "1.1.0")] -pub struct stat { - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_dev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mode: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_nlink: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ino: u64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_uid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_rdev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_size: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blocks: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blksize: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_flags: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gen: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_lspare: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_qspare: [i64; 2], -} diff --git a/std/src/os/mod.rs b/std/src/os/mod.rs index ca3584e82f918..020a8b324f410 100644 --- a/std/src/os/mod.rs +++ b/std/src/os/mod.rs @@ -2,6 +2,7 @@ #![stable(feature = "os", since = "1.0.0")] #![allow(missing_docs, nonstandard_style, missing_debug_implementations)] +#![allow(unsafe_op_in_unsafe_fn)] pub mod raw; @@ -14,7 +15,7 @@ pub mod raw; // documented don't compile (missing things in `libc` which is empty), // so just omit them with an empty module and add the "unstable" attribute. -// Unix, linux, wasi and windows are handled a bit differently. +// unix, linux, wasi and windows are handled a bit differently. 
#[cfg(all( doc, any( @@ -104,6 +105,8 @@ pub mod windows; pub mod aix; #[cfg(target_os = "android")] pub mod android; +#[cfg(target_vendor = "apple")] +pub(crate) mod darwin; #[cfg(target_os = "dragonfly")] pub mod dragonfly; #[cfg(target_os = "emscripten")] @@ -144,23 +147,16 @@ pub mod redox; pub mod solaris; #[cfg(target_os = "solid_asp3")] pub mod solid; -#[cfg(target_os = "tvos")] -#[path = "ios/mod.rs"] -pub(crate) mod tvos; #[cfg(target_os = "uefi")] pub mod uefi; -#[cfg(target_os = "visionos")] -pub(crate) mod visionos; #[cfg(target_os = "vita")] pub mod vita; #[cfg(target_os = "vxworks")] pub mod vxworks; -#[cfg(target_os = "watchos")] -pub(crate) mod watchos; #[cfg(target_os = "xous")] pub mod xous; -#[cfg(any(unix, target_os = "wasi", doc))] +#[cfg(any(unix, target_os = "hermit", target_os = "wasi", doc))] pub mod fd; #[cfg(any(target_os = "linux", target_os = "android", doc))] diff --git a/std/src/os/uefi/mod.rs b/std/src/os/uefi/mod.rs index 8ef05eee1f4e7..b42d796b28f69 100644 --- a/std/src/os/uefi/mod.rs +++ b/std/src/os/uefi/mod.rs @@ -2,6 +2,7 @@ #![unstable(feature = "uefi_std", issue = "100499")] #![doc(cfg(target_os = "uefi"))] +#![forbid(unsafe_op_in_unsafe_fn)] pub mod env; #[path = "../windows/ffi.rs"] diff --git a/std/src/os/unix/mod.rs b/std/src/os/unix/mod.rs index d7a622012a5ac..c6581b9c4c8c8 100644 --- a/std/src/os/unix/mod.rs +++ b/std/src/os/unix/mod.rs @@ -41,6 +41,8 @@ mod platform { pub use crate::os::aix::*; #[cfg(target_os = "android")] pub use crate::os::android::*; + #[cfg(target_vendor = "apple")] + pub(super) use crate::os::darwin::*; #[cfg(target_os = "dragonfly")] pub use crate::os::dragonfly::*; #[cfg(target_os = "emscripten")] @@ -59,14 +61,10 @@ mod platform { pub use crate::os::hurd::*; #[cfg(target_os = "illumos")] pub use crate::os::illumos::*; - #[cfg(target_os = "ios")] - pub use crate::os::ios::*; #[cfg(target_os = "l4re")] pub use crate::os::l4re::*; #[cfg(target_os = "linux")] pub use crate::os::linux::*; - #[cfg(target_os = "macos")] - pub use crate::os::macos::*; #[cfg(target_os = "netbsd")] pub use crate::os::netbsd::*; #[cfg(target_os = "nto")] @@ -77,16 +75,10 @@ mod platform { pub use crate::os::redox::*; #[cfg(target_os = "solaris")] pub use crate::os::solaris::*; - #[cfg(target_os = "tvos")] - pub use crate::os::tvos::*; - #[cfg(target_os = "visionos")] - pub use crate::os::visionos::*; #[cfg(target_os = "vita")] pub use crate::os::vita::*; #[cfg(target_os = "vxworks")] pub use crate::os::vxworks::*; - #[cfg(target_os = "watchos")] - pub use crate::os::watchos::*; } pub mod ffi; diff --git a/std/src/os/unix/net/ancillary.rs b/std/src/os/unix/net/ancillary.rs index 0597fdcbd7289..fe8e2be93724e 100644 --- a/std/src/os/unix/net/ancillary.rs +++ b/std/src/os/unix/net/ancillary.rs @@ -3,7 +3,7 @@ use super::{sockaddr_un, SocketAddr}; use crate::io::{self, IoSlice, IoSliceMut}; use crate::marker::PhantomData; -use crate::mem::{size_of, zeroed}; +use crate::mem::zeroed; use crate::os::unix::io::RawFd; use crate::path::Path; use crate::ptr::{eq, read_unaligned}; diff --git a/std/src/os/visionos/fs.rs b/std/src/os/visionos/fs.rs deleted file mode 100644 index e5df4de0b7f71..0000000000000 --- a/std/src/os/visionos/fs.rs +++ /dev/null @@ -1,160 +0,0 @@ -#![stable(feature = "metadata_ext", since = "1.1.0")] - -use crate::fs::{self, Metadata}; -use crate::sealed::Sealed; -use crate::sys_common::{AsInner, AsInnerMut, IntoInner}; -use crate::time::SystemTime; - -#[allow(deprecated)] -use super::raw; - -/// OS-specific extensions to 
[`fs::Metadata`]. -/// -/// [`fs::Metadata`]: crate::fs::Metadata -#[stable(feature = "metadata_ext", since = "1.1.0")] -pub trait MetadataExt { - /// Gain a reference to the underlying `stat` structure which contains - /// the raw information returned by the OS. - /// - /// The contents of the returned `stat` are **not** consistent across - /// Unix platforms. The `os::unix::fs::MetadataExt` trait contains the - /// cross-Unix abstractions contained within the raw stat. - #[stable(feature = "metadata_ext", since = "1.1.0")] - #[deprecated( - since = "1.8.0", - note = "deprecated in favor of the accessor \ - methods of this trait" - )] - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat; - - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_dev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ino(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mode(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_nlink(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_uid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_rdev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_size(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blksize(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blocks(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_flags(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gen(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_lspare(&self) -> u32; -} - -#[stable(feature = "metadata_ext", since = "1.1.0")] -impl MetadataExt for Metadata { - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat { - unsafe { &*(self.as_inner().as_inner() as *const libc::stat as *const raw::stat) } - } - fn st_dev(&self) -> u64 { - self.as_inner().as_inner().st_dev as u64 - } - fn st_ino(&self) -> u64 { - self.as_inner().as_inner().st_ino as u64 - } - fn st_mode(&self) -> u32 { - self.as_inner().as_inner().st_mode as u32 - } - fn st_nlink(&self) -> u64 { - self.as_inner().as_inner().st_nlink as u64 - } - fn st_uid(&self) -> u32 { - self.as_inner().as_inner().st_uid as u32 - } - fn st_gid(&self) -> u32 { - self.as_inner().as_inner().st_gid as u32 - } - fn st_rdev(&self) -> u64 { - self.as_inner().as_inner().st_rdev as u64 - } - fn st_size(&self) -> u64 { - self.as_inner().as_inner().st_size as u64 - } - fn st_atime(&self) -> i64 { - self.as_inner().as_inner().st_atime as i64 - } - fn st_atime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_atime_nsec as i64 - } - fn st_mtime(&self) 
-> i64 { - self.as_inner().as_inner().st_mtime as i64 - } - fn st_mtime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_mtime_nsec as i64 - } - fn st_ctime(&self) -> i64 { - self.as_inner().as_inner().st_ctime as i64 - } - fn st_ctime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_ctime_nsec as i64 - } - fn st_birthtime(&self) -> i64 { - self.as_inner().as_inner().st_birthtime as i64 - } - fn st_birthtime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_birthtime_nsec as i64 - } - fn st_blksize(&self) -> u64 { - self.as_inner().as_inner().st_blksize as u64 - } - fn st_blocks(&self) -> u64 { - self.as_inner().as_inner().st_blocks as u64 - } - fn st_gen(&self) -> u32 { - self.as_inner().as_inner().st_gen as u32 - } - fn st_flags(&self) -> u32 { - self.as_inner().as_inner().st_flags as u32 - } - fn st_lspare(&self) -> u32 { - self.as_inner().as_inner().st_lspare as u32 - } -} - -/// OS-specific extensions to [`fs::FileTimes`]. -#[stable(feature = "file_set_times", since = "1.75.0")] -pub trait FileTimesExt: Sealed { - /// Set the creation time of a file. - #[stable(feature = "file_set_times", since = "1.75.0")] - fn set_created(self, t: SystemTime) -> Self; -} - -#[stable(feature = "file_set_times", since = "1.75.0")] -impl FileTimesExt for fs::FileTimes { - fn set_created(mut self, t: SystemTime) -> Self { - self.as_inner_mut().set_created(t.into_inner()); - self - } -} diff --git a/std/src/os/visionos/mod.rs b/std/src/os/visionos/mod.rs deleted file mode 100644 index f4b061ffda898..0000000000000 --- a/std/src/os/visionos/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! visionos-specific definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] - -pub mod fs; -pub mod raw; diff --git a/std/src/os/visionos/raw.rs b/std/src/os/visionos/raw.rs deleted file mode 100644 index 2b3eca6f493df..0000000000000 --- a/std/src/os/visionos/raw.rs +++ /dev/null @@ -1,83 +0,0 @@ -//! 
visionos-specific raw type definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] -#![deprecated( - since = "1.8.0", - note = "these type aliases are no longer supported by \ - the standard library, the `libc` crate on \ - crates.io should be used instead for the correct \ - definitions" -)] -#![allow(deprecated)] - -use crate::os::raw::c_long; - -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blkcnt_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blksize_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type dev_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type ino_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type mode_t = u32; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type nlink_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type off_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type time_t = i64; - -#[stable(feature = "pthread_t", since = "1.8.0")] -pub type pthread_t = usize; - -#[repr(C)] -#[derive(Clone)] -#[stable(feature = "raw_ext", since = "1.1.0")] -pub struct stat { - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_dev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mode: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_nlink: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ino: u64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_uid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_rdev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_size: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blocks: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blksize: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_flags: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gen: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_lspare: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_qspare: [i64; 2], -} diff --git a/std/src/os/watchos/fs.rs b/std/src/os/watchos/fs.rs deleted file mode 100644 index ee215dd598441..0000000000000 --- a/std/src/os/watchos/fs.rs +++ /dev/null @@ -1,160 +0,0 @@ -#![stable(feature = "metadata_ext", since = "1.1.0")] - -use crate::fs::{self, Metadata}; -use crate::sealed::Sealed; -use crate::sys_common::{AsInner, AsInnerMut, IntoInner}; -use crate::time::SystemTime; - -#[allow(deprecated)] -use crate::os::watchos::raw; - -/// OS-specific extensions to [`fs::Metadata`]. -/// -/// [`fs::Metadata`]: crate::fs::Metadata -#[stable(feature = "metadata_ext", since = "1.1.0")] -pub trait MetadataExt { - /// Gain a reference to the underlying `stat` structure which contains - /// the raw information returned by the OS. 
- /// - /// The contents of the returned `stat` are **not** consistent across - /// Unix platforms. The `os::unix::fs::MetadataExt` trait contains the - /// cross-Unix abstractions contained within the raw stat. - #[stable(feature = "metadata_ext", since = "1.1.0")] - #[deprecated( - since = "1.8.0", - note = "deprecated in favor of the accessor \ - methods of this trait" - )] - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat; - - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_dev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ino(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mode(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_nlink(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_uid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gid(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_rdev(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_size(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_atime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_mtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_ctime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_birthtime_nsec(&self) -> i64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blksize(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_blocks(&self) -> u64; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_flags(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_gen(&self) -> u32; - #[stable(feature = "metadata_ext2", since = "1.8.0")] - fn st_lspare(&self) -> u32; -} - -#[stable(feature = "metadata_ext", since = "1.1.0")] -impl MetadataExt for Metadata { - #[allow(deprecated)] - fn as_raw_stat(&self) -> &raw::stat { - unsafe { &*(self.as_inner().as_inner() as *const libc::stat as *const raw::stat) } - } - fn st_dev(&self) -> u64 { - self.as_inner().as_inner().st_dev as u64 - } - fn st_ino(&self) -> u64 { - self.as_inner().as_inner().st_ino as u64 - } - fn st_mode(&self) -> u32 { - self.as_inner().as_inner().st_mode as u32 - } - fn st_nlink(&self) -> u64 { - self.as_inner().as_inner().st_nlink as u64 - } - fn st_uid(&self) -> u32 { - self.as_inner().as_inner().st_uid as u32 - } - fn st_gid(&self) -> u32 { - self.as_inner().as_inner().st_gid as u32 - } - fn st_rdev(&self) -> u64 { - self.as_inner().as_inner().st_rdev as u64 - } - fn st_size(&self) -> u64 { - self.as_inner().as_inner().st_size as u64 - } - fn st_atime(&self) -> i64 { - self.as_inner().as_inner().st_atime as i64 - } - fn st_atime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_atime_nsec as i64 - } - fn st_mtime(&self) -> i64 { - self.as_inner().as_inner().st_mtime as i64 - } - fn st_mtime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_mtime_nsec as i64 - } - fn st_ctime(&self) -> i64 { - self.as_inner().as_inner().st_ctime as i64 - } - fn st_ctime_nsec(&self) -> i64 { - 
self.as_inner().as_inner().st_ctime_nsec as i64 - } - fn st_birthtime(&self) -> i64 { - self.as_inner().as_inner().st_birthtime as i64 - } - fn st_birthtime_nsec(&self) -> i64 { - self.as_inner().as_inner().st_birthtime_nsec as i64 - } - fn st_blksize(&self) -> u64 { - self.as_inner().as_inner().st_blksize as u64 - } - fn st_blocks(&self) -> u64 { - self.as_inner().as_inner().st_blocks as u64 - } - fn st_gen(&self) -> u32 { - self.as_inner().as_inner().st_gen as u32 - } - fn st_flags(&self) -> u32 { - self.as_inner().as_inner().st_flags as u32 - } - fn st_lspare(&self) -> u32 { - self.as_inner().as_inner().st_lspare as u32 - } -} - -/// OS-specific extensions to [`fs::FileTimes`]. -#[stable(feature = "file_set_times", since = "1.75.0")] -pub trait FileTimesExt: Sealed { - /// Set the creation time of a file. - #[stable(feature = "file_set_times", since = "1.75.0")] - fn set_created(self, t: SystemTime) -> Self; -} - -#[stable(feature = "file_set_times", since = "1.75.0")] -impl FileTimesExt for fs::FileTimes { - fn set_created(mut self, t: SystemTime) -> Self { - self.as_inner_mut().set_created(t.into_inner()); - self - } -} diff --git a/std/src/os/watchos/mod.rs b/std/src/os/watchos/mod.rs deleted file mode 100644 index cd6454ebbf99b..0000000000000 --- a/std/src/os/watchos/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! watchOS-specific definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] - -pub mod fs; -pub mod raw; diff --git a/std/src/os/watchos/raw.rs b/std/src/os/watchos/raw.rs deleted file mode 100644 index 630a533d9aaf2..0000000000000 --- a/std/src/os/watchos/raw.rs +++ /dev/null @@ -1,83 +0,0 @@ -//! watchOS-specific raw type definitions - -#![stable(feature = "raw_ext", since = "1.1.0")] -#![deprecated( - since = "1.8.0", - note = "these type aliases are no longer supported by \ - the standard library, the `libc` crate on \ - crates.io should be used instead for the correct \ - definitions" -)] -#![allow(deprecated)] - -use crate::os::raw::c_long; - -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blkcnt_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type blksize_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type dev_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type ino_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type mode_t = u32; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type nlink_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type off_t = u64; -#[stable(feature = "raw_ext", since = "1.1.0")] -pub type time_t = i64; - -#[stable(feature = "pthread_t", since = "1.8.0")] -pub type pthread_t = usize; - -#[repr(C)] -#[derive(Clone)] -#[stable(feature = "raw_ext", since = "1.1.0")] -pub struct stat { - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_dev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mode: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_nlink: u16, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ino: u64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_uid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gid: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_rdev: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_atime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_mtime_nsec: 
c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_ctime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_birthtime_nsec: c_long, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_size: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blocks: i64, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_blksize: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_flags: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_gen: u32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_lspare: i32, - #[stable(feature = "raw_ext", since = "1.1.0")] - pub st_qspare: [i64; 2], -} diff --git a/std/src/os/windows/io/raw.rs b/std/src/os/windows/io/raw.rs index 770583a9ce3e0..343cc6e4a8a5a 100644 --- a/std/src/os/windows/io/raw.rs +++ b/std/src/os/windows/io/raw.rs @@ -159,10 +159,12 @@ fn stdio_handle(raw: RawHandle) -> RawHandle { impl FromRawHandle for fs::File { #[inline] unsafe fn from_raw_handle(handle: RawHandle) -> fs::File { - let handle = handle as sys::c::HANDLE; - fs::File::from_inner(sys::fs::File::from_inner(FromInner::from_inner( - OwnedHandle::from_raw_handle(handle), - ))) + unsafe { + let handle = handle as sys::c::HANDLE; + fs::File::from_inner(sys::fs::File::from_inner(FromInner::from_inner( + OwnedHandle::from_raw_handle(handle), + ))) + } } } @@ -260,24 +262,30 @@ impl AsRawSocket for net::UdpSocket { impl FromRawSocket for net::TcpStream { #[inline] unsafe fn from_raw_socket(sock: RawSocket) -> net::TcpStream { - let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); - net::TcpStream::from_inner(sys_common::net::TcpStream::from_inner(sock)) + unsafe { + let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); + net::TcpStream::from_inner(sys_common::net::TcpStream::from_inner(sock)) + } } } #[stable(feature = "from_raw_os", since = "1.1.0")] impl FromRawSocket for net::TcpListener { #[inline] unsafe fn from_raw_socket(sock: RawSocket) -> net::TcpListener { - let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); - net::TcpListener::from_inner(sys_common::net::TcpListener::from_inner(sock)) + unsafe { + let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); + net::TcpListener::from_inner(sys_common::net::TcpListener::from_inner(sock)) + } } } #[stable(feature = "from_raw_os", since = "1.1.0")] impl FromRawSocket for net::UdpSocket { #[inline] unsafe fn from_raw_socket(sock: RawSocket) -> net::UdpSocket { - let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); - net::UdpSocket::from_inner(sys_common::net::UdpSocket::from_inner(sock)) + unsafe { + let sock = sys::net::Socket::from_inner(OwnedSocket::from_raw_socket(sock)); + net::UdpSocket::from_inner(sys_common::net::UdpSocket::from_inner(sock)) + } } } diff --git a/std/src/os/windows/io/socket.rs b/std/src/os/windows/io/socket.rs index 6ffdf907c8ed3..4334d041439d9 100644 --- a/std/src/os/windows/io/socket.rs +++ b/std/src/os/windows/io/socket.rs @@ -76,7 +76,7 @@ impl BorrowedSocket<'_> { #[stable(feature = "io_safety", since = "1.63.0")] pub const unsafe fn borrow_raw(socket: RawSocket) -> Self { assert!(socket != sys::c::INVALID_SOCKET as RawSocket); - Self { socket, _phantom: PhantomData } + unsafe { Self { socket, _phantom: PhantomData } } } } @@ -201,8 +201,10 @@ impl 
IntoRawSocket for OwnedSocket {
 impl FromRawSocket for OwnedSocket {
     #[inline]
     unsafe fn from_raw_socket(socket: RawSocket) -> Self {
-        debug_assert_ne!(socket, sys::c::INVALID_SOCKET as RawSocket);
-        Self { socket }
+        unsafe {
+            debug_assert_ne!(socket, sys::c::INVALID_SOCKET as RawSocket);
+            Self { socket }
+        }
     }
 }
diff --git a/std/src/os/windows/mod.rs b/std/src/os/windows/mod.rs
index 52eb3b7c06769..f452403ee8426 100644
--- a/std/src/os/windows/mod.rs
+++ b/std/src/os/windows/mod.rs
@@ -24,6 +24,7 @@
 
 #![stable(feature = "rust1", since = "1.0.0")]
 #![doc(cfg(windows))]
+#![deny(unsafe_op_in_unsafe_fn)]
 
 pub mod ffi;
 pub mod fs;
diff --git a/std/src/os/windows/process.rs b/std/src/os/windows/process.rs
index 9cca27fa5dd5b..3927b2ed9bb5c 100644
--- a/std/src/os/windows/process.rs
+++ b/std/src/os/windows/process.rs
@@ -16,7 +16,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
 #[stable(feature = "process_extensions", since = "1.2.0")]
 impl FromRawHandle for process::Stdio {
     unsafe fn from_raw_handle(handle: RawHandle) -> process::Stdio {
-        let handle = sys::handle::Handle::from_raw_handle(handle as *mut _);
+        let handle = unsafe { sys::handle::Handle::from_raw_handle(handle as *mut _) };
         let io = sys::process::Stdio::Handle(handle);
         process::Stdio::from_inner(io)
     }
@@ -181,6 +181,14 @@ pub trait CommandExt: Sealed {
     #[stable(feature = "windows_process_extensions", since = "1.16.0")]
     fn creation_flags(&mut self, flags: u32) -> &mut process::Command;
 
+    /// Sets the field `wShowWindow` of [STARTUPINFO][1] that is passed to `CreateProcess`.
+    /// Allowed values are the ones listed in
+    /// <https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-showwindow>
+    ///
+    /// [1]: <https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/ns-processthreadsapi-startupinfow>
+    #[unstable(feature = "windows_process_extensions_show_window", issue = "127544")]
+    fn show_window(&mut self, cmd_show: u16) -> &mut process::Command;
+
     /// Forces all arguments to be wrapped in quote (`"`) characters.
     ///
     /// This is useful for passing arguments to [MSYS2/Cygwin][1] based
@@ -370,6 +378,11 @@ impl CommandExt for process::Command {
         self
     }
 
+    fn show_window(&mut self, cmd_show: u16) -> &mut process::Command {
+        self.as_inner_mut().show_window(Some(cmd_show));
+        self
+    }
+
     fn force_quotes(&mut self, enabled: bool) -> &mut process::Command {
         self.as_inner_mut().force_quotes(enabled);
         self
@@ -394,7 +407,7 @@ impl CommandExt for process::Command {
         attribute: usize,
         value: T,
     ) -> &mut process::Command {
-        self.as_inner_mut().raw_attribute(attribute, value);
+        unsafe { self.as_inner_mut().raw_attribute(attribute, value) };
         self
     }
 }
diff --git a/std/src/os/xous/mod.rs b/std/src/os/xous/mod.rs
index 153694a89a78d..4b21695c4ac7e 100644
--- a/std/src/os/xous/mod.rs
+++ b/std/src/os/xous/mod.rs
@@ -1,5 +1,6 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 #![doc(cfg(target_os = "xous"))]
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 pub mod ffi;
diff --git a/std/src/panic.rs b/std/src/panic.rs
index e63b46ab70548..c5d1a893ee809 100644
--- a/std/src/panic.rs
+++ b/std/src/panic.rs
@@ -4,11 +4,212 @@
 
 use crate::any::Any;
 use crate::collections;
+use crate::fmt;
 use crate::panicking;
 use crate::sync::atomic::{AtomicU8, Ordering};
 use crate::sync::{Condvar, Mutex, RwLock};
 use crate::thread::Result;
 
+#[stable(feature = "panic_hooks", since = "1.10.0")]
+#[deprecated(
+    since = "1.82.0",
+    note = "use `PanicHookInfo` instead",
+    suggestion = "std::panic::PanicHookInfo"
+)]
+/// A struct providing information about a panic.
+///
+/// `PanicInfo` has been renamed to [`PanicHookInfo`] to avoid confusion with
+/// [`core::panic::PanicInfo`].
+pub type PanicInfo<'a> = PanicHookInfo<'a>;
+
+/// A struct providing information about a panic.
+///
+/// A `PanicHookInfo` structure is passed to a panic hook set by the [`set_hook`] function.
+///
+/// # Examples
+///
+/// ```should_panic
+/// use std::panic;
+///
+/// panic::set_hook(Box::new(|panic_info| {
+///     println!("panic occurred: {panic_info}");
+/// }));
+///
+/// panic!("critical system failure");
+/// ```
+///
+/// [`set_hook`]: ../../std/panic/fn.set_hook.html
+#[stable(feature = "panic_hook_info", since = "CURRENT_RUSTC_VERSION")]
+#[derive(Debug)]
+pub struct PanicHookInfo<'a> {
+    payload: &'a (dyn Any + Send),
+    location: &'a Location<'a>,
+    can_unwind: bool,
+    force_no_backtrace: bool,
+}
+
+impl<'a> PanicHookInfo<'a> {
+    #[inline]
+    pub(crate) fn new(
+        location: &'a Location<'a>,
+        payload: &'a (dyn Any + Send),
+        can_unwind: bool,
+        force_no_backtrace: bool,
+    ) -> Self {
+        PanicHookInfo { payload, location, can_unwind, force_no_backtrace }
+    }
+
+    /// Returns the payload associated with the panic.
+    ///
+    /// This will commonly, but not always, be a `&'static str` or [`String`].
+    ///
+    /// An invocation of the `panic!()` macro in Rust 2021 or later will always result in a
+    /// panic payload of type `&'static str` or `String`.
+    ///
+    /// Only an invocation of [`panic_any`]
+    /// (or, in Rust 2018 and earlier, `panic!(x)` where `x` is something other than a string)
+    /// can result in a panic payload other than a `&'static str` or `String`.
+    ///
+    /// [`String`]: ../../std/string/struct.String.html
+    ///
+    /// # Examples
+    ///
+    /// ```should_panic
+    /// use std::panic;
+    ///
+    /// panic::set_hook(Box::new(|panic_info| {
+    ///     if let Some(s) = panic_info.payload().downcast_ref::<&str>() {
+    ///         println!("panic occurred: {s:?}");
+    ///     } else if let Some(s) = panic_info.payload().downcast_ref::<String>() {
+    ///         println!("panic occurred: {s:?}");
+    ///     } else {
+    ///         println!("panic occurred");
+    ///     }
+    /// }));
+    ///
+    /// panic!("Normal panic");
+    /// ```
+    #[must_use]
+    #[inline]
+    #[stable(feature = "panic_hooks", since = "1.10.0")]
+    pub fn payload(&self) -> &(dyn Any + Send) {
+        self.payload
+    }
+
+    /// Returns the payload associated with the panic, if it is a string.
+    ///
+    /// This returns the payload if it is of type `&'static str` or `String`.
+    ///
+    /// An invocation of the `panic!()` macro in Rust 2021 or later will always result in a
+    /// panic payload where `payload_as_str` returns `Some`.
+    ///
+    /// Only an invocation of [`panic_any`]
+    /// (or, in Rust 2018 and earlier, `panic!(x)` where `x` is something other than a string)
+    /// can result in a panic payload where `payload_as_str` returns `None`.
+    ///
+    /// # Example
+    ///
+    /// ```should_panic
+    /// #![feature(panic_payload_as_str)]
+    ///
+    /// std::panic::set_hook(Box::new(|panic_info| {
+    ///     if let Some(s) = panic_info.payload_as_str() {
+    ///         println!("panic occurred: {s:?}");
+    ///     } else {
+    ///         println!("panic occurred");
+    ///     }
+    /// }));
+    ///
+    /// panic!("Normal panic");
+    /// ```
+    #[must_use]
+    #[inline]
+    #[unstable(feature = "panic_payload_as_str", issue = "125175")]
+    pub fn payload_as_str(&self) -> Option<&str> {
+        if let Some(s) = self.payload.downcast_ref::<&str>() {
+            Some(s)
+        } else if let Some(s) = self.payload.downcast_ref::<String>() {
+            Some(s)
+        } else {
+            None
+        }
+    }
+
+    /// Returns information about the location from which the panic originated,
+    /// if available.
+    ///
+    /// This method will currently always return [`Some`], but this may change
+    /// in future versions.
+ /// + /// # Examples + /// + /// ```should_panic + /// use std::panic; + /// + /// panic::set_hook(Box::new(|panic_info| { + /// if let Some(location) = panic_info.location() { + /// println!("panic occurred in file '{}' at line {}", + /// location.file(), + /// location.line(), + /// ); + /// } else { + /// println!("panic occurred but can't get location information..."); + /// } + /// })); + /// + /// panic!("Normal panic"); + /// ``` + #[must_use] + #[inline] + #[stable(feature = "panic_hooks", since = "1.10.0")] + pub fn location(&self) -> Option<&Location<'_>> { + // NOTE: If this is changed to sometimes return None, + // deal with that case in std::panicking::default_hook and core::panicking::panic_fmt. + Some(&self.location) + } + + /// Returns whether the panic handler is allowed to unwind the stack from + /// the point where the panic occurred. + /// + /// This is true for most kinds of panics with the exception of panics + /// caused by trying to unwind out of a `Drop` implementation or a function + /// whose ABI does not support unwinding. + /// + /// It is safe for a panic handler to unwind even when this function returns + /// false, however this will simply cause the panic handler to be called + /// again. + #[must_use] + #[inline] + #[unstable(feature = "panic_can_unwind", issue = "92988")] + pub fn can_unwind(&self) -> bool { + self.can_unwind + } + + #[unstable( + feature = "panic_internals", + reason = "internal details of the implementation of the `panic!` and related macros", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn force_no_backtrace(&self) -> bool { + self.force_no_backtrace + } +} + +#[stable(feature = "panic_hook_display", since = "1.26.0")] +impl fmt::Display for PanicHookInfo<'_> { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("panicked at ")?; + self.location.fmt(formatter)?; + if let Some(payload) = self.payload_as_str() { + formatter.write_str(":\n")?; + formatter.write_str(payload)?; + } + Ok(()) + } +} + #[doc(hidden)] #[unstable(feature = "edition_panic", issue = "none", reason = "use panic!() instead")] #[allow_internal_unstable(libstd_sys_internals, const_format_args, panic_internals, rt)] @@ -43,7 +244,7 @@ pub use crate::panicking::{set_hook, take_hook}; pub use crate::panicking::update_hook; #[stable(feature = "panic_hooks", since = "1.10.0")] -pub use core::panic::{Location, PanicInfo}; +pub use core::panic::Location; #[stable(feature = "catch_unwind", since = "1.9.0")] pub use core::panic::{AssertUnwindSafe, RefUnwindSafe, UnwindSafe}; @@ -53,7 +254,7 @@ pub use core::panic::{AssertUnwindSafe, RefUnwindSafe, UnwindSafe}; /// The message can be of any (`Any + Send`) type, not just strings. /// /// The message is wrapped in a `Box<'static + Any + Send>`, which can be -/// accessed later using [`PanicInfo::payload`]. +/// accessed later using [`PanicHookInfo::payload`]. /// /// See the [`panic!`] macro for more information about panicking. 
#[stable(feature = "panic_any", since = "1.51.0")] diff --git a/std/src/panicking.rs b/std/src/panicking.rs index 5699937cdb49b..418a855fb728e 100644 --- a/std/src/panicking.rs +++ b/std/src/panicking.rs @@ -9,8 +9,8 @@ #![deny(unsafe_op_in_unsafe_fn)] -use crate::panic::BacktraceStyle; -use core::panic::{Location, PanicInfo, PanicPayload}; +use crate::panic::{BacktraceStyle, PanicHookInfo}; +use core::panic::{Location, PanicPayload}; use crate::any::Any; use crate::fmt; @@ -19,8 +19,8 @@ use crate::mem::{self, ManuallyDrop}; use crate::process; use crate::sync::atomic::{AtomicBool, Ordering}; use crate::sync::{PoisonError, RwLock}; +use crate::sys::backtrace; use crate::sys::stdio::panic_output; -use crate::sys_common::backtrace; use crate::thread; #[cfg(not(test))] @@ -70,12 +70,12 @@ extern "C" fn __rust_foreign_exception() -> ! { enum Hook { Default, - Custom(Box) + 'static + Sync + Send>), + Custom(Box) + 'static + Sync + Send>), } impl Hook { #[inline] - fn into_box(self) -> Box) + 'static + Sync + Send> { + fn into_box(self) -> Box) + 'static + Sync + Send> { match self { Hook::Default => Box::new(default_hook), Hook::Custom(hook) => hook, @@ -105,7 +105,7 @@ static HOOK: RwLock = RwLock::new(Hook::Default); /// /// [`take_hook`]: ./fn.take_hook.html /// -/// The hook is provided with a `PanicInfo` struct which contains information +/// The hook is provided with a `PanicHookInfo` struct which contains information /// about the origin of the panic, including the payload passed to `panic!` and /// the source code location from which the panic originated. /// @@ -129,7 +129,7 @@ static HOOK: RwLock = RwLock::new(Hook::Default); /// panic!("Normal panic"); /// ``` #[stable(feature = "panic_hooks", since = "1.10.0")] -pub fn set_hook(hook: Box) + 'static + Sync + Send>) { +pub fn set_hook(hook: Box) + 'static + Sync + Send>) { if thread::panicking() { panic!("cannot modify the panic hook from a panicking thread"); } @@ -173,7 +173,7 @@ pub fn set_hook(hook: Box) + 'static + Sync + Send>) { /// ``` #[must_use] #[stable(feature = "panic_hooks", since = "1.10.0")] -pub fn take_hook() -> Box) + 'static + Sync + Send> { +pub fn take_hook() -> Box) + 'static + Sync + Send> { if thread::panicking() { panic!("cannot modify the panic hook from a panicking thread"); } @@ -219,7 +219,7 @@ pub fn take_hook() -> Box) + 'static + Sync + Send> { #[unstable(feature = "panic_update_hook", issue = "92649")] pub fn update_hook(hook_fn: F) where - F: Fn(&(dyn Fn(&PanicInfo<'_>) + Send + Sync + 'static), &PanicInfo<'_>) + F: Fn(&(dyn Fn(&PanicHookInfo<'_>) + Send + Sync + 'static), &PanicHookInfo<'_>) + Sync + Send + 'static, @@ -234,7 +234,7 @@ where } /// The default panic handler. -fn default_hook(info: &PanicInfo<'_>) { +fn default_hook(info: &PanicHookInfo<'_>) { // If this is a double panic, make sure that we print a backtrace // for this panic. Otherwise only print it if logging is enabled. let backtrace = if info.force_no_backtrace() { @@ -248,27 +248,25 @@ fn default_hook(info: &PanicInfo<'_>) { // The current implementation always returns `Some`. 
     let location = info.location().unwrap();
 
-    let msg = match info.payload().downcast_ref::<&'static str>() {
-        Some(s) => *s,
-        None => match info.payload().downcast_ref::<String>() {
-            Some(s) => &s[..],
-            None => "Box<dyn Any>",
-        },
-    };
+    let msg = payload_as_str(info.payload());
     let thread = thread::try_current();
     let name = thread.as_ref().and_then(|t| t.name()).unwrap_or("<unnamed>");
 
     let write = |err: &mut dyn crate::io::Write| {
+        // Use a lock to prevent mixed output in multithreading context.
+        // Some platforms also require it when printing a backtrace, like `SymFromAddr` on Windows.
+        let mut lock = backtrace::lock();
         let _ = writeln!(err, "thread '{name}' panicked at {location}:\n{msg}");
 
         static FIRST_PANIC: AtomicBool = AtomicBool::new(true);
 
         match backtrace {
+            // SAFETY: we took out a lock just a second ago.
             Some(BacktraceStyle::Short) => {
-                drop(backtrace::print(err, crate::backtrace_rs::PrintFmt::Short))
+                drop(lock.print(err, crate::backtrace_rs::PrintFmt::Short))
             }
             Some(BacktraceStyle::Full) => {
-                drop(backtrace::print(err, crate::backtrace_rs::PrintFmt::Full))
+                drop(lock.print(err, crate::backtrace_rs::PrintFmt::Full))
             }
             Some(BacktraceStyle::Off) => {
                 if FIRST_PANIC.swap(false, Ordering::Relaxed) {
@@ -597,31 +595,26 @@ pub fn panicking() -> bool {
 /// Entry point of panics from the core crate (`panic_impl` lang item).
 #[cfg(not(any(test, doctest)))]
 #[panic_handler]
-pub fn begin_panic_handler(info: &PanicInfo<'_>) -> ! {
+pub fn begin_panic_handler(info: &core::panic::PanicInfo<'_>) -> ! {
     struct FormatStringPayload<'a> {
-        inner: &'a fmt::Arguments<'a>,
+        inner: &'a core::panic::PanicMessage<'a>,
         string: Option<String>,
     }
 
-    impl<'a> FormatStringPayload<'a> {
-        fn new(inner: &'a fmt::Arguments<'a>) -> Self {
-            Self { inner, string: None }
-        }
-
+    impl FormatStringPayload<'_> {
         fn fill(&mut self) -> &mut String {
-            use crate::fmt::Write;
             let inner = self.inner;
             // Lazily, the first time this gets called, run the actual string formatting.
             self.string.get_or_insert_with(|| {
                 let mut s = String::new();
-                let _err = s.write_fmt(*inner);
+                let mut fmt = fmt::Formatter::new(&mut s);
+                let _err = fmt::Display::fmt(&inner, &mut fmt);
                 s
             })
         }
     }
 
-    unsafe impl<'a> PanicPayload for FormatStringPayload<'a> {
+    unsafe impl PanicPayload for FormatStringPayload<'_> {
         fn take_box(&mut self) -> *mut (dyn Any + Send) {
             // We do two allocations here, unfortunately. But (a) they're required with the current
             // scheme, and (b) we don't handle panic + OOM properly anyway (see comment in
@@ -635,6 +628,16 @@ pub fn begin_panic_handler(info: &PanicInfo<'_>) -> ! {
         }
     }
 
+    impl fmt::Display for FormatStringPayload<'_> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            if let Some(s) = &self.string {
+                f.write_str(s)
+            } else {
+                fmt::Display::fmt(&self.inner, f)
+            }
+        }
+    }
+
     struct StaticStrPayload(&'static str);
 
     unsafe impl PanicPayload for StaticStrPayload {
@@ -645,25 +648,31 @@ pub fn begin_panic_handler(info: &PanicInfo<'_>) -> ! {
         fn get(&mut self) -> &(dyn Any + Send) {
             &self.0
         }
+
+        fn as_str(&mut self) -> Option<&str> {
+            Some(self.0)
+        }
+    }
+
+    impl fmt::Display for StaticStrPayload {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            f.write_str(self.0)
+        }
     }
 
     let loc = info.location().unwrap(); // The current implementation always returns Some
-    let msg = info.message().unwrap(); // The current implementation always returns Some
-    crate::sys_common::backtrace::__rust_end_short_backtrace(move || {
-        // FIXME: can we just pass `info` along rather than taking it apart here, only to have
-        // `rust_panic_with_hook` construct a new `PanicInfo`?
-        if let Some(msg) = msg.as_str() {
+    let msg = info.message();
+    crate::sys::backtrace::__rust_end_short_backtrace(move || {
+        if let Some(s) = msg.as_str() {
             rust_panic_with_hook(
-                &mut StaticStrPayload(msg),
-                info.message(),
+                &mut StaticStrPayload(s),
                 loc,
                 info.can_unwind(),
                 info.force_no_backtrace(),
             );
         } else {
             rust_panic_with_hook(
-                &mut FormatStringPayload::new(msg),
-                info.message(),
+                &mut FormatStringPayload { inner: &msg, string: None },
                 loc,
                 info.can_unwind(),
                 info.force_no_backtrace(),
@@ -689,27 +698,10 @@ pub const fn begin_panic<M: Any + Send>(msg: M) -> ! {
         intrinsics::abort()
     }
 
-    let loc = Location::caller();
-    return crate::sys_common::backtrace::__rust_end_short_backtrace(move || {
-        rust_panic_with_hook(
-            &mut Payload::new(msg),
-            None,
-            loc,
-            /* can_unwind */ true,
-            /* force_no_backtrace */ false,
-        )
-    });
-
     struct Payload<A> {
         inner: Option<A>,
     }
 
-    impl<A: Send + 'static> Payload<A> {
-        fn new(inner: A) -> Payload<A> {
-            Payload { inner: Some(inner) }
-        }
-    }
-
     unsafe impl<A: Send + 'static> PanicPayload for Payload<A> {
         fn take_box(&mut self) -> *mut (dyn Any + Send) {
             // Note that this should be the only allocation performed in this code path. Currently
@@ -731,6 +723,35 @@ pub const fn begin_panic<M: Any + Send>(msg: M) -> ! {
             }
         }
     }
+
+    impl<A: Send + 'static> fmt::Display for Payload<A> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            match &self.inner {
+                Some(a) => f.write_str(payload_as_str(a)),
+                None => process::abort(),
+            }
+        }
+    }
+
+    let loc = Location::caller();
+    crate::sys::backtrace::__rust_end_short_backtrace(move || {
+        rust_panic_with_hook(
+            &mut Payload { inner: Some(msg) },
+            loc,
+            /* can_unwind */ true,
+            /* force_no_backtrace */ false,
+        )
+    })
+}
+
+fn payload_as_str(payload: &dyn Any) -> &str {
+    if let Some(&s) = payload.downcast_ref::<&'static str>() {
+        s
+    } else if let Some(s) = payload.downcast_ref::<String>() {
+        s.as_str()
+    } else {
+        "Box<dyn Any>"
+    }
 }
 
 /// Central point for dispatching panics.
@@ -740,7 +761,6 @@ pub const fn begin_panic<M: Any + Send>(msg: M) -> ! {
 /// abort or unwind.
 fn rust_panic_with_hook(
     payload: &mut dyn PanicPayload,
-    message: Option<&fmt::Arguments<'_>>,
     location: &Location<'_>,
     can_unwind: bool,
     force_no_backtrace: bool,
@@ -754,35 +774,21 @@ fn rust_panic_with_hook(
             // Don't try to format the message in this case, perhaps that is causing the
             // recursive panics. However if the message is just a string, no user-defined
             // code is involved in printing it, so that is risk-free.
-            let msg_str = message.and_then(|m| m.as_str()).map(|m| [m]);
-            let message = msg_str.as_ref().map(|m| fmt::Arguments::new_const(m));
-            let panicinfo = PanicInfo::internal_constructor(
-                message.as_ref(),
-                location,
-                can_unwind,
-                force_no_backtrace,
+            let message: &str = payload.as_str().unwrap_or_default();
+            rtprintpanic!(
+                "panicked at {location}:\n{message}\nthread panicked while processing panic. aborting.\n"
             );
-            rtprintpanic!("{panicinfo}\nthread panicked while processing panic. aborting.\n");
             }
             panic_count::MustAbort::AlwaysAbort => {
                 // Unfortunately, this does not print a backtrace, because creating
                 // a `Backtrace` will allocate, which we must avoid here.
-                let panicinfo = PanicInfo::internal_constructor(
-                    message,
-                    location,
-                    can_unwind,
-                    force_no_backtrace,
-                );
-                rtprintpanic!("{panicinfo}\npanicked after panic::always_abort(), aborting.\n");
+                rtprintpanic!("aborting due to panic at {location}:\n{payload}\n");
            }
        }
        crate::sys::abort_internal();
    }
 
-    let mut info =
-        PanicInfo::internal_constructor(message, location, can_unwind, force_no_backtrace);
-    let hook = HOOK.read().unwrap_or_else(PoisonError::into_inner);
-    match *hook {
+    match *HOOK.read().unwrap_or_else(PoisonError::into_inner) {
         // Some platforms (like wasm) know that printing to stderr won't ever actually
         // print anything, and if that's the case we can skip the default
         // hook. Since string formatting happens lazily when calling `payload`
@@ -791,15 +797,17 @@ fn rust_panic_with_hook(
         // formatting.)
         Hook::Default if panic_output().is_none() => {}
         Hook::Default => {
-            info.set_payload(payload.get());
-            default_hook(&info);
+            default_hook(&PanicHookInfo::new(
+                location,
+                payload.get(),
+                can_unwind,
+                force_no_backtrace,
+            ));
         }
         Hook::Custom(ref hook) => {
-            info.set_payload(payload.get());
-            hook(&info);
+            hook(&PanicHookInfo::new(location, payload.get(), can_unwind, force_no_backtrace));
         }
-    };
-    drop(hook);
+    }
 
     // Indicate that we have finished executing the panic hook. After this point
     // it is fine if there is a panic while executing destructors, as long as it
@@ -835,6 +843,12 @@ pub fn rust_panic_without_hook(payload: Box<dyn Any + Send>) -> ! {
         }
     }
 
+    impl fmt::Display for RewrapBox {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            f.write_str(payload_as_str(&self.0))
+        }
+    }
+
     rust_panic(&mut RewrapBox(payload))
 }
diff --git a/std/src/path.rs b/std/src/path.rs
index f835b69f0cfb5..d5121a554bf6c 100644
--- a/std/src/path.rs
+++ b/std/src/path.rs
@@ -1163,11 +1163,6 @@ pub struct PathBuf {
 }
 
 impl PathBuf {
-    #[inline]
-    fn as_mut_vec(&mut self) -> &mut Vec<u8> {
-        self.inner.as_mut_vec_for_path_buf()
-    }
-
     /// Allocates an empty `PathBuf`.
     ///
     /// # Examples
@@ -1226,6 +1221,25 @@ impl PathBuf {
         self
     }
 
+    /// Consumes and leaks the `PathBuf`, returning a mutable reference to the contents,
+    /// `&'a mut Path`.
+    ///
+    /// The caller has free choice over the returned lifetime, including `'static`.
+    /// Indeed, this function is ideally used for data that lives for the remainder of
+    /// the program’s life, as dropping the returned reference will cause a memory leak.
+    ///
+    /// It does not reallocate or shrink the `PathBuf`, so the leaked allocation may include
+    /// unused capacity that is not part of the returned slice. If you want to discard excess
+    /// capacity, call [`into_boxed_path`], and then [`Box::leak`] instead.
+    /// However, keep in mind that trimming the capacity may result in a reallocation and copy.
+    ///
+    /// [`into_boxed_path`]: Self::into_boxed_path
+    #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")]
+    #[inline]
+    pub fn leak<'a>(self) -> &'a mut Path {
+        Path::from_inner_mut(self.inner.leak())
+    }
+
     /// Extends `self` with `path`.
     ///
     /// If `path` is absolute, it replaces the current path.
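As a quick illustration of the new `PathBuf::leak` above (an editorial sketch, not part of the patch; it requires the unstable `os_string_pathbuf_leak` feature):

```rust
#![feature(os_string_pathbuf_leak)]

use std::path::{Path, PathBuf};

fn main() {
    let buf = PathBuf::from("/var/data");

    // The caller chooses the lifetime; `'static` is the typical one.
    // The allocation is intentionally never freed.
    let leaked: &'static mut Path = buf.leak();
    assert_eq!(&*leaked, Path::new("/var/data"));
}
```

Compared with `Box::leak(buf.into_boxed_path())`, this keeps any spare capacity but avoids a possible reallocation, which is exactly the trade-off the new doc comment spells out.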
@@ -1271,7 +1285,8 @@ impl PathBuf { fn _push(&mut self, path: &Path) { // in general, a separator is needed if the rightmost byte is not a separator - let mut need_sep = self.as_mut_vec().last().map(|c| !is_sep_byte(*c)).unwrap_or(false); + let buf = self.inner.as_encoded_bytes(); + let mut need_sep = buf.last().map(|c| !is_sep_byte(*c)).unwrap_or(false); // in the special case of `C:` on Windows, do *not* add a separator let comps = self.components(); @@ -1285,7 +1300,7 @@ impl PathBuf { // absolute `path` replaces `self` if path.is_absolute() || path.prefix().is_some() { - self.as_mut_vec().truncate(0); + self.inner.truncate(0); // verbatim paths need . and .. removed } else if comps.prefix_verbatim() && !path.inner.is_empty() { @@ -1330,7 +1345,7 @@ impl PathBuf { // `path` has a root but no prefix, e.g., `\windows` (Windows only) } else if path.has_root() { let prefix_len = self.components().prefix_remaining(); - self.as_mut_vec().truncate(prefix_len); + self.inner.truncate(prefix_len); // `path` is a pure relative path } else if need_sep { @@ -1363,7 +1378,7 @@ impl PathBuf { pub fn pop(&mut self) -> bool { match self.parent().map(|p| p.as_u8_slice().len()) { Some(len) => { - self.as_mut_vec().truncate(len); + self.inner.truncate(len); true } None => false, @@ -1425,6 +1440,11 @@ impl PathBuf { /// If `extension` is the empty string, [`self.extension`] will be [`None`] /// afterwards, not `Some("")`. /// + /// # Panics + /// + /// Panics if the passed extension contains a path separator (see + /// [`is_separator`]). + /// /// # Caveats /// /// The new `extension` may contain dots and will be used in its entirety, @@ -1470,6 +1490,14 @@ impl PathBuf { } fn _set_extension(&mut self, extension: &OsStr) -> bool { + for &b in extension.as_encoded_bytes() { + if b < 128 { + if is_separator(b as char) { + panic!("extension cannot contain path separators: {:?}", extension); + } + } + } + let file_stem = match self.file_stem() { None => return false, Some(f) => f.as_encoded_bytes(), @@ -1478,15 +1506,82 @@ impl PathBuf { // truncate until right after the file stem let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); let start = self.inner.as_encoded_bytes().as_ptr().addr(); - let v = self.as_mut_vec(); - v.truncate(end_file_stem.wrapping_sub(start)); + self.inner.truncate(end_file_stem.wrapping_sub(start)); // add the new extension, if any - let new = extension.as_encoded_bytes(); + let new = extension; + if !new.is_empty() { + self.inner.reserve_exact(new.len() + 1); + self.inner.push(OsStr::new(".")); + self.inner.push(new); + } + + true + } + + /// Append [`self.extension`] with `extension`. + /// + /// Returns `false` and does nothing if [`self.file_name`] is [`None`], + /// returns `true` and updates the extension otherwise. + /// + /// # Caveats + /// + /// The appended `extension` may contain dots and will be used in its entirety, + /// but only the part after the final dot will be reflected in + /// [`self.extension`]. + /// + /// See the examples below. 
+    ///
+    /// [`self.file_name`]: Path::file_name
+    /// [`self.extension`]: Path::extension
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(path_add_extension)]
+    ///
+    /// use std::path::{Path, PathBuf};
+    ///
+    /// let mut p = PathBuf::from("/feel/the");
+    ///
+    /// p.add_extension("formatted");
+    /// assert_eq!(Path::new("/feel/the.formatted"), p.as_path());
+    ///
+    /// p.add_extension("dark.side");
+    /// assert_eq!(Path::new("/feel/the.formatted.dark.side"), p.as_path());
+    ///
+    /// p.set_extension("cookie");
+    /// assert_eq!(Path::new("/feel/the.formatted.dark.cookie"), p.as_path());
+    ///
+    /// p.set_extension("");
+    /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path());
+    ///
+    /// p.add_extension("");
+    /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path());
+    /// ```
+    #[unstable(feature = "path_add_extension", issue = "127292")]
+    pub fn add_extension<S: AsRef<OsStr>>(&mut self, extension: S) -> bool {
+        self._add_extension(extension.as_ref())
+    }
+
+    fn _add_extension(&mut self, extension: &OsStr) -> bool {
+        let file_name = match self.file_name() {
+            None => return false,
+            Some(f) => f.as_encoded_bytes(),
+        };
+
+        let new = extension;
         if !new.is_empty() {
-            v.reserve_exact(new.len() + 1);
-            v.push(b'.');
-            v.extend_from_slice(new);
+            // truncate until right after the file name
+            // this is necessary for trimming the trailing slash
+            let end_file_name = file_name[file_name.len()..].as_ptr().addr();
+            let start = self.inner.as_encoded_bytes().as_ptr().addr();
+            self.inner.truncate(end_file_name.wrapping_sub(start));
+
+            // append the new extension
+            self.inner.reserve_exact(new.len() + 1);
+            self.inner.push(OsStr::new("."));
+            self.inner.push(new);
         }
 
         true
@@ -2613,22 +2708,48 @@ impl Path {
             None => {
                 // Enough capacity for the extension and the dot
                 let capacity = self_len + extension.len() + 1;
-                let whole_path = self_bytes.iter();
+                let whole_path = self_bytes;
                 (capacity, whole_path)
             }
             Some(previous_extension) => {
                 let capacity = self_len + extension.len() - previous_extension.len();
-                let path_till_dot = self_bytes[..self_len - previous_extension.len()].iter();
+                let path_till_dot = &self_bytes[..self_len - previous_extension.len()];
                 (capacity, path_till_dot)
             }
         };
 
         let mut new_path = PathBuf::with_capacity(new_capacity);
-        new_path.as_mut_vec().extend(slice_to_copy);
+        new_path.inner.extend_from_slice(slice_to_copy);
         new_path.set_extension(extension);
         new_path
     }
 
+    /// Creates an owned [`PathBuf`] like `self` but with the extension added.
+    ///
+    /// See [`PathBuf::add_extension`] for more details.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(path_add_extension)]
+    ///
+    /// use std::path::{Path, PathBuf};
+    ///
+    /// let path = Path::new("foo.rs");
+    /// assert_eq!(path.with_added_extension("txt"), PathBuf::from("foo.rs.txt"));
+    ///
+    /// let path = Path::new("foo.tar.gz");
+    /// assert_eq!(path.with_added_extension(""), PathBuf::from("foo.tar.gz"));
+    /// assert_eq!(path.with_added_extension("xz"), PathBuf::from("foo.tar.gz.xz"));
+    /// assert_eq!(path.with_added_extension("").with_added_extension("txt"), PathBuf::from("foo.tar.gz.txt"));
+    /// ```
+    #[unstable(feature = "path_add_extension", issue = "127292")]
+    pub fn with_added_extension<S: AsRef<OsStr>>(&self, extension: S) -> PathBuf {
+        let mut new_path = self.to_path_buf();
+        new_path.add_extension(extension);
+        new_path
+    }
+
     /// Produces an iterator over the [`Component`]s of the path.
    ///
    /// When parsing the path, there is a small amount of normalization:
@@ -2875,6 +2996,8 @@ impl Path {
     /// prevent time-of-check to time-of-use (TOCTOU) bugs. You should only use it in scenarios
     /// where those bugs are not an issue.
     ///
+    /// This is an alias for [`std::fs::exists`](crate::fs::exists).
+    ///
     /// # Examples
     ///
     /// ```no_run
@@ -2887,7 +3010,7 @@ impl Path {
     #[stable(feature = "path_try_exists", since = "1.63.0")]
     #[inline]
     pub fn try_exists(&self) -> io::Result<bool> {
-        fs::try_exists(self)
+        fs::exists(self)
     }
 
     /// Returns `true` if the path exists on disk and is pointing at a regular file.
@@ -3069,15 +3192,19 @@ impl Hash for Path {
         let bytes = &bytes[prefix_len..];
 
         let mut component_start = 0;
-        let mut bytes_hashed = 0;
+        // track some extra state to avoid prefix collisions.
+        // ["foo", "bar"] and ["foobar"] will have the same payload bytes
+        // but result in different chunk_bits
+        let mut chunk_bits: usize = 0;
 
         for i in 0..bytes.len() {
             let is_sep = if verbatim { is_verbatim_sep(bytes[i]) } else { is_sep_byte(bytes[i]) };
             if is_sep {
                 if i > component_start {
                     let to_hash = &bytes[component_start..i];
+                    chunk_bits = chunk_bits.wrapping_add(to_hash.len());
+                    chunk_bits = chunk_bits.rotate_right(2);
                     h.write(to_hash);
-                    bytes_hashed += to_hash.len();
                 }
 
                 // skip over separator and optionally a following CurDir item
@@ -3098,11 +3225,12 @@ impl Hash for Path {
 
         if component_start < bytes.len() {
             let to_hash = &bytes[component_start..];
+            chunk_bits = chunk_bits.wrapping_add(to_hash.len());
+            chunk_bits = chunk_bits.rotate_right(2);
             h.write(to_hash);
-            bytes_hashed += to_hash.len();
         }
 
-        h.write_usize(bytes_hashed);
+        h.write_usize(chunk_bits);
     }
 }
 
@@ -3313,14 +3441,33 @@ impl Error for StripPrefixError {
 /// Makes the path absolute without accessing the filesystem.
 ///
 /// If the path is relative, the current directory is used as the base directory.
-/// All intermediate components will be resolved according to platforms-specific
-/// rules but unlike [`canonicalize`][crate::fs::canonicalize] this does not
+/// All intermediate components will be resolved according to platform-specific
+/// rules, but unlike [`canonicalize`][crate::fs::canonicalize], this does not
 /// resolve symlinks and may succeed even if the path does not exist.
 ///
 /// If the `path` is empty or getting the
-/// [current directory][crate::env::current_dir] fails then an error will be
+/// [current directory][crate::env::current_dir] fails, then an error will be
 /// returned.
 ///
+/// # Platform-specific behavior
+///
+/// On POSIX platforms, the path is resolved using [POSIX semantics][posix-semantics],
+/// except that it stops short of resolving symlinks. This means it will keep `..`
+/// components and trailing slashes.
+///
+/// On Windows, for verbatim paths, this will simply return the path as given. For other
+/// paths, this is currently equivalent to calling
+/// [`GetFullPathNameW`][windows-path].
+///
+/// Note that these [may change in the future][changes].
+///
+/// # Errors
+///
+/// This function may return an error in the following situations:
+///
+/// * If `path` is syntactically invalid; in particular, if it is empty.
+/// * If getting the [current directory][crate::env::current_dir] fails.
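To see why the extra `chunk_bits` state matters, here is a toy model (my own illustration, not the patch's code) of the mixing step: hashing only the component bytes cannot tell `["foo", "bar"]` from `["foobar"]`, but folding each component's length in does:

```rust
// Mimics the wrapping_add + rotate_right(2) mixing used by `Hash for Path`.
fn chunk_bits(components: &[&str]) -> usize {
    let mut bits: usize = 0;
    for c in components {
        bits = bits.wrapping_add(c.len());
        bits = bits.rotate_right(2);
    }
    bits
}

fn main() {
    // Same concatenated bytes, different chunk structure.
    assert_ne!(chunk_bits(&["foo", "bar"]), chunk_bits(&["foobar"]));
}
```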
+/// /// # Examples /// /// ## POSIX paths @@ -3328,50 +3475,42 @@ impl Error for StripPrefixError { /// ``` /// # #[cfg(unix)] /// fn main() -> std::io::Result<()> { -/// use std::path::{self, Path}; +/// use std::path::{self, Path}; /// -/// // Relative to absolute -/// let absolute = path::absolute("foo/./bar")?; -/// assert!(absolute.ends_with("foo/bar")); +/// // Relative to absolute +/// let absolute = path::absolute("foo/./bar")?; +/// assert!(absolute.ends_with("foo/bar")); /// -/// // Absolute to absolute -/// let absolute = path::absolute("/foo//test/.././bar.rs")?; -/// assert_eq!(absolute, Path::new("/foo/test/../bar.rs")); -/// Ok(()) +/// // Absolute to absolute +/// let absolute = path::absolute("/foo//test/.././bar.rs")?; +/// assert_eq!(absolute, Path::new("/foo/test/../bar.rs")); +/// Ok(()) /// } /// # #[cfg(not(unix))] /// # fn main() {} /// ``` /// -/// The path is resolved using [POSIX semantics][posix-semantics] except that -/// it stops short of resolving symlinks. This means it will keep `..` -/// components and trailing slashes. -/// /// ## Windows paths /// /// ``` /// # #[cfg(windows)] /// fn main() -> std::io::Result<()> { -/// use std::path::{self, Path}; +/// use std::path::{self, Path}; /// -/// // Relative to absolute -/// let absolute = path::absolute("foo/./bar")?; -/// assert!(absolute.ends_with(r"foo\bar")); +/// // Relative to absolute +/// let absolute = path::absolute("foo/./bar")?; +/// assert!(absolute.ends_with(r"foo\bar")); /// -/// // Absolute to absolute -/// let absolute = path::absolute(r"C:\foo//test\..\./bar.rs")?; +/// // Absolute to absolute +/// let absolute = path::absolute(r"C:\foo//test\..\./bar.rs")?; /// -/// assert_eq!(absolute, Path::new(r"C:\foo\bar.rs")); -/// Ok(()) +/// assert_eq!(absolute, Path::new(r"C:\foo\bar.rs")); +/// Ok(()) /// } /// # #[cfg(not(windows))] /// # fn main() {} /// ``` /// -/// For verbatim paths this will simply return the path as given. For other -/// paths this is currently equivalent to calling -/// [`GetFullPathNameW`][windows-path]. -/// /// Note that this [may change in the future][changes]. /// /// [changes]: io#platform-specific-behavior diff --git a/std/src/path/tests.rs b/std/src/path/tests.rs index fde6ed4f0c057..3ade4fb892f5e 100644 --- a/std/src/path/tests.rs +++ b/std/src/path/tests.rs @@ -126,6 +126,16 @@ fn into() { assert_eq!(static_cow_path, owned_cow_path); } +#[test] +fn test_pathbuf_leak() { + let string = "/have/a/cake".to_owned(); + let (len, cap) = (string.len(), string.capacity()); + let buf = PathBuf::from(string); + let leaked = buf.leak(); + assert_eq!(leaked.as_os_str().as_encoded_bytes(), b"/have/a/cake"); + unsafe { drop(String::from_raw_parts(leaked.as_mut_os_str() as *mut OsStr as _, len, cap)) } +} + #[test] #[cfg(unix)] pub fn test_decompositions_unix() { @@ -1391,6 +1401,37 @@ pub fn test_set_extension() { tfe!("/", "foo", "/", false); } +#[test] +pub fn test_add_extension() { + macro_rules! 
tfe ( + ($path:expr, $ext:expr, $expected:expr, $output:expr) => ({ + let mut p = PathBuf::from($path); + let output = p.add_extension($ext); + assert!(p.to_str() == Some($expected) && output == $output, + "adding extension of {:?} to {:?}: Expected {:?}/{:?}, got {:?}/{:?}", + $path, $ext, $expected, $output, + p.to_str().unwrap(), output); + }); + ); + + tfe!("foo", "txt", "foo.txt", true); + tfe!("foo.bar", "txt", "foo.bar.txt", true); + tfe!("foo.bar.baz", "txt", "foo.bar.baz.txt", true); + tfe!(".test", "txt", ".test.txt", true); + tfe!("foo.txt", "", "foo.txt", true); + tfe!("foo", "", "foo", true); + tfe!("", "foo", "", false); + tfe!(".", "foo", ".", false); + tfe!("foo/", "bar", "foo.bar", true); + tfe!("foo/.", "bar", "foo.bar", true); + tfe!("..", "foo", "..", false); + tfe!("foo/..", "bar", "foo/..", false); + tfe!("/", "foo", "/", false); + + // edge cases + tfe!("/foo.ext////", "bar", "/foo.ext.bar", true); +} + #[test] pub fn test_with_extension() { macro_rules! twe ( @@ -1431,6 +1472,49 @@ pub fn test_with_extension() { twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.aaa_aaa_aaa"); } +#[test] +pub fn test_with_added_extension() { + macro_rules! twe ( + ($input:expr, $extension:expr, $expected:expr) => ({ + let input = Path::new($input); + let output = input.with_added_extension($extension); + + assert!( + output.to_str() == Some($expected), + "calling Path::new({:?}).with_added_extension({:?}): Expected {:?}, got {:?}", + $input, $extension, $expected, output, + ); + }); + ); + + twe!("foo", "txt", "foo.txt"); + twe!("foo.bar", "txt", "foo.bar.txt"); + twe!("foo.bar.baz", "txt", "foo.bar.baz.txt"); + twe!(".test", "txt", ".test.txt"); + twe!("foo.txt", "", "foo.txt"); + twe!("foo", "", "foo"); + twe!("", "foo", ""); + twe!(".", "foo", "."); + twe!("foo/", "bar", "foo.bar"); + twe!("foo/.", "bar", "foo.bar"); + twe!("..", "foo", ".."); + twe!("foo/..", "bar", "foo/.."); + twe!("/", "foo", "/"); + + // edge cases + twe!("/foo.ext////", "bar", "/foo.ext.bar"); + + // New extension is smaller than file name + twe!("aaa_aaa_aaa", "bbb_bbb", "aaa_aaa_aaa.bbb_bbb"); + // New extension is greater than file name + twe!("bbb_bbb", "aaa_aaa_aaa", "bbb_bbb.aaa_aaa_aaa"); + + // New extension is smaller than previous extension + twe!("ccc.aaa_aaa_aaa", "bbb_bbb", "ccc.aaa_aaa_aaa.bbb_bbb"); + // New extension is greater than previous extension + twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.bbb_bbb.aaa_aaa_aaa"); +} + #[test] fn test_eq_receivers() { use crate::borrow::Cow; @@ -1535,6 +1619,20 @@ pub fn test_compare() { relative_from: Some("") ); + tc!("foo//", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo///", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + tc!("foo/.", "foo", eq: true, starts_with: true, @@ -1549,6 +1647,20 @@ pub fn test_compare() { relative_from: Some("") ); + tc!("foo/.//bar", "foo/bar", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo//./bar", "foo/bar", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + tc!("foo/bar", "foo", eq: false, starts_with: true, @@ -1556,6 +1668,13 @@ pub fn test_compare() { relative_from: Some("bar") ); + tc!("foo/bar", "foobar", + eq: false, + starts_with: false, + ends_with: false, + relative_from: None + ); + tc!("foo/bar/baz", "foo/bar", eq: false, starts_with: true, @@ -1803,6 +1922,29 @@ fn test_windows_absolute() { assert_eq!(absolute(r"COM1").unwrap().as_os_str(), 
Path::new(r"\\.\COM1").as_os_str()); } +#[test] +#[should_panic = "path separator"] +fn test_extension_path_sep() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d/../../../../../etc/passwd"); +} + +#[test] +#[should_panic = "path separator"] +#[cfg(windows)] +fn test_extension_path_sep_alternate() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d\\test"); +} + +#[test] +#[cfg(not(windows))] +fn test_extension_path_sep_alternate() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d\\test"); + assert_eq!(path, Path::new("path/to/file.d\\test")); +} + #[bench] #[cfg_attr(miri, ignore)] // Miri isn't fast... fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) { diff --git a/std/src/prelude/common.rs b/std/src/prelude/common.rs index 01936734d7548..b231bd871b3b4 100644 --- a/std/src/prelude/common.rs +++ b/std/src/prelude/common.rs @@ -2,6 +2,9 @@ //! //! See the [module-level documentation](super) for more. +// No formatting: this file is nothing but re-exports, and their order is worth preserving. +#![cfg_attr(rustfmt, rustfmt::skip)] + // Re-exported core operators #[stable(feature = "rust1", since = "1.0.0")] #[doc(no_inline)] @@ -14,6 +17,9 @@ pub use crate::ops::{Drop, Fn, FnMut, FnOnce}; #[stable(feature = "rust1", since = "1.0.0")] #[doc(no_inline)] pub use crate::mem::drop; +#[stable(feature = "size_of_prelude", since = "1.80.0")] +#[doc(no_inline)] +pub use crate::mem::{align_of, align_of_val, size_of, size_of_val}; // Re-exported types and traits #[stable(feature = "rust1", since = "1.0.0")] diff --git a/std/src/prelude/mod.rs b/std/src/prelude/mod.rs index 0bdbab716adb4..0c610ba67e65c 100644 --- a/std/src/prelude/mod.rs +++ b/std/src/prelude/mod.rs @@ -36,6 +36,10 @@ //! operations for both destructors and overloading `()`. //! * [std::mem]::[drop], a convenience function for explicitly //! dropping a value. +//! * [std::mem]::{[size_of], [size_of_val]}, to get the size of +//! a type or value. +//! * [std::mem]::{[align_of], [align_of_val]}, to get the +//! alignment of a type or value. //! * [std::boxed]::[Box], a way to allocate values on the heap. //! * [std::borrow]::[ToOwned], the conversion trait that defines //! [`to_owned`], the generic method for creating an owned type from a @@ -91,6 +95,9 @@ //! [book-enums]: ../../book/ch06-01-defining-an-enum.html //! [book-iter]: ../../book/ch13-02-iterators.html +// No formatting: this file is nothing but re-exports, and their order is worth preserving. +#![cfg_attr(rustfmt, rustfmt::skip)] + #![stable(feature = "rust1", since = "1.0.0")] mod common; diff --git a/std/src/process.rs b/std/src/process.rs index c926c89f7a97f..fc86578a5ff2f 100644 --- a/std/src/process.rs +++ b/std/src/process.rs @@ -96,9 +96,9 @@ //! child processes must agree on how the commandline string is encoded. //! //! Most programs use the standard C run-time `argv`, which in practice results -//! in consistent argument handling. However some programs have their own way of +//! in consistent argument handling. However, some programs have their own way of //! parsing the commandline string. In these cases using [`arg`] or [`args`] may -//! result in the child process seeing a different array of arguments then the +//! result in the child process seeing a different array of arguments than the //! parent process intended. //! //! 
Two ways of mitigating this are:
@@ -629,6 +629,25 @@ impl Command {
     ///     .spawn()
     ///     .expect("sh command failed to start");
     /// ```
+    ///
+    /// # Caveats
+    ///
+    /// [`Command::new`] is only intended to accept the path of the program. If you pass a program
+    /// path along with arguments like `Command::new("ls -l").spawn()`, it will try to search for
+    /// `ls -l` literally. The arguments need to be passed separately, such as via [`arg`] or
+    /// [`args`].
+    ///
+    /// ```no_run
+    /// use std::process::Command;
+    ///
+    /// Command::new("ls")
+    ///     .arg("-l") // arg passed separately
+    ///     .spawn()
+    ///     .expect("ls command failed to start");
+    /// ```
+    ///
+    /// [`arg`]: Self::arg
+    /// [`args`]: Self::args
     #[stable(feature = "process", since = "1.0.0")]
     pub fn new<S: AsRef<OsStr>>(program: S) -> Command {
         Command { inner: imp::Command::new(program.as_ref()) }
diff --git a/std/src/process/tests.rs b/std/src/process/tests.rs
index 07d4de5c1a26e..055601d030799 100644
--- a/std/src/process/tests.rs
+++ b/std/src/process/tests.rs
@@ -137,7 +137,7 @@ fn child_stdout_read_buf() {
     let child = cmd.spawn().unwrap();
 
     let mut stdout = child.stdout.unwrap();
-    let mut buf: [MaybeUninit<u8>; 128] = MaybeUninit::uninit_array();
+    let mut buf: [MaybeUninit<u8>; 128] = [MaybeUninit::uninit(); 128];
     let mut buf = BorrowedBuf::from(buf.as_mut_slice());
     stdout.read_buf(buf.unfilled()).unwrap();
 
@@ -385,29 +385,25 @@ fn test_interior_nul_in_env_value_is_error() {
 #[cfg(windows)]
 fn test_creation_flags() {
     use crate::os::windows::process::CommandExt;
-    use crate::sys::c::{BOOL, DWORD, INFINITE};
-    #[repr(C, packed)]
+    use crate::sys::c::{BOOL, INFINITE};
+    #[repr(C)]
     struct DEBUG_EVENT {
-        pub event_code: DWORD,
-        pub process_id: DWORD,
-        pub thread_id: DWORD,
+        pub event_code: u32,
+        pub process_id: u32,
+        pub thread_id: u32,
         // This is a union in the real struct, but we don't
         // need this data for the purposes of this test.
         pub _junk: [u8; 164],
     }
 
     extern "system" {
-        fn WaitForDebugEvent(lpDebugEvent: *mut DEBUG_EVENT, dwMilliseconds: DWORD) -> BOOL;
-        fn ContinueDebugEvent(
-            dwProcessId: DWORD,
-            dwThreadId: DWORD,
-            dwContinueStatus: DWORD,
-        ) -> BOOL;
+        fn WaitForDebugEvent(lpDebugEvent: *mut DEBUG_EVENT, dwMilliseconds: u32) -> BOOL;
+        fn ContinueDebugEvent(dwProcessId: u32, dwThreadId: u32, dwContinueStatus: u32) -> BOOL;
     }
 
-    const DEBUG_PROCESS: DWORD = 1;
-    const EXIT_PROCESS_DEBUG_EVENT: DWORD = 5;
-    const DBG_EXCEPTION_NOT_HANDLED: DWORD = 0x80010001;
+    const DEBUG_PROCESS: u32 = 1;
+    const EXIT_PROCESS_DEBUG_EVENT: u32 = 5;
+    const DBG_EXCEPTION_NOT_HANDLED: u32 = 0x80010001;
 
     let mut child =
         Command::new("cmd").creation_flags(DEBUG_PROCESS).stdin(Stdio::piped()).spawn().unwrap();
diff --git a/std/src/rt.rs b/std/src/rt.rs
index 46f691d7b7504..deb4a8fa7eed0 100644
--- a/std/src/rt.rs
+++ b/std/src/rt.rs
@@ -16,7 +16,6 @@
 #![deny(unsafe_op_in_unsafe_fn)]
 #![allow(unused_macros)]
 
-// Re-export some of our utilities which are expected by other crates.
 pub use crate::panicking::{begin_panic, panic_count};
 pub use core::panicking::{panic_display, panic_fmt};
 
@@ -90,13 +89,14 @@ macro_rules! rtunwrap {
 // `compiler/rustc_session/src/config/sigpipe.rs`.
 #[cfg_attr(test, allow(dead_code))]
 unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) {
+    #[cfg_attr(target_os = "teeos", allow(unused_unsafe))]
     unsafe {
-        sys::init(argc, argv, sigpipe);
+        sys::init(argc, argv, sigpipe)
+    };
 
-        // Set up the current thread to give it the right name.
-        let thread = Thread::new_main();
-        thread::set_current(thread);
-    }
+    // Set up the current thread to give it the right name.
+    let thread = Thread::new_main();
+    thread::set_current(thread);
 }
 
 // One-time runtime cleanup.
@@ -144,6 +144,9 @@ fn lang_start_internal(
         rtabort!("drop of the panic payload panicked");
     });
     panic::catch_unwind(cleanup).map_err(rt_abort)?;
+    // Guard against multiple threads calling `libc::exit` concurrently.
+    // See the documentation for `unique_thread_exit` for more information.
+    panic::catch_unwind(|| crate::sys::exit_guard::unique_thread_exit()).map_err(rt_abort)?;
     ret_code
 }
 
@@ -156,7 +159,7 @@ fn lang_start<T: crate::process::Termination + 'static>(
     sigpipe: u8,
 ) -> isize {
     let Ok(v) = lang_start_internal(
-        &move || crate::sys_common::backtrace::__rust_begin_short_backtrace(main).report().to_i32(),
+        &move || crate::sys::backtrace::__rust_begin_short_backtrace(main).report().to_i32(),
         argc,
         argv,
         sigpipe,
diff --git a/std/src/sync/barrier.rs b/std/src/sync/barrier.rs
index b4bac081e7ab7..82cc13a74b7f1 100644
--- a/std/src/sync/barrier.rs
+++ b/std/src/sync/barrier.rs
@@ -20,7 +20,7 @@ use crate::sync::{Condvar, Mutex};
 ///     let c = Arc::clone(&barrier);
 ///     // The same messages will be printed together.
 ///     // You will NOT see any interleaving.
-///     handles.push(thread::spawn(move|| {
+///     handles.push(thread::spawn(move || {
 ///         println!("before wait");
 ///         c.wait();
 ///         println!("after wait");
@@ -115,7 +115,7 @@ impl Barrier {
     ///     let c = Arc::clone(&barrier);
     ///     // The same messages will be printed together.
     ///     // You will NOT see any interleaving.
-    ///     handles.push(thread::spawn(move|| {
+    ///     handles.push(thread::spawn(move || {
     ///         println!("before wait");
     ///         c.wait();
     ///         println!("after wait");
diff --git a/std/src/sync/condvar.rs b/std/src/sync/condvar.rs
index b20574e4f1493..08d46f356d9f2 100644
--- a/std/src/sync/condvar.rs
+++ b/std/src/sync/condvar.rs
@@ -35,6 +35,7 @@ impl WaitTimeoutResult {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
+    /// # let handle =
     /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///
@@ -58,6 +59,8 @@ impl WaitTimeoutResult {
     ///         break
     ///     }
     /// }
+    /// # // Prevent leaks for Miri.
+    /// # let _ = handle.join();
     /// ```
     #[must_use]
     #[stable(feature = "wait_timeout", since = "1.5.0")]
@@ -88,7 +91,7 @@ impl WaitTimeoutResult {
 /// let pair2 = Arc::clone(&pair);
 ///
 /// // Inside of our lock, spawn a new thread, and then wait for it to start.
-/// thread::spawn(move|| {
+/// thread::spawn(move || {
 ///     let (lock, cvar) = &*pair2;
 ///     let mut started = lock.lock().unwrap();
 ///     *started = true;
@@ -166,7 +169,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut started = lock.lock().unwrap();
     ///     *started = true;
@@ -221,7 +224,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(true), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut pending = lock.lock().unwrap();
     ///     *pending = false;
@@ -280,7 +283,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut started = lock.lock().unwrap();
     ///     *started = true;
@@ -352,7 +355,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut started = lock.lock().unwrap();
     ///     *started = true;
@@ -420,7 +423,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(true), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut pending = lock.lock().unwrap();
     ///     *pending = false;
@@ -484,7 +487,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
    ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut started = lock.lock().unwrap();
     ///     *started = true;
@@ -524,7 +527,7 @@ impl Condvar {
     /// let pair = Arc::new((Mutex::new(false), Condvar::new()));
     /// let pair2 = Arc::clone(&pair);
     ///
-    /// thread::spawn(move|| {
+    /// thread::spawn(move || {
     ///     let (lock, cvar) = &*pair2;
     ///     let mut started = lock.lock().unwrap();
     ///     *started = true;
diff --git a/std/src/sync/lazy_lock.rs b/std/src/sync/lazy_lock.rs
index 27b59cfc8c24d..18906aceffa30 100644
--- a/std/src/sync/lazy_lock.rs
+++ b/std/src/sync/lazy_lock.rs
@@ -29,40 +29,28 @@ union Data<T, F> {
 /// # Examples
 ///
 /// Initialize static variables with `LazyLock`.
-///
 /// ```
-/// #![feature(lazy_cell)]
-///
-/// use std::collections::HashMap;
-///
 /// use std::sync::LazyLock;
 ///
-/// static HASHMAP: LazyLock<HashMap<i32, String>> = LazyLock::new(|| {
-///     println!("initializing");
-///     let mut m = HashMap::new();
-///     m.insert(13, "Spica".to_string());
-///     m.insert(74, "Hoyten".to_string());
-///     m
+/// // n.b. static items do not call [`Drop`] on program termination, so this won't be deallocated.
+/// // this is fine, as the OS can deallocate the terminated program faster than we can free memory
+/// // but tools like valgrind might report "memory leaks" as it isn't obvious this is intentional.
+/// static DEEP_THOUGHT: LazyLock<String> = LazyLock::new(|| {
+/// # mod another_crate {
+/// #     pub fn great_question() -> String { "42".to_string() }
+/// # }
+///     // M3 Ultra takes about 16 million years in --release config
+///     another_crate::great_question()
 /// });
 ///
-/// fn main() {
-///     println!("ready");
-///     std::thread::spawn(|| {
-///         println!("{:?}", HASHMAP.get(&13));
-///     }).join().unwrap();
-///     println!("{:?}", HASHMAP.get(&74));
-///
-///     // Prints:
-///     //   ready
-///     //   initializing
-///     //   Some("Spica")
-///     //   Some("Hoyten")
-/// }
+/// // The `String` is built, stored in the `LazyLock`, and returned as `&String`.
+/// let _ = &*DEEP_THOUGHT;
+/// // The `String` is retrieved from the `LazyLock` and returned as `&String`.
+/// let _ = &*DEEP_THOUGHT;
 /// ```
+///
 /// Initialize fields with `LazyLock`.
 /// ```
-/// #![feature(lazy_cell)]
-///
 /// use std::sync::LazyLock;
 ///
 /// #[derive(Debug)]
@@ -76,8 +64,7 @@ union Data<T, F> {
 ///     println!("{}", *data.number);
 /// }
 /// ```
-
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 pub struct LazyLock<T, F = fn() -> T> {
     once: Once,
     data: UnsafeCell<Data<T, F>>,
@@ -85,8 +72,21 @@ pub struct LazyLock<T, F = fn() -> T> {
 
 impl<T, F: FnOnce() -> T> LazyLock<T, F> {
     /// Creates a new lazy value with the given initializing function.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::LazyLock;
+    ///
+    /// let hello = "Hello, World!".to_string();
+    ///
+    /// let lazy = LazyLock::new(|| hello.to_uppercase());
+    ///
+    /// assert_eq!(&*lazy, "HELLO, WORLD!");
+    /// ```
     #[inline]
-    #[unstable(feature = "lazy_cell", issue = "109736")]
+    #[stable(feature = "lazy_cell", since = "1.80.0")]
+    #[rustc_const_stable(feature = "lazy_cell", since = "1.80.0")]
     pub const fn new(f: F) -> LazyLock<T, F> {
         LazyLock { once: Once::new(), data: UnsafeCell::new(Data { f: ManuallyDrop::new(f) }) }
     }
@@ -107,8 +107,7 @@ impl<T, F: FnOnce() -> T> LazyLock<T, F> {
     /// # Examples
     ///
     /// ```
-    /// #![feature(lazy_cell)]
-    /// #![feature(lazy_cell_consume)]
+    /// #![feature(lazy_cell_into_inner)]
     ///
     /// use std::sync::LazyLock;
     ///
@@ -119,7 +118,7 @@ impl<T, F: FnOnce() -> T> LazyLock<T, F> {
     /// assert_eq!(&*lazy, "HELLO, WORLD!");
     /// assert_eq!(LazyLock::into_inner(lazy).ok(), Some("HELLO, WORLD!".to_string()));
     /// ```
-    #[unstable(feature = "lazy_cell_consume", issue = "109736")]
+    #[unstable(feature = "lazy_cell_into_inner", issue = "125623")]
     pub fn into_inner(mut this: Self) -> Result<T, F> {
         let state = this.once.state();
         match state {
@@ -145,8 +144,6 @@ impl<T, F: FnOnce() -> T> LazyLock<T, F> {
     /// # Examples
     ///
     /// ```
-    /// #![feature(lazy_cell)]
-    ///
     /// use std::sync::LazyLock;
     ///
     /// let lazy = LazyLock::new(|| 92);
     ///
@@ -155,7 +152,7 @@ impl<T, F: FnOnce() -> T> LazyLock<T, F> {
     /// assert_eq!(&*lazy, &92);
     /// ```
     #[inline]
-    #[unstable(feature = "lazy_cell", issue = "109736")]
+    #[stable(feature = "lazy_cell", since = "1.80.0")]
     pub fn force(this: &LazyLock<T, F>) -> &T {
         this.once.call_once(|| {
             // SAFETY: `call_once` only runs this closure once, ever.
@@ -191,7 +188,7 @@ impl<T, F> LazyLock<T, F> {
     }
 }
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T, F> Drop for LazyLock<T, F> {
     fn drop(&mut self) {
         match self.once.state() {
@@ -204,7 +201,7 @@ impl<T, F> Drop for LazyLock<T, F> {
     }
 }
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T, F: FnOnce() -> T> Deref for LazyLock<T, F> {
     type Target = T;
 
@@ -219,7 +216,7 @@ impl<T, F: FnOnce() -> T> Deref for LazyLock<T, F> {
     }
 }
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T: Default> Default for LazyLock<T> {
     /// Creates a new lazy value using `Default` as the initializing function.
     #[inline]
@@ -228,7 +225,7 @@ impl<T: Default> Default for LazyLock<T> {
     }
 }
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T: fmt::Debug, F> fmt::Debug for LazyLock<T, F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let mut d = f.debug_tuple("LazyLock");
@@ -242,13 +239,13 @@ impl<T: fmt::Debug, F> fmt::Debug for LazyLock<T, F> {
 
 // We never create a `&F` from a `&LazyLock<T, F>` so it is fine
 // to not impl `Sync` for `F`.
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 unsafe impl<T: Sync + Send, F: Send> Sync for LazyLock<T, F> {}
 // auto-derived `Send` impl is OK.
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T: RefUnwindSafe + UnwindSafe, F: UnwindSafe> RefUnwindSafe for LazyLock<T, F> {}
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 impl<T: UnwindSafe, F: UnwindSafe> UnwindSafe for LazyLock<T, F> {}
 
 #[cfg(test)]
diff --git a/std/src/sync/mod.rs b/std/src/sync/mod.rs
index e8c35bd48a70b..9a38c42f43a02 100644
--- a/std/src/sync/mod.rs
+++ b/std/src/sync/mod.rs
@@ -133,10 +133,14 @@
 //! - [`Mutex`]: Mutual Exclusion mechanism, which ensures that at
 //!   most one thread at a time is able to access some data.
 //!
-//! - [`Once`]: Used for a thread-safe, one-time global initialization routine
+//! - [`Once`]: Used for a thread-safe, one-time global initialization routine.
+//!   Mostly useful for implementing other types like `OnceLock`.
 //!
 //! - [`OnceLock`]: Used for thread-safe, one-time initialization of a
-//!   global variable.
+//!   variable, with potentially different initializers based on the caller.
+//!
+//! - [`LazyLock`]: Used for thread-safe, one-time initialization of a
+//!   variable, using one nullary initializer function provided at creation.
 //!
 //! - [`RwLock`]: Provides a mutual exclusion mechanism which allows
 //!   multiple readers at the same time, while allowing only one
@@ -179,7 +183,7 @@ pub use self::rwlock::{MappedRwLockReadGuard, MappedRwLockWriteGuard};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use self::rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 
-#[unstable(feature = "lazy_cell", issue = "109736")]
+#[stable(feature = "lazy_cell", since = "1.80.0")]
 pub use self::lazy_lock::LazyLock;
 #[stable(feature = "once_cell", since = "1.70.0")]
 pub use self::once_lock::OnceLock;
diff --git a/std/src/sync/mpmc/array.rs b/std/src/sync/mpmc/array.rs
index 492e21d9bdb63..185319add745f 100644
--- a/std/src/sync/mpmc/array.rs
+++ b/std/src/sync/mpmc/array.rs
@@ -200,11 +200,12 @@ impl<T> Channel<T> {
             return Err(msg);
         }
 
-        let slot: &Slot<T> = &*(token.array.slot as *const Slot<T>);
-
         // Write the message into the slot and update the stamp.
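As a rough illustration of the division of labor the `sync` module docs above describe (my own sketch, not from the patch): `LazyLock` fixes its initializer at creation, while `OnceLock` lets the first caller supply one:

```rust
use std::sync::{LazyLock, OnceLock};

// One nullary initializer, decided here, run on first deref.
static MOTD: LazyLock<String> = LazyLock::new(|| "hello".to_uppercase());

// No initializer yet; callers race to provide one, first write wins.
static CONFIG: OnceLock<String> = OnceLock::new();

fn main() {
    assert_eq!(&*MOTD, "HELLO");

    let cfg = CONFIG.get_or_init(|| "default".to_string());
    assert_eq!(cfg, "default");
    assert_eq!(CONFIG.get().map(String::as_str), Some("default"));
}
```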
- slot.msg.get().write(MaybeUninit::new(msg)); - slot.stamp.store(token.array.stamp, Ordering::Release); + unsafe { + let slot: &Slot = &*(token.array.slot as *const Slot); + slot.msg.get().write(MaybeUninit::new(msg)); + slot.stamp.store(token.array.stamp, Ordering::Release); + } // Wake a sleeping receiver. self.receivers.notify(); @@ -291,11 +292,14 @@ impl Channel { return Err(()); } - let slot: &Slot = &*(token.array.slot as *const Slot); - // Read the message from the slot and update the stamp. - let msg = slot.msg.get().read().assume_init(); - slot.stamp.store(token.array.stamp, Ordering::Release); + let msg = unsafe { + let slot: &Slot = &*(token.array.slot as *const Slot); + + let msg = slot.msg.get().read().assume_init(); + slot.stamp.store(token.array.stamp, Ordering::Release); + msg + }; // Wake a sleeping sender. self.senders.notify(); @@ -471,7 +475,7 @@ impl Channel { false }; - self.discard_all_messages(tail); + unsafe { self.discard_all_messages(tail) }; disconnected } diff --git a/std/src/sync/mpmc/counter.rs b/std/src/sync/mpmc/counter.rs index a5a6bdc67f13f..3478cf41dc9d2 100644 --- a/std/src/sync/mpmc/counter.rs +++ b/std/src/sync/mpmc/counter.rs @@ -63,7 +63,7 @@ impl Sender { disconnect(&self.counter().chan); if self.counter().destroy.swap(true, Ordering::AcqRel) { - drop(Box::from_raw(self.counter)); + drop(unsafe { Box::from_raw(self.counter) }); } } } @@ -116,7 +116,7 @@ impl Receiver { disconnect(&self.counter().chan); if self.counter().destroy.swap(true, Ordering::AcqRel) { - drop(Box::from_raw(self.counter)); + drop(unsafe { Box::from_raw(self.counter) }); } } } diff --git a/std/src/sync/mpmc/list.rs b/std/src/sync/mpmc/list.rs index 9e7148c716cda..edac7a0cb1835 100644 --- a/std/src/sync/mpmc/list.rs +++ b/std/src/sync/mpmc/list.rs @@ -91,7 +91,7 @@ impl Block { // It is not necessary to set the `DESTROY` bit in the last slot because that slot has // begun destruction of the block. for i in start..BLOCK_CAP - 1 { - let slot = (*this).slots.get_unchecked(i); + let slot = unsafe { (*this).slots.get_unchecked(i) }; // Mark the `DESTROY` bit if a thread is still using the slot. if slot.state.load(Ordering::Acquire) & READ == 0 @@ -103,7 +103,7 @@ impl Block { } // No thread is using the block, now it is safe to destroy it. - drop(Box::from_raw(this)); + drop(unsafe { Box::from_raw(this) }); } } @@ -265,9 +265,11 @@ impl Channel { // Write the message into the slot. let block = token.list.block as *mut Block; let offset = token.list.offset; - let slot = (*block).slots.get_unchecked(offset); - slot.msg.get().write(MaybeUninit::new(msg)); - slot.state.fetch_or(WRITE, Ordering::Release); + unsafe { + let slot = (*block).slots.get_unchecked(offset); + slot.msg.get().write(MaybeUninit::new(msg)); + slot.state.fetch_or(WRITE, Ordering::Release); + } // Wake a sleeping receiver. self.receivers.notify(); @@ -369,19 +371,21 @@ impl Channel { // Read the message. let block = token.list.block as *mut Block; let offset = token.list.offset; - let slot = (*block).slots.get_unchecked(offset); - slot.wait_write(); - let msg = slot.msg.get().read().assume_init(); - - // Destroy the block if we've reached the end, or if another thread wanted to destroy but - // couldn't because we were busy reading from the slot. 
- if offset + 1 == BLOCK_CAP { - Block::destroy(block, 0); - } else if slot.state.fetch_or(READ, Ordering::AcqRel) & DESTROY != 0 { - Block::destroy(block, offset + 1); - } + unsafe { + let slot = (*block).slots.get_unchecked(offset); + slot.wait_write(); + let msg = slot.msg.get().read().assume_init(); + + // Destroy the block if we've reached the end, or if another thread wanted to destroy but + // couldn't because we were busy reading from the slot. + if offset + 1 == BLOCK_CAP { + Block::destroy(block, 0); + } else if slot.state.fetch_or(READ, Ordering::AcqRel) & DESTROY != 0 { + Block::destroy(block, offset + 1); + } - Ok(msg) + Ok(msg) + } } /// Attempts to send a message into the channel. diff --git a/std/src/sync/mpmc/zero.rs b/std/src/sync/mpmc/zero.rs index 1b82713edc748..6d1c9d64e7a7a 100644 --- a/std/src/sync/mpmc/zero.rs +++ b/std/src/sync/mpmc/zero.rs @@ -103,9 +103,11 @@ impl Channel { return Err(msg); } - let packet = &*(token.zero.0 as *const Packet); - packet.msg.get().write(Some(msg)); - packet.ready.store(true, Ordering::Release); + unsafe { + let packet = &*(token.zero.0 as *const Packet); + packet.msg.get().write(Some(msg)); + packet.ready.store(true, Ordering::Release); + } Ok(()) } @@ -116,22 +118,24 @@ impl Channel { return Err(()); } - let packet = &*(token.zero.0 as *const Packet); + let packet = unsafe { &*(token.zero.0 as *const Packet) }; if packet.on_stack { // The message has been in the packet from the beginning, so there is no need to wait // for it. However, after reading the message, we need to set `ready` to `true` in // order to signal that the packet can be destroyed. - let msg = packet.msg.get().replace(None).unwrap(); + let msg = unsafe { packet.msg.get().replace(None) }.unwrap(); packet.ready.store(true, Ordering::Release); Ok(msg) } else { // Wait until the message becomes available, then read it and destroy the // heap-allocated packet. packet.wait_ready(); - let msg = packet.msg.get().replace(None).unwrap(); - drop(Box::from_raw(token.zero.0 as *mut Packet)); - Ok(msg) + unsafe { + let msg = packet.msg.get().replace(None).unwrap(); + drop(Box::from_raw(token.zero.0 as *mut Packet)); + Ok(msg) + } } } diff --git a/std/src/sync/mpsc/mod.rs b/std/src/sync/mpsc/mod.rs index d353c7bd5de9e..feee6948db4fd 100644 --- a/std/src/sync/mpsc/mod.rs +++ b/std/src/sync/mpsc/mod.rs @@ -51,7 +51,7 @@ //! //! // Create a simple streaming channel //! let (tx, rx) = channel(); -//! thread::spawn(move|| { +//! thread::spawn(move || { //! tx.send(10).unwrap(); //! }); //! assert_eq!(rx.recv().unwrap(), 10); @@ -69,7 +69,7 @@ //! let (tx, rx) = channel(); //! for i in 0..10 { //! let tx = tx.clone(); -//! thread::spawn(move|| { +//! thread::spawn(move || { //! tx.send(i).unwrap(); //! }); //! } @@ -99,7 +99,7 @@ //! use std::sync::mpsc::sync_channel; //! //! let (tx, rx) = sync_channel::(0); -//! thread::spawn(move|| { +//! thread::spawn(move || { //! // This will wait for the parent thread to start receiving //! tx.send(53).unwrap(); //! 
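The `unsafe` blocks threaded through the `mpmc` changes above follow the `unsafe_op_in_unsafe_fn` lint that the crate enables; a minimal sketch of what the lint requires:

```rust
#![deny(unsafe_op_in_unsafe_fn)]

// The body of an `unsafe fn` is no longer an implicit unsafe block:
// each unsafe operation must be wrapped explicitly, documenting
// exactly which step carries the proof obligation.
unsafe fn first_byte(p: *const u8) -> u8 {
    // SAFETY: the caller promises `p` is valid for reads.
    unsafe { *p }
}

fn main() {
    let x = 42u8;
    assert_eq!(unsafe { first_byte(&x) }, 42);
}
```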
}); @@ -510,7 +510,7 @@ pub enum TrySendError { /// let (sender, receiver) = channel(); /// /// // Spawn off an expensive computation -/// thread::spawn(move|| { +/// thread::spawn(move || { /// # fn expensive_computation() {} /// sender.send(expensive_computation()).unwrap(); /// }); @@ -561,7 +561,7 @@ pub fn channel() -> (Sender, Receiver) { /// // this returns immediately /// sender.send(1).unwrap(); /// -/// thread::spawn(move|| { +/// thread::spawn(move || { /// // this will block until the previous message has been received /// sender.send(2).unwrap(); /// }); diff --git a/std/src/sync/once.rs b/std/src/sync/once.rs index 608229fd674d8..9d969af8c6d84 100644 --- a/std/src/sync/once.rs +++ b/std/src/sync/once.rs @@ -10,9 +10,15 @@ use crate::fmt; use crate::panic::{RefUnwindSafe, UnwindSafe}; use crate::sys::sync as sys; -/// A synchronization primitive which can be used to run a one-time global -/// initialization. Useful for one-time initialization for FFI or related -/// functionality. This type can only be constructed with [`Once::new()`]. +/// A low-level synchronization primitive for one-time global execution. +/// +/// Previously this was the only "execute once" synchronization in `std`. +/// Other libraries implemented novel synchronizing types with `Once`, like +/// [`OnceLock`] or [`LazyLock`], before those were added to `std`. +/// `OnceLock` in particular supersedes `Once` in functionality and should +/// be preferred for the common case where the `Once` is associated with data. +/// +/// This type can only be constructed with [`Once::new()`]. /// /// # Examples /// @@ -25,6 +31,9 @@ use crate::sys::sync as sys; /// // run initialization here /// }); /// ``` +/// +/// [`OnceLock`]: crate::sync::OnceLock +/// [`LazyLock`]: crate::sync::LazyLock #[stable(feature = "rust1", since = "1.0.0")] pub struct Once { inner: sys::Once, diff --git a/std/src/sync/once_lock.rs b/std/src/sync/once_lock.rs index fc830baccedd2..94955beaf37b7 100644 --- a/std/src/sync/once_lock.rs +++ b/std/src/sync/once_lock.rs @@ -5,50 +5,20 @@ use crate::mem::MaybeUninit; use crate::panic::{RefUnwindSafe, UnwindSafe}; use crate::sync::Once; -/// A synchronization primitive which can be written to only once. +/// A synchronization primitive which can nominally be written to only once. /// /// This type is a thread-safe [`OnceCell`], and can be used in statics. +/// In many simple cases, you can use [`LazyLock`] instead to get the benefits of this type +/// with less effort: `LazyLock` "looks like" `&T` because it initializes with `F` on deref! +/// Where OnceLock shines is when LazyLock is too simple to support a given case, as LazyLock +/// doesn't allow additional inputs to its function after you call [`LazyLock::new(|| ...)`]. /// /// [`OnceCell`]: crate::cell::OnceCell +/// [`LazyLock`]: crate::sync::LazyLock +/// [`LazyLock::new(|| ...)`]: crate::sync::LazyLock::new /// /// # Examples /// -/// Using `OnceLock` to store a function’s previously computed value (a.k.a. -/// ‘lazy static’ or ‘memoizing’): -/// -/// ``` -/// use std::sync::OnceLock; -/// -/// struct DeepThought { -/// answer: String, -/// } -/// -/// impl DeepThought { -/// # fn great_question() -> String { -/// # "42".to_string() -/// # } -/// # -/// fn new() -> Self { -/// Self { -/// // M3 Ultra takes about 16 million years in --release config -/// answer: Self::great_question(), -/// } -/// } -/// } -/// -/// fn computation() -> &'static DeepThought { -/// // n.b. 
static items do not call [`Drop`] on program termination, so if
-/// //   [`DeepThought`] impls Drop, that will not be used for this instance.
-///     static COMPUTATION: OnceLock<DeepThought> = OnceLock::new();
-///     COMPUTATION.get_or_init(|| DeepThought::new())
-/// }
-///
-/// // The `DeepThought` is built, stored in the `OnceLock`, and returned.
-/// let _ = computation().answer;
-/// // The `DeepThought` is retrieved from the `OnceLock` and returned.
-/// let _ = computation().answer;
-/// ```
-///
 /// Writing to a `OnceLock` from a separate thread:
 ///
 /// ```
@@ -73,6 +43,62 @@ use crate::sync::Once;
 ///     Some(&12345),
 /// );
 /// ```
+///
+/// You can use `OnceLock` to implement a type that requires "append-only" logic:
+///
+/// ```
+/// use std::sync::{OnceLock, atomic::{AtomicU32, Ordering}};
+/// use std::thread;
+///
+/// struct OnceList<T> {
+///     data: OnceLock<T>,
+///     next: OnceLock<Box<OnceList<T>>>,
+/// }
+/// impl<T> OnceList<T> {
+///     const fn new() -> OnceList<T> {
+///         OnceList { data: OnceLock::new(), next: OnceLock::new() }
+///     }
+///     fn push(&self, value: T) {
+///         // FIXME: this impl is concise, but is also slow for long lists or many threads.
+///         // as an exercise, consider how you might improve on it while preserving the behavior
+///         if let Err(value) = self.data.set(value) {
+///             let next = self.next.get_or_init(|| Box::new(OnceList::new()));
+///             next.push(value)
+///         };
+///     }
+///     fn contains(&self, example: &T) -> bool
+///     where
+///         T: PartialEq,
+///     {
+///         self.data.get().map(|item| item == example).filter(|v| *v).unwrap_or_else(|| {
+///             self.next.get().map(|next| next.contains(example)).unwrap_or(false)
+///         })
+///     }
+/// }
+///
+/// // Let's exercise this new Sync append-only list by doing a little counting
+/// static LIST: OnceList<u32> = OnceList::new();
+/// static COUNTER: AtomicU32 = AtomicU32::new(0);
+///
+/// # const LEN: u32 = if cfg!(miri) { 50 } else { 1000 };
+/// # /*
+/// const LEN: u32 = 1000;
+/// # */
+/// thread::scope(|s| {
+///     for _ in 0..thread::available_parallelism().unwrap().get() {
+///         s.spawn(|| {
+///             while let i @ 0..LEN = COUNTER.fetch_add(1, Ordering::Relaxed) {
+///                 LIST.push(i);
+///             }
+///         });
+///     }
+/// });
+///
+/// for i in 0..LEN {
+///     assert!(LIST.contains(&i));
+/// }
+///
+/// ```
 #[stable(feature = "once_cell", since = "1.70.0")]
 pub struct OnceLock<T> {
     once: Once,
@@ -476,7 +502,7 @@ impl<T> OnceLock<T> {
     #[inline]
     unsafe fn get_unchecked(&self) -> &T {
         debug_assert!(self.is_initialized());
-        (&*self.value.get()).assume_init_ref()
+        unsafe { (&*self.value.get()).assume_init_ref() }
     }
 
     /// # Safety
@@ -485,7 +511,7 @@ impl<T> OnceLock<T> {
     #[inline]
     unsafe fn get_unchecked_mut(&mut self) -> &mut T {
         debug_assert!(self.is_initialized());
-        (&mut *self.value.get()).assume_init_mut()
+        unsafe { (&mut *self.value.get()).assume_init_mut() }
     }
 }
 
diff --git a/std/src/sync/reentrant_lock.rs b/std/src/sync/reentrant_lock.rs
index 80b9e0cf15214..042c439394e06 100644
--- a/std/src/sync/reentrant_lock.rs
+++ b/std/src/sync/reentrant_lock.rs
@@ -116,6 +116,9 @@ pub struct ReentrantLockGuard<'a, T: ?Sized + 'a> {
 #[unstable(feature = "reentrant_lock", issue = "121440")]
 impl<T: ?Sized> !Send for ReentrantLockGuard<'_, T> {}
 
+#[unstable(feature = "reentrant_lock", issue = "121440")]
+unsafe impl<T: ?Sized + Sync> Sync for ReentrantLockGuard<'_, T> {}
+
 #[unstable(feature = "reentrant_lock", issue = "121440")]
 impl<T> ReentrantLock<T> {
     /// Creates a new re-entrant lock in an unlocked state ready for use.
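A small nightly-only sketch (feature `reentrant_lock`; illustrative, not from the patch) of what the new `Sync` impl for `ReentrantLockGuard` permits, namely sharing a held guard across threads when the protected data is `Sync`:

```rust
#![feature(reentrant_lock)]

use std::sync::ReentrantLock;
use std::thread;

fn main() {
    let lock = ReentrantLock::new(5_i32);
    let guard = lock.lock();

    // `&ReentrantLockGuard` is now `Send` because the guard is `Sync`.
    thread::scope(|s| {
        s.spawn(|| assert_eq!(*guard, 5));
    });

    drop(guard);
}
```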
@@ -241,7 +244,9 @@ impl ReentrantLock { } unsafe fn increment_lock_count(&self) -> Option<()> { - *self.lock_count.get() = (*self.lock_count.get()).checked_add(1)?; + unsafe { + *self.lock_count.get() = (*self.lock_count.get()).checked_add(1)?; + } Some(()) } } diff --git a/std/src/sync/rwlock.rs b/std/src/sync/rwlock.rs index e0a8a7603d71a..a4ec52a4abe63 100644 --- a/std/src/sync/rwlock.rs +++ b/std/src/sync/rwlock.rs @@ -578,7 +578,7 @@ impl<'rwlock, T: ?Sized> RwLockReadGuard<'rwlock, T> { // successfully called from the same thread before instantiating this object. unsafe fn new(lock: &'rwlock RwLock) -> LockResult> { poison::map_result(lock.poison.borrow(), |()| RwLockReadGuard { - data: NonNull::new_unchecked(lock.data.get()), + data: unsafe { NonNull::new_unchecked(lock.data.get()) }, inner_lock: &lock.inner, }) } diff --git a/std/src/sys_common/backtrace.rs b/std/src/sys/backtrace.rs similarity index 84% rename from std/src/sys_common/backtrace.rs rename to std/src/sys/backtrace.rs index 67711dbd5bc75..7401d8ce32087 100644 --- a/std/src/sys_common/backtrace.rs +++ b/std/src/sys/backtrace.rs @@ -1,50 +1,47 @@ +//! Common code for printing backtraces. + use crate::backtrace_rs::{self, BacktraceFmt, BytesOrWideString, PrintFmt}; use crate::borrow::Cow; -/// Common code for printing the backtrace in the same way across the different -/// supported platforms. use crate::env; use crate::fmt; use crate::io; use crate::io::prelude::*; use crate::path::{self, Path, PathBuf}; -use crate::sync::{Mutex, PoisonError}; +use crate::sync::{Mutex, MutexGuard, PoisonError}; /// Max number of frames to print. const MAX_NB_FRAMES: usize = 100; -pub fn lock() -> impl Drop { +pub(crate) struct BacktraceLock<'a>(#[allow(dead_code)] MutexGuard<'a, ()>); + +pub(crate) fn lock<'a>() -> BacktraceLock<'a> { static LOCK: Mutex<()> = Mutex::new(()); - LOCK.lock().unwrap_or_else(PoisonError::into_inner) + BacktraceLock(LOCK.lock().unwrap_or_else(PoisonError::into_inner)) } -/// Prints the current backtrace. -pub fn print(w: &mut dyn Write, format: PrintFmt) -> io::Result<()> { - // There are issues currently linking libbacktrace into tests, and in - // general during std's own unit tests we're not testing this path. In - // test mode immediately return here to optimize away any references to the - // libbacktrace symbols - if cfg!(test) { - return Ok(()); - } - - // Use a lock to prevent mixed output in multithreading context. - // Some platforms also requires it, like `SymFromAddr` on Windows. - unsafe { - let _lock = lock(); - _print(w, format) - } -} +impl BacktraceLock<'_> { + /// Prints the current backtrace. + /// + /// NOTE: this function is not Sync. The caller must hold a mutex lock, or there must be only one thread in the program. + pub(crate) fn print(&mut self, w: &mut dyn Write, format: PrintFmt) -> io::Result<()> { + // There are issues currently linking libbacktrace into tests, and in + // general during std's own unit tests we're not testing this path. 
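The `BacktraceLock` refactor above is an instance of a lock-token pattern: moving the operation onto the guard type makes it impossible to print without holding the lock. A generic sketch of the idea (names invented for illustration):

```rust
use std::sync::{Mutex, MutexGuard, PoisonError};

static LOCK: Mutex<()> = Mutex::new(());

// Holding a `Token` *is* holding the lock.
struct Token<'a>(#[allow(dead_code)] MutexGuard<'a, ()>);

fn lock() -> Token<'static> {
    Token(LOCK.lock().unwrap_or_else(PoisonError::into_inner))
}

impl Token<'_> {
    // Only callable through a token, so output can never interleave.
    fn print(&mut self, msg: &str) {
        println!("{msg}"); // stand-in for the real backtrace printing
    }
}

fn main() {
    lock().print("serialized output");
}
```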
In + // test mode immediately return here to optimize away any references to the + // libbacktrace symbols + if cfg!(test) { + return Ok(()); + } -unsafe fn _print(w: &mut dyn Write, format: PrintFmt) -> io::Result<()> { - struct DisplayBacktrace { - format: PrintFmt, - } - impl fmt::Display for DisplayBacktrace { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - unsafe { _print_fmt(fmt, self.format) } + struct DisplayBacktrace { + format: PrintFmt, + } + impl fmt::Display for DisplayBacktrace { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + unsafe { _print_fmt(fmt, self.format) } + } } + write!(w, "{}", DisplayBacktrace { format }) } - write!(w, "{}", DisplayBacktrace { format }) } unsafe fn _print_fmt(fmt: &mut fmt::Formatter<'_>, print_fmt: PrintFmt) -> fmt::Result { diff --git a/std/src/sys/exit_guard.rs b/std/src/sys/exit_guard.rs new file mode 100644 index 0000000000000..5a090f506661d --- /dev/null +++ b/std/src/sys/exit_guard.rs @@ -0,0 +1,72 @@ +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + /// pthread_t is a pointer on some platforms, + /// so we wrap it in this to impl Send + Sync. + #[derive(Clone, Copy)] + #[repr(transparent)] + struct PThread(libc::pthread_t); + // Safety: pthread_t is safe to send between threads + unsafe impl Send for PThread {} + // Safety: pthread_t is safe to share between threads + unsafe impl Sync for PThread {} + /// Mitigation for + /// + /// On glibc, `libc::exit` has been observed to not always be thread-safe. + /// It is currently unclear whether that is a glibc bug or allowed by the standard. + /// To mitigate this problem, we ensure that only one + /// Rust thread calls `libc::exit` (or returns from `main`) by calling this function before + /// calling `libc::exit` (or returning from `main`). + /// + /// Technically, this is not enough to ensure soundness, since other code directly calling + /// `libc::exit` will still race with this. + /// + /// *This function does not itself call `libc::exit`.* This is so it can also be used + /// to guard returning from `main`. + /// + /// This function will return only the first time it is called in a process. + /// + /// * If it is called again on the same thread as the first call, it will abort. + /// * If it is called again on a different thread, it will wait in a loop + /// (waiting for the process to exit). + #[cfg_attr(any(test, doctest), allow(dead_code))] + pub(crate) fn unique_thread_exit() { + let this_thread_id = unsafe { libc::pthread_self() }; + use crate::sync::{Mutex, PoisonError}; + static EXITING_THREAD_ID: Mutex> = Mutex::new(None); + let mut exiting_thread_id = + EXITING_THREAD_ID.lock().unwrap_or_else(PoisonError::into_inner); + match *exiting_thread_id { + None => { + // This is the first thread to call `unique_thread_exit`, + // and this is the first time it is called. + // Set EXITING_THREAD_ID to this thread's ID and return. + *exiting_thread_id = Some(PThread(this_thread_id)); + }, + Some(exiting_thread_id) if exiting_thread_id.0 == this_thread_id => { + // This is the first thread to call `unique_thread_exit`, + // but this is the second time it is called. + // Abort the process. + core::panicking::panic_nounwind("std::process::exit called re-entrantly") + } + Some(_) => { + // This is not the first thread to call `unique_thread_exit`. + // Pause until the process exits. + drop(exiting_thread_id); + loop { + // Safety: libc::pause is safe to call. 
+ unsafe { libc::pause(); } + } + } + } + } + } else { + /// Mitigation for + /// + /// Mitigation is ***NOT*** implemented on this platform, either because this platform + /// is not affected, or because mitigation is not yet implemented for this platform. + #[cfg_attr(any(test, doctest), allow(dead_code))] + pub(crate) fn unique_thread_exit() { + // Mitigation not required on platforms where `exit` is thread-safe. + } + } +} diff --git a/std/src/sys/mod.rs b/std/src/sys/mod.rs index 8f70cefc60121..e50758ce00d8b 100644 --- a/std/src/sys/mod.rs +++ b/std/src/sys/mod.rs @@ -1,3 +1,5 @@ +#![allow(unsafe_op_in_unsafe_fn)] + /// The PAL (platform abstraction layer) contains platform-specific abstractions /// for implementing the features in the other submodules, e.g. UNIX file /// descriptors. @@ -5,7 +7,9 @@ mod pal; mod personality; +pub mod backtrace; pub mod cmath; +pub mod exit_guard; pub mod os_str; pub mod path; pub mod sync; diff --git a/std/src/sys/os_str/bytes.rs b/std/src/sys/os_str/bytes.rs index 18b969bca85a6..2a7477e3afc20 100644 --- a/std/src/sys/os_str/bytes.rs +++ b/std/src/sys/os_str/bytes.rs @@ -176,6 +176,11 @@ impl Buf { self.inner.extend_from_slice(&s.inner) } + #[inline] + pub fn leak<'a>(self) -> &'a mut Slice { + unsafe { mem::transmute(self.inner.leak()) } + } + #[inline] pub fn into_box(self) -> Box { unsafe { mem::transmute(self.inner.into_boxed_slice()) } @@ -197,10 +202,20 @@ impl Buf { self.as_slice().into_rc() } - /// Part of a hack to make PathBuf::push/pop more efficient. + /// Provides plumbing to core `Vec::truncate`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. #[inline] - pub(crate) fn as_mut_vec_for_path_buf(&mut self) -> &mut Vec { - &mut self.inner + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); } } diff --git a/std/src/sys/os_str/mod.rs b/std/src/sys/os_str/mod.rs index b509729475bf7..345e661586d03 100644 --- a/std/src/sys/os_str/mod.rs +++ b/std/src/sys/os_str/mod.rs @@ -1,3 +1,5 @@ +#![forbid(unsafe_op_in_unsafe_fn)] + cfg_if::cfg_if! { if #[cfg(any( target_os = "windows", diff --git a/std/src/sys/os_str/wtf8.rs b/std/src/sys/os_str/wtf8.rs index b3ceb55802dc5..806bf033dbc94 100644 --- a/std/src/sys/os_str/wtf8.rs +++ b/std/src/sys/os_str/wtf8.rs @@ -1,5 +1,5 @@ -/// The underlying OsString/OsStr implementation on Windows is a -/// wrapper around the "WTF-8" encoding; see the `wtf8` module for more. +//! The underlying OsString/OsStr implementation on Windows is a +//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more. 
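For callers, the new `Buf::leak` plumbing surfaces as `OsString::leak`/`PathBuf::leak`. A sketch of the intended use on nightly; note that I am assuming the `os_string_pathbuf_leak` gate name from the tracking issue:

```rust
#![feature(os_string_pathbuf_leak)] // gate name assumed from the tracking issue

use std::path::{Path, PathBuf};

fn main() {
    // Trade the allocation for a `&'static mut Path`: handy for configuration
    // computed once at startup and referenced for the life of the process.
    let config: &'static Path = PathBuf::from("/etc/app/config.toml").leak();
    assert_eq!(config, Path::new("/etc/app/config.toml"));
}
```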
use crate::borrow::Cow; use crate::collections::TryReserveError; use crate::fmt; @@ -70,7 +70,7 @@ impl Buf { #[inline] pub unsafe fn from_encoded_bytes_unchecked(s: Vec) -> Self { - Self { inner: Wtf8Buf::from_bytes_unchecked(s) } + unsafe { Self { inner: Wtf8Buf::from_bytes_unchecked(s) } } } pub fn with_capacity(capacity: usize) -> Buf { @@ -138,6 +138,11 @@ impl Buf { self.inner.shrink_to(min_capacity) } + #[inline] + pub fn leak<'a>(self) -> &'a mut Slice { + unsafe { mem::transmute(self.inner.leak()) } + } + #[inline] pub fn into_box(self) -> Box { unsafe { mem::transmute(self.inner.into_box()) } @@ -159,10 +164,20 @@ impl Buf { self.as_slice().into_rc() } - /// Part of a hack to make PathBuf::push/pop more efficient. + /// Provides plumbing to core `Vec::truncate`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. #[inline] - pub(crate) fn as_mut_vec_for_path_buf(&mut self) -> &mut Vec { - self.inner.as_mut_vec_for_path_buf() + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); } } @@ -174,7 +189,7 @@ impl Slice { #[inline] pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { - mem::transmute(Wtf8::from_bytes_unchecked(s)) + unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) } } #[track_caller] diff --git a/std/src/sys/pal/hermit/fd.rs b/std/src/sys/pal/hermit/fd.rs index d7dab08cfbd57..3c52b85de23a2 100644 --- a/std/src/sys/pal/hermit/fd.rs +++ b/std/src/sys/pal/hermit/fd.rs @@ -1,7 +1,8 @@ #![unstable(reason = "not public", issue = "none", feature = "fd")] use super::hermit_abi; -use crate::io::{self, Read}; +use crate::cmp; +use crate::io::{self, IoSlice, IoSliceMut, Read}; use crate::os::hermit::io::{FromRawFd, OwnedFd, RawFd}; use crate::sys::cvt; use crate::sys::unsupported; @@ -9,6 +10,10 @@ use crate::sys_common::{AsInner, FromInner, IntoInner}; use crate::os::hermit::io::*; +const fn max_iov() -> usize { + hermit_abi::IOV_MAX +} + #[derive(Debug)] pub struct FileDesc { fd: OwnedFd, @@ -21,6 +26,22 @@ impl FileDesc { Ok(result as usize) } + pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { + let ret = cvt(unsafe { + hermit_abi::readv( + self.as_raw_fd(), + bufs.as_mut_ptr() as *mut hermit_abi::iovec as *const hermit_abi::iovec, + cmp::min(bufs.len(), max_iov()), + ) + })?; + Ok(ret as usize) + } + + #[inline] + pub fn is_read_vectored(&self) -> bool { + true + } + pub fn read_to_end(&self, buf: &mut Vec) -> io::Result { let mut me = self; (&mut me).read_to_end(buf) @@ -32,6 +53,22 @@ impl FileDesc { Ok(result as usize) } + pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result { + let ret = cvt(unsafe { + hermit_abi::writev( + self.as_raw_fd(), + bufs.as_ptr() as *const hermit_abi::iovec, + cmp::min(bufs.len(), max_iov()), + ) + })?; + Ok(ret as usize) + } + + #[inline] + pub fn is_write_vectored(&self) -> bool { + true + } + pub fn duplicate(&self) -> io::Result { self.duplicate_path(&[]) } diff --git a/std/src/sys/pal/hermit/fs.rs b/std/src/sys/pal/hermit/fs.rs index a4a16e6e86b0c..e4e9eee044efa 100644 --- a/std/src/sys/pal/hermit/fs.rs +++ b/std/src/sys/pal/hermit/fs.rs @@ -1,7 +1,7 @@ use super::fd::FileDesc; use super::hermit_abi::{ self, dirent64, stat as 
stat_struct, DT_DIR, DT_LNK, DT_REG, DT_UNKNOWN, O_APPEND, O_CREAT, - O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, + O_DIRECTORY, O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, }; use crate::ffi::{CStr, OsStr, OsString}; use crate::fmt; @@ -18,7 +18,7 @@ use crate::sys::time::SystemTime; use crate::sys::unsupported; use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; -pub use crate::sys_common::fs::{copy, try_exists}; +pub use crate::sys_common::fs::{copy, exists}; #[derive(Debug)] pub struct File(FileDesc); @@ -62,7 +62,7 @@ pub struct DirEntry { /// 64-bit inode number ino: u64, /// File type - type_: u32, + type_: u8, /// name of the entry name: OsString, } @@ -90,7 +90,7 @@ pub struct FilePermissions { #[derive(Copy, Clone, Eq, Debug)] pub struct FileType { - mode: u32, + mode: u8, } impl PartialEq for FileType { @@ -112,31 +112,23 @@ pub struct DirBuilder { impl FileAttr { pub fn modified(&self) -> io::Result { - Ok(SystemTime::new( - self.stat_val.st_mtime.try_into().unwrap(), - self.stat_val.st_mtime_nsec.try_into().unwrap(), - )) + Ok(SystemTime::new(self.stat_val.st_mtim.tv_sec, self.stat_val.st_mtim.tv_nsec)) } pub fn accessed(&self) -> io::Result { - Ok(SystemTime::new( - self.stat_val.st_atime.try_into().unwrap(), - self.stat_val.st_atime_nsec.try_into().unwrap(), - )) + Ok(SystemTime::new(self.stat_val.st_atim.tv_sec, self.stat_val.st_atim.tv_nsec)) } pub fn created(&self) -> io::Result { - Ok(SystemTime::new( - self.stat_val.st_ctime.try_into().unwrap(), - self.stat_val.st_ctime_nsec.try_into().unwrap(), - )) + Ok(SystemTime::new(self.stat_val.st_ctim.tv_sec, self.stat_val.st_ctim.tv_nsec)) } pub fn size(&self) -> u64 { self.stat_val.st_size as u64 } + pub fn perm(&self) -> FilePermissions { - FilePermissions { mode: (self.stat_val.st_mode) } + FilePermissions { mode: self.stat_val.st_mode } } pub fn file_type(&self) -> FileType { @@ -220,7 +212,7 @@ impl Iterator for ReadDir { let entry = DirEntry { root: self.inner.root.clone(), ino: dir.d_ino, - type_: dir.d_type as u32, + type_: dir.d_type, name: OsString::from_vec(name_bytes.to_vec()), }; @@ -251,7 +243,7 @@ impl DirEntry { } pub fn file_type(&self) -> io::Result { - Ok(FileType { mode: self.type_ as u32 }) + Ok(FileType { mode: self.type_ }) } #[allow(dead_code)] @@ -385,12 +377,12 @@ impl File { } pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { - crate::io::default_read_vectored(|buf| self.read(buf), bufs) + self.0.read_vectored(bufs) } #[inline] pub fn is_read_vectored(&self) -> bool { - false + self.0.is_read_vectored() } pub fn read_buf(&self, cursor: BorrowedCursor<'_>) -> io::Result<()> { @@ -402,12 +394,12 @@ impl File { } pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result { - crate::io::default_write_vectored(|buf| self.write(buf), bufs) + self.0.write_vectored(bufs) } #[inline] pub fn is_write_vectored(&self) -> bool { - false + self.0.is_write_vectored() } #[inline] @@ -439,13 +431,13 @@ impl DirBuilder { pub fn mkdir(&self, path: &Path) -> io::Result<()> { run_path_with_cstr(path, &|path| { - cvt(unsafe { hermit_abi::mkdir(path.as_ptr(), self.mode) }).map(|_| ()) + cvt(unsafe { hermit_abi::mkdir(path.as_ptr(), self.mode.into()) }).map(|_| ()) }) } #[allow(dead_code)] pub fn set_mode(&mut self, mode: u32) { - self.mode = mode as u32; + self.mode = mode; } } @@ -501,8 +493,9 @@ impl FromRawFd for File { } pub fn readdir(path: &Path) -> io::Result { - let fd_raw = - run_path_with_cstr(path, 
&|path| cvt(unsafe { hermit_abi::opendir(path.as_ptr()) }))?; + let fd_raw = run_path_with_cstr(path, &|path| { + cvt(unsafe { hermit_abi::open(path.as_ptr(), O_RDONLY | O_DIRECTORY, 0) }) + })?; let fd = unsafe { FileDesc::from_raw_fd(fd_raw as i32) }; let root = path.to_path_buf(); diff --git a/std/src/sys/pal/hermit/futex.rs b/std/src/sys/pal/hermit/futex.rs index 571b288565871..21c5facd52fbd 100644 --- a/std/src/sys/pal/hermit/futex.rs +++ b/std/src/sys/pal/hermit/futex.rs @@ -3,6 +3,11 @@ use crate::ptr::null; use crate::sync::atomic::AtomicU32; use crate::time::Duration; +/// An atomic for use as a futex that is at least 8-bits but may be larger. +pub type SmallAtomic = AtomicU32; +/// Must be the underlying type of SmallAtomic +pub type SmallPrimitive = u32; + pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) -> bool { // Calculate the timeout as a relative timespec. // @@ -10,7 +15,7 @@ pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) - let timespec = timeout.and_then(|dur| { Some(hermit_abi::timespec { tv_sec: dur.as_secs().try_into().ok()?, - tv_nsec: dur.subsec_nanos().into(), + tv_nsec: dur.subsec_nanos().try_into().ok()?, }) }); diff --git a/std/src/sys/pal/hermit/io.rs b/std/src/sys/pal/hermit/io.rs new file mode 100644 index 0000000000000..9de7b53e53c03 --- /dev/null +++ b/std/src/sys/pal/hermit/io.rs @@ -0,0 +1,82 @@ +use crate::marker::PhantomData; +use crate::os::hermit::io::{AsFd, AsRawFd}; +use crate::slice; + +use hermit_abi::{c_void, iovec}; + +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct IoSlice<'a> { + vec: iovec, + _p: PhantomData<&'a [u8]>, +} + +impl<'a> IoSlice<'a> { + #[inline] + pub fn new(buf: &'a [u8]) -> IoSlice<'a> { + IoSlice { + vec: iovec { iov_base: buf.as_ptr() as *mut u8 as *mut c_void, iov_len: buf.len() }, + _p: PhantomData, + } + } + + #[inline] + pub fn advance(&mut self, n: usize) { + if self.vec.iov_len < n { + panic!("advancing IoSlice beyond its length"); + } + + unsafe { + self.vec.iov_len -= n; + self.vec.iov_base = self.vec.iov_base.add(n); + } + } + + #[inline] + pub fn as_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.vec.iov_base as *mut u8, self.vec.iov_len) } + } +} + +#[repr(transparent)] +pub struct IoSliceMut<'a> { + vec: iovec, + _p: PhantomData<&'a mut [u8]>, +} + +impl<'a> IoSliceMut<'a> { + #[inline] + pub fn new(buf: &'a mut [u8]) -> IoSliceMut<'a> { + IoSliceMut { + vec: iovec { iov_base: buf.as_mut_ptr() as *mut c_void, iov_len: buf.len() }, + _p: PhantomData, + } + } + + #[inline] + pub fn advance(&mut self, n: usize) { + if self.vec.iov_len < n { + panic!("advancing IoSliceMut beyond its length"); + } + + unsafe { + self.vec.iov_len -= n; + self.vec.iov_base = self.vec.iov_base.add(n); + } + } + + #[inline] + pub fn as_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.vec.iov_base as *mut u8, self.vec.iov_len) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut [u8] { + unsafe { slice::from_raw_parts_mut(self.vec.iov_base as *mut u8, self.vec.iov_len) } + } +} + +pub fn is_terminal(fd: &impl AsFd) -> bool { + let fd = fd.as_fd(); + hermit_abi::isatty(fd.as_raw_fd()) +} diff --git a/std/src/sys/pal/hermit/mod.rs b/std/src/sys/pal/hermit/mod.rs index a64323a3a296e..55583b89d6714 100644 --- a/std/src/sys/pal/hermit/mod.rs +++ b/std/src/sys/pal/hermit/mod.rs @@ -23,7 +23,6 @@ pub mod env; pub mod fd; pub mod fs; pub mod futex; -#[path = "../unsupported/io.rs"] pub mod io; pub mod net; pub mod os; @@ -33,9 +32,6 @@ pub mod pipe; pub mod
process; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; -#[path = "../unsupported/thread_local_key.rs"] -pub mod thread_local_key; pub mod time; use crate::io::ErrorKind; @@ -98,7 +94,6 @@ pub unsafe extern "C" fn runtime_entry( argv: *const *const c_char, env: *const *const c_char, ) -> ! { - use thread_local_dtor::run_dtors; extern "C" { fn main(argc: isize, argv: *const *const c_char) -> i32; } @@ -108,7 +103,7 @@ pub unsafe extern "C" fn runtime_entry( let result = main(argc as isize, argv); - run_dtors(); + crate::sys::thread_local::destructors::run(); hermit_abi::exit(result); } diff --git a/std/src/sys/pal/hermit/net.rs b/std/src/sys/pal/hermit/net.rs index 00dbca86a4bae..6016d50eba085 100644 --- a/std/src/sys/pal/hermit/net.rs +++ b/std/src/sys/pal/hermit/net.rs @@ -175,12 +175,12 @@ impl Socket { } pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> { - crate::io::default_read_vectored(|b| self.read(b), bufs) + self.0.read_vectored(bufs) } #[inline] pub fn is_read_vectored(&self) -> bool { - false + self.0.is_read_vectored() } fn recv_from_with_flags(&self, buf: &mut [u8], flags: i32) -> io::Result<(usize, SocketAddr)> { @@ -209,16 +209,15 @@ impl Socket { } pub fn write(&self, buf: &[u8]) -> io::Result<usize> { - let sz = cvt(unsafe { netc::write(self.0.as_raw_fd(), buf.as_ptr(), buf.len()) })?; - Ok(sz.try_into().unwrap()) + self.0.write(buf) } pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result<usize> { - crate::io::default_write_vectored(|b| self.write(b), bufs) + self.0.write_vectored(bufs) } pub fn is_write_vectored(&self) -> bool { - false + self.0.is_write_vectored() } pub fn set_timeout(&self, dur: Option<Duration>, kind: i32) -> io::Result<()> { @@ -265,7 +264,7 @@ impl Socket { Shutdown::Read => netc::SHUT_RD, Shutdown::Both => netc::SHUT_RDWR, }; - cvt(unsafe { netc::shutdown_socket(self.as_raw_fd(), how) })?; + cvt(unsafe { netc::shutdown(self.as_raw_fd(), how) })?; Ok(()) } diff --git a/std/src/sys/pal/hermit/os.rs b/std/src/sys/pal/hermit/os.rs index cc6781238319b..a7a73c756f216 100644 --- a/std/src/sys/pal/hermit/os.rs +++ b/std/src/sys/pal/hermit/os.rs @@ -172,18 +172,14 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { unsafe { ENV.as_ref().unwrap().lock().unwrap().get_mut(k).cloned() } } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { - unsafe { - let (k, v) = (k.to_owned(), v.to_owned()); - ENV.as_ref().unwrap().lock().unwrap().insert(k, v); - } +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { + let (k, v) = (k.to_owned(), v.to_owned()); + ENV.as_ref().unwrap().lock().unwrap().insert(k, v); Ok(()) } -pub fn unsetenv(k: &OsStr) -> io::Result<()> { - unsafe { - ENV.as_ref().unwrap().lock().unwrap().remove(k); - } +pub unsafe fn unsetenv(k: &OsStr) -> io::Result<()> { + ENV.as_ref().unwrap().lock().unwrap().remove(k); Ok(()) } @@ -202,5 +198,5 @@ pub fn exit(code: i32) -> !
{ } pub fn getpid() -> u32 { - unsafe { hermit_abi::getpid() } + unsafe { hermit_abi::getpid() as u32 } } diff --git a/std/src/sys/pal/hermit/stdio.rs b/std/src/sys/pal/hermit/stdio.rs index 777c57b391c89..3ea00f5cc5ec9 100644 --- a/std/src/sys/pal/hermit/stdio.rs +++ b/std/src/sys/pal/hermit/stdio.rs @@ -1,6 +1,9 @@ use super::hermit_abi; use crate::io; use crate::io::{IoSlice, IoSliceMut}; +use crate::mem::ManuallyDrop; +use crate::os::hermit::io::FromRawFd; +use crate::sys::fd::FileDesc; pub struct Stdin; pub struct Stdout; @@ -13,12 +16,14 @@ impl Stdin { } impl io::Read for Stdin { - fn read(&mut self, data: &mut [u8]) -> io::Result<usize> { - self.read_vectored(&mut [IoSliceMut::new(data)]) + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + unsafe { ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDIN_FILENO)).read(buf) } } - fn read_vectored(&mut self, _data: &mut [IoSliceMut<'_>]) -> io::Result<usize> { - Ok(0) + fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> { + unsafe { + ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDIN_FILENO)).read_vectored(bufs) + } } #[inline] @@ -34,27 +39,13 @@ impl Stdout { } impl io::Write for Stdout { - fn write(&mut self, data: &[u8]) -> io::Result<usize> { - let len; - - unsafe { len = hermit_abi::write(1, data.as_ptr() as *const u8, data.len()) } - - if len < 0 { - Err(io::const_io_error!(io::ErrorKind::Uncategorized, "Stdout is not able to print")) - } else { - Ok(len as usize) - } + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + unsafe { ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDOUT_FILENO)).write(buf) } } - fn write_vectored(&mut self, data: &[IoSlice<'_>]) -> io::Result<usize> { - let len; - - unsafe { len = hermit_abi::write(1, data.as_ptr() as *const u8, data.len()) } - - if len < 0 { - Err(io::const_io_error!(io::ErrorKind::Uncategorized, "Stdout is not able to print")) - } else { - Ok(len as usize) + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> { + unsafe { + ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDOUT_FILENO)).write_vectored(bufs) } } @@ -75,27 +66,13 @@ impl Stderr { } impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result<usize> { - let len; - - unsafe { len = hermit_abi::write(2, data.as_ptr() as *const u8, data.len()) } - - if len < 0 { - Err(io::const_io_error!(io::ErrorKind::Uncategorized, "Stderr is not able to print")) - } else { - Ok(len as usize) - } + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + unsafe { ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDERR_FILENO)).write(buf) } } - fn write_vectored(&mut self, data: &[IoSlice<'_>]) -> io::Result<usize> { - let len; - - unsafe { len = hermit_abi::write(2, data.as_ptr() as *const u8, data.len()) } - - if len < 0 { - Err(io::const_io_error!(io::ErrorKind::Uncategorized, "Stderr is not able to print")) - } else { - Ok(len as usize) + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> { + unsafe { + ManuallyDrop::new(FileDesc::from_raw_fd(hermit_abi::STDERR_FILENO)).write_vectored(bufs) } } @@ -109,10 +86,10 @@ impl io::Write for Stderr { } } -pub const STDIN_BUF_SIZE: usize = 0; +pub const STDIN_BUF_SIZE: usize = 128; -pub fn is_ebadf(_err: &io::Error) -> bool { - true +pub fn is_ebadf(err: &io::Error) -> bool { + err.raw_os_error() == Some(hermit_abi::EBADF) } pub fn panic_output() -> Option<impl io::Write> { diff --git a/std/src/sys/pal/hermit/thread.rs b/std/src/sys/pal/hermit/thread.rs index b336dcd6860e4..a244b953d2a49 100644 --- a/std/src/sys/pal/hermit/thread.rs +++
b/std/src/sys/pal/hermit/thread.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] use super::hermit_abi; -use super::thread_local_dtor::run_dtors; use crate::ffi::CStr; use crate::io; use crate::mem; @@ -50,7 +49,7 @@ impl Thread { Box::from_raw(ptr::with_exposed_provenance::<Box<dyn FnOnce()>>(main).cast_mut())(); // run all destructors - run_dtors(); + crate::sys::thread_local::destructors::run(); } } } @@ -98,5 +97,5 @@ impl Thread { } pub fn available_parallelism() -> io::Result<NonZero<usize>> { - unsafe { Ok(NonZero::new_unchecked(hermit_abi::get_processor_count())) } + unsafe { Ok(NonZero::new_unchecked(hermit_abi::available_parallelism())) } } diff --git a/std/src/sys/pal/hermit/thread_local_dtor.rs b/std/src/sys/pal/hermit/thread_local_dtor.rs deleted file mode 100644 index 98adaf4bff1aa..0000000000000 --- a/std/src/sys/pal/hermit/thread_local_dtor.rs +++ /dev/null @@ -1,29 +0,0 @@ -#![cfg(target_thread_local)] -#![unstable(feature = "thread_local_internals", issue = "none")] - -// Simplify dtor registration by using a list of destructors. -// This solution works like the macOS implementation and doesn't -// need additional OS support - -use crate::cell::RefCell; - -#[thread_local] -static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new()); - -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } -} - -// every thread calls this function to run through all possible destructors -pub unsafe fn run_dtors() { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - dtor(ptr); - } - list = DTORS.take(); - } -} diff --git a/std/src/sys/pal/hermit/time.rs b/std/src/sys/pal/hermit/time.rs index 2bf24462fa825..e0cb7c2aa98a5 100644 --- a/std/src/sys/pal/hermit/time.rs +++ b/std/src/sys/pal/hermit/time.rs @@ -1,11 +1,13 @@ #![allow(dead_code)] -use super::hermit_abi::{self, timespec, CLOCK_MONOTONIC, CLOCK_REALTIME, NSEC_PER_SEC}; +use super::hermit_abi::{self, timespec, CLOCK_MONOTONIC, CLOCK_REALTIME}; use crate::cmp::Ordering; use crate::ops::{Add, AddAssign, Sub, SubAssign}; use crate::time::Duration; use core::hash::{Hash, Hasher}; +const NSEC_PER_SEC: i32 = 1_000_000_000; + #[derive(Copy, Clone, Debug)] struct Timespec { t: timespec, @@ -16,8 +18,8 @@ impl Timespec { Timespec { t: timespec { tv_sec: 0, tv_nsec: 0 } } } - const fn new(tv_sec: i64, tv_nsec: i64) -> Timespec { - assert!(tv_nsec >= 0 && tv_nsec < NSEC_PER_SEC as i64); + const fn new(tv_sec: i64, tv_nsec: i32) -> Timespec { + assert!(tv_nsec >= 0 && tv_nsec < NSEC_PER_SEC); // SAFETY: The assert above checks tv_nsec is within the valid range Timespec { t: timespec { tv_sec: tv_sec, tv_nsec: tv_nsec } } } @@ -32,7 +34,7 @@ impl Timespec { } else { Duration::new( (self.t.tv_sec - 1 - other.t.tv_sec) as u64, - self.t.tv_nsec as u32 + (NSEC_PER_SEC as u32) - other.t.tv_nsec as u32, + (self.t.tv_nsec + NSEC_PER_SEC - other.t.tv_nsec) as u32, ) }) } else { @@ -48,9 +50,9 @@ impl Timespec { // Nano calculations can't overflow because nanos are <1B which fit // in a u32.
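As context for the hunk below: the nanosecond carry that the comment above describes, written out as a standalone sketch (hypothetical helper names, not the std code; the real logic lives in `Timespec::checked_add_duration`):

```rust
const NSEC_PER_SEC: u32 = 1_000_000_000;

/// Add a (secs, nsecs) duration onto a timespec-style pair, carrying the
/// nanoseconds into the seconds. None signals overflow, as in the diff.
fn checked_add(secs: i64, nsecs: u32, add_secs: u64, add_nsecs: u32) -> Option<(i64, u32)> {
    let mut secs = secs.checked_add_unsigned(add_secs)?;
    // Both operands are < 1_000_000_000, so their sum always fits in a u32.
    let mut nsecs = nsecs + add_nsecs;
    if nsecs >= NSEC_PER_SEC {
        nsecs -= NSEC_PER_SEC;
        secs = secs.checked_add(1)?;
    }
    Some((secs, nsecs))
}

fn main() {
    assert_eq!(checked_add(1, 900_000_000, 0, 200_000_000), Some((2, 100_000_000)));
    assert_eq!(checked_add(i64::MAX, 999_999_999, 1, 0), None);
}
```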
- let mut nsec = other.subsec_nanos() + self.t.tv_nsec as u32; - if nsec >= NSEC_PER_SEC as u32 { - nsec -= NSEC_PER_SEC as u32; + let mut nsec = other.subsec_nanos() + u32::try_from(self.t.tv_nsec).unwrap(); + if nsec >= NSEC_PER_SEC.try_into().unwrap() { + nsec -= u32::try_from(NSEC_PER_SEC).unwrap(); secs = secs.checked_add(1)?; } Some(Timespec { t: timespec { tv_sec: secs, tv_nsec: nsec as _ } }) @@ -200,7 +202,7 @@ pub struct SystemTime(Timespec); pub const UNIX_EPOCH: SystemTime = SystemTime(Timespec::zero()); impl SystemTime { - pub fn new(tv_sec: i64, tv_nsec: i64) -> SystemTime { + pub fn new(tv_sec: i64, tv_nsec: i32) -> SystemTime { SystemTime(Timespec::new(tv_sec, tv_nsec)) } diff --git a/std/src/sys/pal/itron/thread.rs b/std/src/sys/pal/itron/thread.rs index 205226ce1da80..fd7b5558f7566 100644 --- a/std/src/sys/pal/itron/thread.rs +++ b/std/src/sys/pal/itron/thread.rs @@ -1,5 +1,6 @@ //! Thread implementation backed by μITRON tasks. Assumes `acre_tsk` and //! `exd_tsk` are available. + use super::{ abi, error::{expect_success, expect_success_aborting, ItronError}, @@ -14,7 +15,6 @@ use crate::{ num::NonZero, ptr::NonNull, sync::atomic::{AtomicUsize, Ordering}, - sys::thread_local_dtor::run_dtors, time::Duration, }; @@ -116,7 +116,7 @@ impl Thread { // Run TLS destructors now because they are not // called automatically for terminated tasks. - unsafe { run_dtors() }; + unsafe { crate::sys::thread_local::destructors::run() }; let old_lifecycle = inner .lifecycle diff --git a/std/src/sys/pal/mod.rs b/std/src/sys/pal/mod.rs index 8c75ac652998b..df0176244489a 100644 --- a/std/src/sys/pal/mod.rs +++ b/std/src/sys/pal/mod.rs @@ -94,36 +94,5 @@ cfg_if::cfg_if! { } } -// Solaris/Illumos requires a wrapper around log, log2, and log10 functions -// because of their non-standard behavior (e.g., log(-n) returns -Inf instead -// of expected NaN). 
-#[cfg(not(test))] -#[cfg(any(target_os = "solaris", target_os = "illumos"))] -#[inline] -pub fn log_wrapper<F: Fn(f64) -> f64>(n: f64, log_fn: F) -> f64 { - if n.is_finite() { - if n > 0.0 { - log_fn(n) - } else if n == 0.0 { - f64::NEG_INFINITY // log(0) = -Inf - } else { - f64::NAN // log(-n) = NaN - } - } else if n.is_nan() { - n // log(NaN) = NaN - } else if n > 0.0 { - n // log(Inf) = Inf - } else { - f64::NAN // log(-Inf) = NaN - } -} - -#[cfg(not(test))] -#[cfg(not(any(target_os = "solaris", target_os = "illumos")))] -#[inline] -pub fn log_wrapper<F: Fn(f64) -> f64>(n: f64, log_fn: F) -> f64 { - log_fn(n) -} - #[cfg(not(target_os = "uefi"))] pub type RawOsError = i32; diff --git a/std/src/sys/pal/sgx/mod.rs b/std/src/sys/pal/sgx/mod.rs index d30976ec15149..851ab9b9f9767 100644 --- a/std/src/sys/pal/sgx/mod.rs +++ b/std/src/sys/pal/sgx/mod.rs @@ -26,7 +26,6 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -pub mod thread_local_key; pub mod thread_parking; pub mod time; pub mod waitqueue; diff --git a/std/src/sys/pal/sgx/os.rs b/std/src/sys/pal/sgx/os.rs index 86f4c7d3d56d6..c021300d4ae33 100644 --- a/std/src/sys/pal/sgx/os.rs +++ b/std/src/sys/pal/sgx/os.rs @@ -157,13 +157,13 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { get_env_store().and_then(|s| s.lock().unwrap().get(k).cloned()) } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { let (k, v) = (k.to_owned(), v.to_owned()); create_env_store().lock().unwrap().insert(k, v); Ok(()) } -pub fn unsetenv(k: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(k: &OsStr) -> io::Result<()> { if let Some(env) = get_env_store() { env.lock().unwrap().remove(k); } diff --git a/std/src/sys/pal/sgx/thread.rs b/std/src/sys/pal/sgx/thread.rs index 7d271e6d2b65d..446cdd18b7e42 100644 --- a/std/src/sys/pal/sgx/thread.rs +++ b/std/src/sys/pal/sgx/thread.rs @@ -15,7 +15,7 @@ pub use self::task_queue::JoinNotifier; mod task_queue { use super::wait_notify; - use crate::sync::{Mutex, MutexGuard, Once}; + use crate::sync::{Mutex, MutexGuard}; pub type JoinHandle = wait_notify::Waiter; @@ -28,12 +28,12 @@ mod task_queue { } pub(super) struct Task { - p: Box<dyn FnOnce()>, + p: Box<dyn FnOnce() + Send>, done: JoinNotifier, } impl Task { - pub(super) fn new(p: Box<dyn FnOnce()>) -> (Task, JoinHandle) { + pub(super) fn new(p: Box<dyn FnOnce() + Send>) -> (Task, JoinHandle) { let (done, recv) = wait_notify::new(); let done = JoinNotifier(Some(done)); (Task { p, done }, recv) @@ -45,18 +45,12 @@ mod task_queue { } } - #[cfg_attr(test, linkage = "available_externally")] - #[export_name = "_ZN16__rust_internals3std3sys3sgx6thread15TASK_QUEUE_INITE"] - static TASK_QUEUE_INIT: Once = Once::new(); #[cfg_attr(test, linkage = "available_externally")] #[export_name = "_ZN16__rust_internals3std3sys3sgx6thread10TASK_QUEUEE"] - static mut TASK_QUEUE: Option<Mutex<Vec<Task>>> = None; + static TASK_QUEUE: Mutex<Vec<Task>> = Mutex::new(Vec::new()); pub(super) fn lock() -> MutexGuard<'static, Vec<Task>> { - unsafe { - TASK_QUEUE_INIT.call_once(|| TASK_QUEUE = Some(Default::default())); - TASK_QUEUE.as_ref().unwrap().lock().unwrap() - } + TASK_QUEUE.lock().unwrap() } } @@ -101,7 +95,7 @@ pub mod wait_notify { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new(_stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> { + pub unsafe fn new(_stack: usize, p: Box<dyn FnOnce() + Send>) -> io::Result<Thread> { let mut queue_lock = task_queue::lock(); unsafe { usercalls::launch_thread()?
}; let (task, handle) = task_queue::Task::new(p); diff --git a/std/src/sys/pal/solid/abi/fs.rs b/std/src/sys/pal/solid/abi/fs.rs index 49526f4c9cd4d..75efaaac2a948 100644 --- a/std/src/sys/pal/solid/abi/fs.rs +++ b/std/src/sys/pal/solid/abi/fs.rs @@ -1,4 +1,5 @@ //! `solid_fs.h` + use crate::os::raw::{c_char, c_int, c_uchar}; pub use libc::{ ino_t, off_t, stat, time_t, O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY, diff --git a/std/src/sys/pal/solid/fs.rs b/std/src/sys/pal/solid/fs.rs index a6c1336109ad7..dc83e4f4b4999 100644 --- a/std/src/sys/pal/solid/fs.rs +++ b/std/src/sys/pal/solid/fs.rs @@ -12,7 +12,7 @@ use crate::{ sys::unsupported, }; -pub use crate::sys_common::fs::try_exists; +pub use crate::sys_common::fs::exists; /// A file descriptor. #[derive(Clone, Copy)] diff --git a/std/src/sys/pal/solid/mod.rs b/std/src/sys/pal/solid/mod.rs index 3f6ff37903ac6..9a7741ddda71e 100644 --- a/std/src/sys/pal/solid/mod.rs +++ b/std/src/sys/pal/solid/mod.rs @@ -33,8 +33,6 @@ pub mod pipe; pub mod process; pub mod stdio; pub use self::itron::thread; -pub mod thread_local_dtor; -pub mod thread_local_key; pub use self::itron::thread_parking; pub mod time; diff --git a/std/src/sys/pal/solid/os.rs b/std/src/sys/pal/solid/os.rs index ef35d8788a236..ac90aae4ebe46 100644 --- a/std/src/sys/pal/solid/os.rs +++ b/std/src/sys/pal/solid/os.rs @@ -191,7 +191,7 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { .flatten() } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { run_with_cstr(k.as_bytes(), &|k| { run_with_cstr(v.as_bytes(), &|v| { let _guard = ENV_LOCK.write(); @@ -200,7 +200,7 @@ pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { }) } -pub fn unsetenv(n: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(n: &OsStr) -> io::Result<()> { run_with_cstr(n.as_bytes(), &|nbuf| { let _guard = ENV_LOCK.write(); cvt_env(unsafe { libc::unsetenv(nbuf.as_ptr()) }).map(drop) diff --git a/std/src/sys/pal/solid/thread_local_dtor.rs b/std/src/sys/pal/solid/thread_local_dtor.rs deleted file mode 100644 index 26918a4fcb012..0000000000000 --- a/std/src/sys/pal/solid/thread_local_dtor.rs +++ /dev/null @@ -1,43 +0,0 @@ -#![cfg(target_thread_local)] -#![unstable(feature = "thread_local_internals", issue = "none")] - -// Simplify dtor registration by using a list of destructors.
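The rest of this deleted SOLID file follows below; it and the hermit file removed earlier hand-roll the same destructor-list pattern that the shared `crate::sys::thread_local::destructors` module now centralizes. A simplified sketch of that pattern, using the public `thread_local!` macro rather than std internals:

```rust
use std::cell::RefCell;

thread_local! {
    // Per-thread list of (data pointer, destructor) pairs.
    static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> =
        RefCell::new(Vec::new());
}

unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
    DTORS.with(|d| d.borrow_mut().push((t, dtor)));
}

unsafe fn run() {
    // A destructor may register further destructors, so keep draining
    // the list until it stays empty.
    loop {
        let list = DTORS.with(|d| d.take());
        if list.is_empty() {
            break;
        }
        for (ptr, dtor) in list {
            unsafe { dtor(ptr) };
        }
    }
}

unsafe extern "C" fn noop(_: *mut u8) {}

fn main() {
    unsafe {
        register(std::ptr::null_mut(), noop);
        run();
    }
}
```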
- -use super::{abi, itron::task}; -use crate::cell::{Cell, RefCell}; - -#[thread_local] -static REGISTERED: Cell<bool> = Cell::new(false); - -#[thread_local] -static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new()); - -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - if !REGISTERED.get() { - let tid = task::current_task_id_aborting(); - // Register `tls_dtor` to make sure the TLS destructors are called - // for tasks created by other means than `std::thread` - unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, tls_dtor) }; - REGISTERED.set(true); - } - - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } -} - -pub unsafe fn run_dtors() { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - unsafe { dtor(ptr) }; - } - - list = DTORS.take(); - } -} - -unsafe extern "C" fn tls_dtor(_unused: *mut u8) { - unsafe { run_dtors() }; -} diff --git a/std/src/sys/pal/solid/thread_local_key.rs b/std/src/sys/pal/solid/thread_local_key.rs deleted file mode 100644 index b37bf99969887..0000000000000 --- a/std/src/sys/pal/solid/thread_local_key.rs +++ /dev/null @@ -1,21 +0,0 @@ -pub type Key = usize; - -#[inline] -pub unsafe fn create(_dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key { - panic!("should not be used on the solid target"); -} - -#[inline] -pub unsafe fn set(_key: Key, _value: *mut u8) { - panic!("should not be used on the solid target"); -} - -#[inline] -pub unsafe fn get(_key: Key) -> *mut u8 { - panic!("should not be used on the solid target"); -} - -#[inline] -pub unsafe fn destroy(_key: Key) { - panic!("should not be used on the solid target"); -} diff --git a/std/src/sys/pal/teeos/alloc.rs b/std/src/sys/pal/teeos/alloc.rs index e236819aa2388..b280d1dd76f7a 100644 --- a/std/src/sys/pal/teeos/alloc.rs +++ b/std/src/sys/pal/teeos/alloc.rs @@ -11,9 +11,9 @@ unsafe impl GlobalAlloc for System { // Also see <https://github.com/rust-lang/rust/issues/45955> and // <https://github.com/rust-lang/rust/issues/62251#issuecomment-507580914>. if layout.align() <= MIN_ALIGN && layout.align() <= layout.size() { - libc::malloc(layout.size()) as *mut u8 + unsafe { libc::malloc(layout.size()) as *mut u8 } } else { - aligned_malloc(&layout) + unsafe { aligned_malloc(&layout) } } } @@ -21,11 +21,11 @@ unsafe impl GlobalAlloc for System { unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { // See the comment above in `alloc` for why this check looks the way it does. if layout.align() <= MIN_ALIGN && layout.align() <= layout.size() { - libc::calloc(layout.size(), 1) as *mut u8 + unsafe { libc::calloc(layout.size(), 1) as *mut u8 } } else { - let ptr = self.alloc(layout); + let ptr = unsafe { self.alloc(layout) }; if !ptr.is_null() { - ptr::write_bytes(ptr, 0, layout.size()); + unsafe { ptr::write_bytes(ptr, 0, layout.size()) }; } ptr } @@ -33,15 +33,15 @@ unsafe impl GlobalAlloc for System { #[inline] unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { - libc::free(ptr as *mut libc::c_void) + unsafe { libc::free(ptr as *mut libc::c_void) } } #[inline] unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { if layout.align() <= MIN_ALIGN && layout.align() <= new_size { - libc::realloc(ptr as *mut libc::c_void, new_size) as *mut u8 + unsafe { libc::realloc(ptr as *mut libc::c_void, new_size) as *mut u8 } } else { - realloc_fallback(self, ptr, layout, new_size) + unsafe { realloc_fallback(self, ptr, layout, new_size) } } } } @@ -52,6 +52,6 @@ unsafe fn aligned_malloc(layout: &Layout) -> *mut u8 { // posix_memalign requires that the alignment be a multiple of `sizeof(void*)`.
// Since these are all powers of 2, we can just use max. let align = layout.align().max(crate::mem::size_of::<usize>()); - let ret = libc::posix_memalign(&mut out, align, layout.size()); + let ret = unsafe { libc::posix_memalign(&mut out, align, layout.size()) }; if ret != 0 { ptr::null_mut() } else { out as *mut u8 } } diff --git a/std/src/sys/pal/teeos/mod.rs b/std/src/sys/pal/teeos/mod.rs index 6dd465a12ed49..adefd1bb42c8d 100644 --- a/std/src/sys/pal/teeos/mod.rs +++ b/std/src/sys/pal/teeos/mod.rs @@ -2,7 +2,7 @@ //! //! This module contains the facade (aka platform-specific) implementations of //! OS level functionality for Teeos. -#![allow(unsafe_op_in_unsafe_fn)] +#![deny(unsafe_op_in_unsafe_fn)] #![allow(unused_variables)] #![allow(dead_code)] @@ -27,9 +27,6 @@ pub mod process; mod rand; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; -#[path = "../unix/thread_local_key.rs"] -pub mod thread_local_key; #[allow(non_upper_case_globals)] #[path = "../unix/time.rs"] pub mod time; diff --git a/std/src/sys/pal/teeos/os.rs b/std/src/sys/pal/teeos/os.rs index e54a92f01f86b..3be0846a6dd4d 100644 --- a/std/src/sys/pal/teeos/os.rs +++ b/std/src/sys/pal/teeos/os.rs @@ -109,11 +109,11 @@ pub fn getenv(_: &OsStr) -> Option<OsString> { None } -pub fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { Err(io::Error::new(io::ErrorKind::Unsupported, "cannot set env vars on this platform")) } -pub fn unsetenv(_: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(_: &OsStr) -> io::Result<()> { Err(io::Error::new(io::ErrorKind::Unsupported, "cannot unset env vars on this platform")) } diff --git a/std/src/sys/pal/teeos/thread.rs b/std/src/sys/pal/teeos/thread.rs index f4723b2ea46bf..7a27d749f1c9c 100644 --- a/std/src/sys/pal/teeos/thread.rs +++ b/std/src/sys/pal/teeos/thread.rs @@ -28,22 +28,24 @@ impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> { let p = Box::into_raw(Box::new(p)); - let mut native: libc::pthread_t = mem::zeroed(); - let mut attr: libc::pthread_attr_t = mem::zeroed(); - assert_eq!(libc::pthread_attr_init(&mut attr), 0); + let mut native: libc::pthread_t = unsafe { mem::zeroed() }; + let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; + assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); assert_eq!( - libc::pthread_attr_settee( - &mut attr, - libc::TEESMP_THREAD_ATTR_CA_INHERIT, - libc::TEESMP_THREAD_ATTR_TASK_ID_INHERIT, - libc::TEESMP_THREAD_ATTR_HAS_SHADOW, - ), + unsafe { + libc::pthread_attr_settee( + &mut attr, + libc::TEESMP_THREAD_ATTR_CA_INHERIT, + libc::TEESMP_THREAD_ATTR_TASK_ID_INHERIT, + libc::TEESMP_THREAD_ATTR_HAS_SHADOW, + ) + }, 0, ); let stack_size = cmp::max(stack, min_stack_size(&attr)); - match libc::pthread_attr_setstacksize(&mut attr, stack_size) { + match unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) } { 0 => {} n => { assert_eq!(n, libc::EINVAL); @@ -54,7 +56,7 @@ impl Thread { let page_size = os::page_size(); let stack_size = (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1); - assert_eq!(libc::pthread_attr_setstacksize(&mut attr, stack_size), 0); + assert_eq!(unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) }, 0); } }; @@ -62,12 +64,12 @@ impl Thread { // Note: if the thread creation fails and this assert fails, then p will // be leaked. However, an alternative design could cause double-free // which is clearly worse.
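That leak-versus-double-free trade-off is easier to see outside the diff (which resumes right after this sketch). A reduced sketch of the ownership pattern, with a stand-in `spawn_c` replacing `pthread_create`; this is illustrative, not the teeos code itself:

```rust
// `spawn_c` stands in for a C spawn API that consumes `arg` only on success.
fn spawn_c(start: extern "C" fn(*mut u8), arg: *mut u8) -> i32 {
    // Pretend the OS call succeeded and ran the "thread" synchronously.
    start(arg);
    0
}

extern "C" fn thread_start(arg: *mut u8) {
    // Reconstruct the Box exactly once: ownership of the pointer moves here.
    let closure = unsafe { Box::from_raw(arg as *mut Box<dyn FnOnce()>) };
    closure();
}

fn spawn(p: Box<dyn FnOnce()>) -> Result<(), i32> {
    let p = Box::into_raw(Box::new(p));
    let ret = spawn_c(thread_start, p as *mut u8);
    if ret != 0 {
        // The callee did not consume `p`; reconstruct it so it is freed
        // (leaking here would be safe but wasteful; freeing twice never happens).
        drop(unsafe { Box::from_raw(p) });
        return Err(ret);
    }
    Ok(())
}

fn main() {
    spawn(Box::new(|| println!("ran"))).unwrap();
}
```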
- assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); + assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { // The thread failed to start and as a result p was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. - drop(Box::from_raw(p)); + drop(unsafe { Box::from_raw(p) }); Err(io::Error::from_raw_os_error(ret)) } else { // The new thread will start running earliest after the next yield. diff --git a/std/src/sys/pal/teeos/thread_local_dtor.rs b/std/src/sys/pal/teeos/thread_local_dtor.rs deleted file mode 100644 index 5c6bc4d675011..0000000000000 --- a/std/src/sys/pal/teeos/thread_local_dtor.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::sys_common::thread_local_dtor::register_dtor_fallback; - register_dtor_fallback(t, dtor); -} diff --git a/std/src/sys/pal/uefi/mod.rs b/std/src/sys/pal/uefi/mod.rs index 48b74df138439..4d50d9e8c3d9c 100644 --- a/std/src/sys/pal/uefi/mod.rs +++ b/std/src/sys/pal/uefi/mod.rs @@ -11,6 +11,7 @@ //! //! [`OsStr`]: crate::ffi::OsStr //! [`OsString`]: crate::ffi::OsString +#![forbid(unsafe_op_in_unsafe_fn)] pub mod alloc; pub mod args; @@ -28,8 +29,6 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -#[path = "../unsupported/thread_local_key.rs"] -pub mod thread_local_key; pub mod time; mod helpers; diff --git a/std/src/sys/pal/uefi/os.rs b/std/src/sys/pal/uefi/os.rs index 58838c5876ebd..0b27977df2fde 100644 --- a/std/src/sys/pal/uefi/os.rs +++ b/std/src/sys/pal/uefi/os.rs @@ -203,11 +203,11 @@ pub fn getenv(_: &OsStr) -> Option<OsString> { None } -pub fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot set env vars on this platform")) } -pub fn unsetenv(_: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(_: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot unset env vars on this platform")) } diff --git a/std/src/sys/pal/uefi/thread.rs b/std/src/sys/pal/uefi/thread.rs index edc736978a123..7d4006ff4b2f7 100644 --- a/std/src/sys/pal/uefi/thread.rs +++ b/std/src/sys/pal/uefi/thread.rs @@ -7,7 +7,7 @@ use crate::time::Duration; pub struct Thread(!); -pub const DEFAULT_MIN_STACK_SIZE: usize = 4096; +pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements diff --git a/std/src/sys/pal/unix/alloc.rs b/std/src/sys/pal/unix/alloc.rs index 2f908e3d0e956..eb3a57c212b4a 100644 --- a/std/src/sys/pal/unix/alloc.rs +++ b/std/src/sys/pal/unix/alloc.rs @@ -59,10 +59,9 @@ unsafe impl GlobalAlloc for System { } cfg_if::cfg_if! { - // We use posix_memalign wherever possible, but not all targets have that function. + // We use posix_memalign wherever possible, but some targets have very incomplete POSIX coverage + // so we need a fallback for those. if #[cfg(any( - target_os = "redox", - target_os = "espidf", target_os = "horizon", target_os = "vita", ))] { @@ -74,12 +73,11 @@ cfg_if::cfg_if! { #[inline] unsafe fn aligned_malloc(layout: &Layout) -> *mut u8 { let mut out = ptr::null_mut(); - // We prefer posix_memalign over aligned_malloc since with aligned_malloc, - // implementations are making almost arbitrary choices for which alignments are - // "supported", making it hard to use.
For instance, some implementations require the - size to be a multiple of the alignment (wasi emmalloc), while others require the - alignment to be at least the pointer size (Illumos, macOS) -- which may or may not be - standards-compliant, but that does not help us. + // We prefer posix_memalign over aligned_alloc since it is more widely available, and + // since with aligned_alloc, implementations are making almost arbitrary choices for + // which alignments are "supported", making it hard to use. For instance, some + // implementations require the size to be a multiple of the alignment (wasi emmalloc), + // while others require the alignment to be at least the pointer size (Illumos, macOS). + // posix_memalign only has one, clear requirement: that the alignment be a multiple of + // `sizeof(void*)`. Since these are all powers of 2, we can just use max. let align = layout.align().max(crate::mem::size_of::<usize>()); diff --git a/std/src/sys/pal/unix/args.rs b/std/src/sys/pal/unix/args.rs index db2ec73148e38..e2ec838b740cb 100644 --- a/std/src/sys/pal/unix/args.rs +++ b/std/src/sys/pal/unix/args.rs @@ -183,7 +183,7 @@ mod imp { // Use `_NSGetArgc` and `_NSGetArgv` on Apple platforms. // // Even though these have underscores in their names, they've been available -// since since the first versions of both macOS and iOS, and are declared in +// since the first versions of both macOS and iOS, and are declared in // the header `crt_externs.h`. // // NOTE: This header was added to the iOS 13.0 SDK, which has been the source diff --git a/std/src/sys/pal/unix/fs.rs b/std/src/sys/pal/unix/fs.rs index fbbd40bfb796a..8308a48f16a9e 100644 --- a/std/src/sys/pal/unix/fs.rs +++ b/std/src/sys/pal/unix/fs.rs @@ -20,18 +20,14 @@ use crate::sys::time::SystemTime; use crate::sys::{cvt, cvt_r}; use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; -#[cfg(any(all(target_os = "linux", target_env = "gnu"), target_vendor = "apple"))] +#[cfg(all(target_os = "linux", target_env = "gnu"))] use crate::sys::weak::syscall; -#[cfg(any(target_os = "android", target_os = "macos", target_os = "solaris"))] +#[cfg(target_os = "android")] use crate::sys::weak::weak; use libc::{c_int, mode_t}; -#[cfg(any( - target_os = "solaris", - all(target_os = "linux", target_env = "gnu"), - target_vendor = "apple", -))] +#[cfg(all(target_os = "linux", target_env = "gnu"))] use libc::c_char; #[cfg(any( all(target_os = "linux", not(target_env = "musl")), @@ -101,7 +97,7 @@ use libc::{ ))] use libc::{dirent64, fstat64, ftruncate64, lseek64, lstat64, off64_t, open64, stat64}; -pub use crate::sys_common::fs::try_exists; +pub use crate::sys_common::fs::exists; pub struct File(FileDesc); @@ -1481,29 +1477,33 @@ impl FromRawFd for File { impl fmt::Debug for File { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[cfg(any( - target_os = "linux", - target_os = "netbsd", - target_os = "illumos", - target_os = "solaris" - ))] + #[cfg(any(target_os = "linux", target_os = "illumos", target_os = "solaris"))] fn get_path(fd: c_int) -> Option<PathBuf> { let mut p = PathBuf::from("/proc/self/fd"); p.push(&fd.to_string()); readlink(&p).ok() } - #[cfg(target_vendor = "apple")] + #[cfg(any(target_vendor = "apple", target_os = "netbsd"))] fn get_path(fd: c_int) -> Option<PathBuf> { // FIXME: The use of PATH_MAX is generally not encouraged, but it - // is inevitable in this case because Apple targets define `fcntl` + // is inevitable in this case because Apple targets and NetBSD define `fcntl` + // with `F_GETPATH` in terms of `MAXPATHLEN`, and there are
no // alternatives. If a better method is invented, it should be used // instead. let mut buf = vec![0; libc::PATH_MAX as usize]; let n = unsafe { libc::fcntl(fd, libc::F_GETPATH, buf.as_ptr()) }; if n == -1 { - return None; + cfg_if::cfg_if! { + if #[cfg(target_os = "netbsd")] { + // fallback to procfs as last resort + let mut p = PathBuf::from("/proc/self/fd"); + p.push(&fd.to_string()); + return readlink(&p).ok(); + } else { + return None; + } + } } let l = buf.iter().position(|&c| c == 0).unwrap(); buf.truncate(l as usize); @@ -1557,6 +1557,8 @@ impl fmt::Debug for File { target_os = "netbsd", target_os = "openbsd", target_os = "vxworks", + target_os = "solaris", + target_os = "illumos", target_vendor = "apple", ))] fn get_mode(fd: c_int) -> Option<(bool, bool)> { @@ -1579,6 +1581,8 @@ impl fmt::Debug for File { target_os = "netbsd", target_os = "openbsd", target_os = "vxworks", + target_os = "solaris", + target_os = "illumos", target_vendor = "apple", )))] fn get_mode(_fd: c_int) -> Option<(bool, bool)> { @@ -1745,19 +1749,6 @@ pub fn link(original: &Path, link: &Path) -> io::Result<()> { // Android has `linkat` on newer versions, but we happen to know `link` // always has the correct behavior, so it's here as well. cvt(unsafe { libc::link(original.as_ptr(), link.as_ptr()) })?; - } else if #[cfg(any(target_os = "macos", target_os = "solaris"))] { - // MacOS (<=10.9) and Solaris 10 lack support for linkat while newer - // versions have it. We want to use linkat if it is available, so we use weak! - // to check. `linkat` is preferable to `link` because it gives us a flag to - // specify how symlinks should be handled. We pass 0 as the flags argument, - // meaning it shouldn't follow symlinks. - weak!(fn linkat(c_int, *const c_char, c_int, *const c_char, c_int) -> c_int); - - if let Some(f) = linkat.get() { - cvt(unsafe { f(libc::AT_FDCWD, original.as_ptr(), libc::AT_FDCWD, link.as_ptr(), 0) })?; - } else { - cvt(unsafe { libc::link(original.as_ptr(), link.as_ptr()) })?; - }; } else { // Where we can, use `linkat` instead of `link`; see the comment above // this one for details on why. @@ -1900,8 +1891,6 @@ pub fn copy(from: &Path, to: &Path) -> io::Result { #[cfg(target_vendor = "apple")] pub fn copy(from: &Path, to: &Path) -> io::Result { - use crate::sync::atomic::{AtomicBool, Ordering}; - const COPYFILE_ALL: libc::copyfile_flags_t = libc::COPYFILE_METADATA | libc::COPYFILE_DATA; struct FreeOnDrop(libc::copyfile_state_t); @@ -1910,46 +1899,27 @@ pub fn copy(from: &Path, to: &Path) -> io::Result { // The code below ensures that `FreeOnDrop` is never a null pointer unsafe { // `copyfile_state_free` returns -1 if the `to` or `from` files - // cannot be closed. However, this is not considered this an - // error. + // cannot be closed. However, this is not considered an error. libc::copyfile_state_free(self.0); } } } - // MacOS prior to 10.12 don't support `fclonefileat` - // We store the availability in a global to avoid unnecessary syscalls - static HAS_FCLONEFILEAT: AtomicBool = AtomicBool::new(true); - syscall! { - // Mirrors `libc::fclonefileat` - fn fclonefileat( - srcfd: libc::c_int, - dst_dirfd: libc::c_int, - dst: *const c_char, - flags: libc::c_int - ) -> libc::c_int - } - let (reader, reader_metadata) = open_from(from)?; - // Opportunistically attempt to create a copy-on-write clone of `from` - // using `fclonefileat`. 
- if HAS_FCLONEFILEAT.load(Ordering::Relaxed) { - let clonefile_result = run_path_with_cstr(to, &|to| { - cvt(unsafe { fclonefileat(reader.as_raw_fd(), libc::AT_FDCWD, to.as_ptr(), 0) }) - }); - match clonefile_result { - Ok(_) => return Ok(reader_metadata.len()), - Err(err) => match err.raw_os_error() { - // `fclonefileat` will fail on non-APFS volumes, if the - // destination already exists, or if the source and destination - // are on different devices. In all these cases `fcopyfile` - // should succeed. - Some(libc::ENOTSUP) | Some(libc::EEXIST) | Some(libc::EXDEV) => (), - Some(libc::ENOSYS) => HAS_FCLONEFILEAT.store(false, Ordering::Relaxed), - _ => return Err(err), - }, - } + let clonefile_result = run_path_with_cstr(to, &|to| { + cvt(unsafe { libc::fclonefileat(reader.as_raw_fd(), libc::AT_FDCWD, to.as_ptr(), 0) }) + }); + match clonefile_result { + Ok(_) => return Ok(reader_metadata.len()), + Err(e) => match e.raw_os_error() { + // `fclonefileat` will fail on non-APFS volumes, if the + // destination already exists, or if the source and destination + // are on different devices. In all these cases `fcopyfile` + // should succeed. + Some(libc::ENOTSUP) | Some(libc::EEXIST) | Some(libc::EXDEV) => (), + _ => return Err(e), + }, } // Fall back to using `fcopyfile` if `fclonefileat` does not succeed. @@ -2006,13 +1976,14 @@ pub fn chroot(dir: &Path) -> io::Result<()> { pub use remove_dir_impl::remove_dir_all; -// Fallback for REDOX, ESP-ID, Horizon, Vita and Miri +// Fallback for REDOX, ESP-IDF, Horizon, Vita, Vxworks and Miri #[cfg(any( target_os = "redox", target_os = "espidf", target_os = "horizon", target_os = "vita", target_os = "nto", + target_os = "vxworks", miri ))] mod remove_dir_impl { @@ -2026,6 +1997,7 @@ mod remove_dir_impl { target_os = "horizon", target_os = "vita", target_os = "nto", + target_os = "vxworks", miri )))] mod remove_dir_impl { @@ -2038,56 +2010,10 @@ mod remove_dir_impl { use crate::sys::common::small_c_string::run_path_with_cstr; use crate::sys::{cvt, cvt_r}; - #[cfg(not(any( - all(target_os = "linux", target_env = "gnu"), - all(target_os = "macos", not(target_arch = "aarch64")) - )))] + #[cfg(not(all(target_os = "linux", target_env = "gnu")))] use libc::{fdopendir, openat, unlinkat}; #[cfg(all(target_os = "linux", target_env = "gnu"))] use libc::{fdopendir, openat64 as openat, unlinkat}; - #[cfg(all(target_os = "macos", not(target_arch = "aarch64")))] - use macos_weak::{fdopendir, openat, unlinkat}; - - #[cfg(all(target_os = "macos", not(target_arch = "aarch64")))] - mod macos_weak { - use crate::sys::weak::weak; - use libc::{c_char, c_int, DIR}; - - fn get_openat_fn() -> Option<unsafe extern "C" fn(c_int, *const c_char, c_int) -> c_int> { - weak!(fn openat(c_int, *const c_char, c_int) -> c_int); - openat.get() - } - - pub fn has_openat() -> bool { - get_openat_fn().is_some() - } - - pub unsafe fn openat(dirfd: c_int, pathname: *const c_char, flags: c_int) -> c_int { - get_openat_fn().map(|openat| openat(dirfd, pathname, flags)).unwrap_or_else(|| { - crate::sys::pal::unix::os::set_errno(libc::ENOSYS); - -1 - }) - } - - pub unsafe fn fdopendir(fd: c_int) -> *mut DIR { - #[cfg(all(target_os = "macos", target_arch = "x86"))] - weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64$UNIX2003"); - #[cfg(all(target_os = "macos", target_arch = "x86_64"))] - weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64"); - fdopendir.get().map(|fdopendir| fdopendir(fd)).unwrap_or_else(|| { - crate::sys::pal::unix::os::set_errno(libc::ENOSYS); - crate::ptr::null_mut() - }) - } - - pub unsafe fn unlinkat(dirfd: c_int,
pathname: *const c_char, flags: c_int) -> c_int { - weak!(fn unlinkat(c_int, *const c_char, c_int) -> c_int); - unlinkat.get().map(|unlinkat| unlinkat(dirfd, pathname, flags)).unwrap_or_else(|| { - crate::sys::pal::unix::os::set_errno(libc::ENOSYS); - -1 - }) - } - } pub fn openat_nofollow_dironly(parent_fd: Option<RawFd>, p: &CStr) -> io::Result<OwnedFd> { let fd = cvt_r(|| unsafe { @@ -2200,19 +2126,7 @@ mod remove_dir_impl { } } - #[cfg(not(all(target_os = "macos", not(target_arch = "aarch64"))))] pub fn remove_dir_all(p: &Path) -> io::Result<()> { remove_dir_all_modern(p) } - - #[cfg(all(target_os = "macos", not(target_arch = "aarch64")))] - pub fn remove_dir_all(p: &Path) -> io::Result<()> { - if macos_weak::has_openat() { - // openat() is available with macOS 10.10+, just like unlinkat() and fdopendir() - remove_dir_all_modern(p) - } else { - // fall back to classic implementation - crate::sys_common::fs::remove_dir_all(p) - } - } } diff --git a/std/src/sys/pal/unix/futex.rs b/std/src/sys/pal/unix/futex.rs index 26161a9af79d8..b8900da4cddb5 100644 --- a/std/src/sys/pal/unix/futex.rs +++ b/std/src/sys/pal/unix/futex.rs @@ -11,6 +11,11 @@ use crate::sync::atomic::AtomicU32; use crate::time::Duration; +/// An atomic for use as a futex that is at least 8-bits but may be larger. +pub type SmallAtomic = AtomicU32; +/// Must be the underlying type of SmallAtomic +pub type SmallPrimitive = u32; + /// Wait for a futex_wake operation to wake us. /// /// Returns directly if the futex doesn't hold the expected value. diff --git a/std/src/sys/pal/unix/kernel_copy.rs b/std/src/sys/pal/unix/kernel_copy.rs index 18acd5ecccd5c..cd38b7c07e2b1 100644 --- a/std/src/sys/pal/unix/kernel_copy.rs +++ b/std/src/sys/pal/unix/kernel_copy.rs @@ -560,6 +560,12 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> // We store the availability in a global to avoid unnecessary syscalls static HAS_COPY_FILE_RANGE: AtomicU8 = AtomicU8::new(NOT_PROBED); + let mut have_probed = match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) { + NOT_PROBED => false, + UNAVAILABLE => return CopyResult::Fallback(0), + _ => true, + }; + syscall! { fn copy_file_range( fd_in: libc::c_int, @@ -571,25 +577,22 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> ) -> libc::ssize_t } - match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) { - NOT_PROBED => { - // EPERM can indicate seccomp filters or an immutable file. - // To distinguish these cases we probe with invalid file descriptors which should result in EBADF if the syscall is supported - // and some other error (ENOSYS or EPERM) if it's not available - let result = unsafe { - cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0)) - }; - - if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(EBADF))) { - HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); - } else { - HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed); - return CopyResult::Fallback(0); - } + fn probe_copy_file_range_support() -> u8 { + // In some cases, we cannot determine availability from the first + // `copy_file_range` call. In this case, we probe with an invalid file + // descriptor so that the results are easily interpretable.
+ match unsafe { + cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0)) + .map_err(|e| e.raw_os_error()) + } { + Err(Some(EPERM | ENOSYS)) => UNAVAILABLE, + Err(Some(EBADF)) => AVAILABLE, + Ok(_) => panic!("unexpected copy_file_range probe success"), + // Treat other errors as the syscall + // being unavailable. + Err(_) => UNAVAILABLE, } - UNAVAILABLE => return CopyResult::Fallback(0), - _ => {} - }; + } let mut written = 0u64; while written < max_len { @@ -604,6 +607,11 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> cvt(copy_file_range(reader, ptr::null_mut(), writer, ptr::null_mut(), bytes_to_copy, 0)) }; + if !have_probed && copy_result.is_ok() { + have_probed = true; + HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed); + } + match copy_result { Ok(0) if written == 0 => { // fallback to work around several kernel bugs where copy_file_range will fail to @@ -619,7 +627,28 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> return match err.raw_os_error() { // when file offset + max_length > u64::MAX Some(EOVERFLOW) => CopyResult::Fallback(written), - Some(ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF) if written == 0 => { + Some(raw_os_error @ (ENOSYS | EXDEV | EINVAL | EPERM | EOPNOTSUPP | EBADF)) + if written == 0 => + { + if !have_probed { + let available = if matches!(raw_os_error, ENOSYS | EOPNOTSUPP | EPERM) { + // EPERM can indicate seccomp filters or an + // immutable file. To distinguish these + // cases we probe with invalid file + // descriptors which should result in EBADF + // if the syscall is supported and EPERM or + // ENOSYS if it's not available. + // + // For EOPNOTSUPP, see below. In the case of + // ENOSYS, we try to cover for faulty FUSE + // drivers. 
+ probe_copy_file_range_support() + } else { + AVAILABLE + }; + HAS_COPY_FILE_RANGE.store(available, Ordering::Relaxed); + } + + // Try fallback io::copy if either: + // - Kernel version is < 4.5 (ENOSYS¹) + // - Files are mounted on different fs (EXDEV) diff --git a/std/src/sys/pal/unix/linux/mod.rs b/std/src/sys/pal/unix/linux/mod.rs new file mode 100644 index 0000000000000..88aa1e3deccf8 --- /dev/null +++ b/std/src/sys/pal/unix/linux/mod.rs @@ -0,0 +1 @@ +pub mod pidfd; diff --git a/std/src/sys/pal/unix/linux/pidfd.rs b/std/src/sys/pal/unix/linux/pidfd.rs new file mode 100644 index 0000000000000..7474f80e94f9d --- /dev/null +++ b/std/src/sys/pal/unix/linux/pidfd.rs @@ -0,0 +1,76 @@ +use crate::io; +use crate::os::fd::{AsRawFd, FromRawFd, RawFd}; +use crate::sys::cvt; +use crate::sys::pal::unix::fd::FileDesc; +use crate::sys::process::ExitStatus; +use crate::sys_common::{AsInner, FromInner, IntoInner}; + +#[cfg(test)] +mod tests; + +#[derive(Debug)] +pub(crate) struct PidFd(FileDesc); + +impl PidFd { + pub fn kill(&self) -> io::Result<()> { + return cvt(unsafe { + libc::syscall( + libc::SYS_pidfd_send_signal, + self.0.as_raw_fd(), + libc::SIGKILL, + crate::ptr::null::<()>(), + 0, + ) + }) + .map(drop); + } + + pub fn wait(&self) -> io::Result<ExitStatus> { + let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; + cvt(unsafe { + libc::waitid(libc::P_PIDFD, self.0.as_raw_fd() as u32, &mut siginfo, libc::WEXITED) + })?; + return Ok(ExitStatus::from_waitid_siginfo(siginfo)); + } + + pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> { + let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; + + cvt(unsafe { + libc::waitid( + libc::P_PIDFD, + self.0.as_raw_fd() as u32, + &mut siginfo, + libc::WEXITED | libc::WNOHANG, + ) + })?; + if unsafe { siginfo.si_pid() } == 0 { + return Ok(None); + } + return Ok(Some(ExitStatus::from_waitid_siginfo(siginfo))); + } +} + +impl AsInner<FileDesc> for PidFd { + fn as_inner(&self) -> &FileDesc { + &self.0 + } +} + +impl IntoInner<FileDesc> for PidFd { + fn into_inner(self) -> FileDesc { + self.0 + } +} + +impl FromInner<FileDesc> for PidFd { + fn from_inner(inner: FileDesc) -> Self { + Self(inner) + } +} + +impl FromRawFd for PidFd { + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Self(FileDesc::from_raw_fd(fd)) + } +} diff --git a/std/src/sys/pal/unix/linux/pidfd/tests.rs b/std/src/sys/pal/unix/linux/pidfd/tests.rs new file mode 100644 index 0000000000000..fb928c76fbd04 --- /dev/null +++ b/std/src/sys/pal/unix/linux/pidfd/tests.rs @@ -0,0 +1,99 @@ +use crate::assert_matches::assert_matches; +use crate::os::fd::{AsRawFd, RawFd}; +use crate::os::linux::process::{ChildExt, CommandExt as _}; +use crate::os::unix::process::{CommandExt as _, ExitStatusExt}; +use crate::process::Command; + +#[test] +fn test_command_pidfd() { + let pidfd_open_available = probe_pidfd_support(); + + // always exercise creation attempts + let mut child = Command::new("false").create_pidfd(true).spawn().unwrap(); + + // but only check if we know that the kernel supports pidfds. + // We don't assert the precise value, since the standard library + // might have opened other file descriptors before our code runs.
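For readers unfamiliar with the `waitid(P_PIDFD, ..)` convention that the new `PidFd::try_wait` above relies on: with `WNOHANG`, `si_pid` stays 0 while the child is still running. A reduced sketch using the `libc` crate (assumes a Linux target and a valid pidfd; the test file continues right after):

```rust
use std::io;
use std::os::fd::RawFd;

// Minimal try_wait over a pidfd, mirroring PidFd::try_wait above.
unsafe fn try_wait_pidfd(pidfd: RawFd) -> io::Result<Option<i32>> {
    let mut siginfo: libc::siginfo_t = std::mem::zeroed();
    let ret = libc::waitid(
        libc::P_PIDFD,
        pidfd as libc::id_t,
        &mut siginfo,
        libc::WEXITED | libc::WNOHANG,
    );
    if ret == -1 {
        return Err(io::Error::last_os_error());
    }
    if siginfo.si_pid() == 0 {
        return Ok(None); // still running
    }
    Ok(Some(siginfo.si_status())) // exit code or terminating signal
}
```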
+ if pidfd_open_available { + assert!(child.pidfd().is_ok()); + } + if let Ok(pidfd) = child.pidfd() { + let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap(); + assert!(flags & libc::FD_CLOEXEC != 0); + } + assert!(child.id() > 0 && child.id() < -1i32 as u32); + let status = child.wait().expect("error waiting on pidfd"); + assert_eq!(status.code(), Some(1)); + + let mut child = Command::new("sleep").arg("1000").create_pidfd(true).spawn().unwrap(); + assert_matches!(child.try_wait(), Ok(None)); + child.kill().expect("failed to kill child"); + let status = child.wait().expect("error waiting on pidfd"); + assert_eq!(status.signal(), Some(libc::SIGKILL)); + + let _ = Command::new("echo") + .create_pidfd(false) + .spawn() + .unwrap() + .pidfd() + .expect_err("pidfd should not have been created when create_pid(false) is set"); + + let _ = Command::new("echo") + .spawn() + .unwrap() + .pidfd() + .expect_err("pidfd should not have been created"); + + // exercise the fork/exec path since the earlier attempts may have used pidfd_spawnp() + let mut child = + unsafe { Command::new("false").pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); + + assert!(child.id() > 0 && child.id() < -1i32 as u32); + + if pidfd_open_available { + assert!(child.pidfd().is_ok()) + } + child.wait().expect("error waiting on child"); +} + +#[test] +fn test_pidfd() { + if !probe_pidfd_support() { + return; + } + + let child = Command::new("sleep") + .arg("1000") + .create_pidfd(true) + .spawn() + .expect("executing 'sleep' failed"); + + let fd = child.into_pidfd().unwrap(); + + assert_matches!(fd.try_wait(), Ok(None)); + fd.kill().expect("kill failed"); + fd.kill().expect("sending kill twice failed"); + let status = fd.wait().expect("1st wait failed"); + assert_eq!(status.signal(), Some(libc::SIGKILL)); + + // Trying to wait again for a reaped child is safe since there's no pid-recycling race. + // But doing so will return an error. + let res = fd.wait(); + assert_matches!(res, Err(e) if e.raw_os_error() == Some(libc::ECHILD)); + + // Ditto for additional attempts to kill an already-dead child. + let res = fd.kill(); + assert_matches!(res, Err(e) if e.raw_os_error() == Some(libc::ESRCH)); +} + +fn probe_pidfd_support() -> bool { + // pidfds require the pidfd_open syscall + let our_pid = crate::process::id(); + let pidfd = unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) }; + if pidfd >= 0 { + unsafe { libc::close(pidfd as RawFd) }; + true + } else { + false + } +} diff --git a/std/src/sys/pal/unix/mod.rs b/std/src/sys/pal/unix/mod.rs index 735ed96bc7b16..262f9c704a882 100644 --- a/std/src/sys/pal/unix/mod.rs +++ b/std/src/sys/pal/unix/mod.rs @@ -20,6 +20,8 @@ pub mod io; pub mod kernel_copy; #[cfg(target_os = "l4re")] mod l4re; +#[cfg(target_os = "linux")] +pub mod linux; #[cfg(not(target_os = "l4re"))] pub mod net; #[cfg(target_os = "l4re")] @@ -31,8 +33,6 @@ pub mod rand; pub mod stack_overflow; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; -pub mod thread_local_key; pub mod thread_parking; pub mod time; @@ -305,10 +305,13 @@ macro_rules! impl_is_minus_one { } impl_is_minus_one! { i8 i16 i32 i64 isize } +/// Convert native return values to Result using the *-1 means error is in `errno`* convention. +/// Non-error values are `Ok`-wrapped. pub fn cvt<T: IsMinusOne>(t: T) -> crate::io::Result<T> { if t.is_minus_one() { Err(crate::io::Error::last_os_error()) } else { Ok(t) } } +/// Like [`cvt`], but retries the closure while the error in `errno` is `EINTR`. Non-error values are `Ok`-wrapped.
pub fn cvt_r<T, F>(mut f: F) -> crate::io::Result<T> where T: IsMinusOne, F: FnMut() -> T, { @@ -323,6 +326,7 @@ where } #[allow(dead_code)] // Not used on all platforms. +/// Zero means `Ok()`, all other values are treated as raw OS errors. Does not look at `errno`. pub fn cvt_nz(error: libc::c_int) -> crate::io::Result<()> { if error == 0 { Ok(()) } else { Err(crate::io::Error::from_raw_os_error(error)) } } diff --git a/std/src/sys/pal/unix/os.rs b/std/src/sys/pal/unix/os.rs index 8afc49f52274c..9adc2b94e599e 100644 --- a/std/src/sys/pal/unix/os.rs +++ b/std/src/sys/pal/unix/os.rs @@ -462,21 +462,21 @@ pub fn current_exe() -> io::Result<PathBuf> { #[cfg(target_os = "haiku")] pub fn current_exe() -> io::Result<PathBuf> { + let mut name = vec![0; libc::PATH_MAX as usize]; unsafe { - let mut info: mem::MaybeUninit<image_info> = mem::MaybeUninit::uninit(); - let mut cookie: i32 = 0; - // the executable can be found at team id 0 - let result = libc::_get_next_image_info( - 0, - &mut cookie, - info.as_mut_ptr(), - mem::size_of::<image_info>(), + let result = libc::find_path( + crate::ptr::null_mut(), + libc::path_base_directory::B_FIND_PATH_IMAGE_PATH, + crate::ptr::null_mut(), + name.as_mut_ptr(), + name.len(), ); - if result != 0 { + if result != libc::B_OK { use crate::io::ErrorKind; Err(io::const_io_error!(ErrorKind::Uncategorized, "Error getting executable path")) } else { - let name = CStr::from_ptr((*info.as_ptr()).name.as_ptr()).to_bytes(); + // find_path adds the null terminator. + let name = CStr::from_ptr(name.as_ptr()).to_bytes(); Ok(PathBuf::from(OsStr::from_bytes(name))) } } @@ -675,19 +675,19 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { .flatten() } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { run_with_cstr(k.as_bytes(), &|k| { run_with_cstr(v.as_bytes(), &|v| { let _guard = ENV_LOCK.write(); - cvt(unsafe { libc::setenv(k.as_ptr(), v.as_ptr(), 1) }).map(drop) + cvt(libc::setenv(k.as_ptr(), v.as_ptr(), 1)).map(drop) }) }) } -pub fn unsetenv(n: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(n: &OsStr) -> io::Result<()> { run_with_cstr(n.as_bytes(), &|nbuf| { let _guard = ENV_LOCK.write(); - cvt(unsafe { libc::unsetenv(nbuf.as_ptr()) }).map(drop) + cvt(libc::unsetenv(nbuf.as_ptr())).map(drop) }) } @@ -738,17 +738,17 @@ pub fn home_dir() -> Option<OsString> { n => n as usize, }; let mut buf = Vec::with_capacity(amt); - let mut passwd: libc::passwd = mem::zeroed(); + let mut p = mem::MaybeUninit::<libc::passwd>::uninit(); let mut result = ptr::null_mut(); match libc::getpwuid_r( libc::getuid(), - &mut passwd, + p.as_mut_ptr(), buf.as_mut_ptr(), buf.capacity(), &mut result, ) { 0 if !result.is_null() => { - let ptr = passwd.pw_dir as *const _; + let ptr = (*result).pw_dir as *const _; let bytes = CStr::from_ptr(ptr).to_bytes().to_vec(); Some(OsStringExt::from_vec(bytes)) } @@ -758,6 +758,7 @@ pub fn home_dir() -> Option<OsString> { } pub fn exit(code: i32) -> !
{ + crate::sys::exit_guard::unique_thread_exit(); unsafe { libc::exit(code as c_int) } } diff --git a/std/src/sys/pal/unix/process/process_unix.rs b/std/src/sys/pal/unix/process/process_unix.rs index e2fca8c7e63dc..abd4a334783e4 100644 --- a/std/src/sys/pal/unix/process/process_unix.rs +++ b/std/src/sys/pal/unix/process/process_unix.rs @@ -7,9 +7,7 @@ use crate::sys::cvt; use crate::sys::process::process_common::*; #[cfg(target_os = "linux")] -use crate::os::linux::process::PidFd; -#[cfg(target_os = "linux")] -use crate::os::unix::io::AsRawFd; +use crate::sys::pal::unix::linux::pidfd::PidFd; #[cfg(target_os = "vxworks")] use libc::RTP_ID as pid_t; @@ -451,17 +449,82 @@ impl Command { use crate::mem::MaybeUninit; use crate::sys::weak::weak; use crate::sys::{self, cvt_nz, on_broken_pipe_flag_used}; + #[cfg(target_os = "linux")] + use core::sync::atomic::{AtomicU8, Ordering}; if self.get_gid().is_some() || self.get_uid().is_some() || (self.env_saw_path() && !self.program_is_path()) || !self.get_closures().is_empty() || self.get_groups().is_some() - || self.get_create_pidfd() { return Ok(None); } + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + weak! { + fn pidfd_spawnp( + *mut libc::c_int, + *const libc::c_char, + *const libc::posix_spawn_file_actions_t, + *const libc::posix_spawnattr_t, + *const *mut libc::c_char, + *const *mut libc::c_char + ) -> libc::c_int + } + + weak! { fn pidfd_getpid(libc::c_int) -> libc::c_int } + + static PIDFD_SUPPORTED: AtomicU8 = AtomicU8::new(0); + const UNKNOWN: u8 = 0; + const SPAWN: u8 = 1; + // Obtaining a pidfd via the fork+exec path might work + const FORK_EXEC: u8 = 2; + // Neither pidfd_spawn nor fork/exec will get us a pidfd. + // Instead we'll just posix_spawn if the other preconditions are met. + const NO: u8 = 3; + + if self.get_create_pidfd() { + let mut support = PIDFD_SUPPORTED.load(Ordering::Relaxed); + if support == FORK_EXEC { + return Ok(None); + } + if support == UNKNOWN { + support = NO; + let our_pid = crate::process::id(); + let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int); + match pidfd { + Ok(pidfd) => { + support = FORK_EXEC; + if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) { + if pidfd_spawnp.get().is_some() && pid as u32 == our_pid { + support = SPAWN + } + } + unsafe { libc::close(pidfd) }; + } + Err(e) if e.raw_os_error() == Some(libc::EMFILE) => { + // We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail + // Don't update the support flag so we can probe again later. + return Err(e) + } + _ => {} + } + PIDFD_SUPPORTED.store(support, Ordering::Relaxed); + if support == FORK_EXEC { + return Ok(None); + } + } + core::assert_matches::debug_assert_matches!(support, SPAWN | NO); + } + } else { + if self.get_create_pidfd() { + unreachable!("only implemented on linux") + } + } + } + // Only glibc 2.24+ posix_spawn() supports returning ENOENT directly. #[cfg(all(target_os = "linux", target_env = "gnu"))] { @@ -545,9 +608,6 @@ impl Command { let pgroup = self.get_pgroup(); - // Safety: -1 indicates we don't have a pidfd. 
- let mut p = unsafe { Process::new(0, -1) }; - struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>); impl Drop for PosixSpawnFileActions<'_> { @@ -642,6 +702,47 @@ impl Command { #[cfg(target_os = "nto")] let spawn_fn = retrying_libc_posix_spawnp; + #[cfg(target_os = "linux")] + if self.get_create_pidfd() && PIDFD_SUPPORTED.load(Ordering::Relaxed) == SPAWN { + let mut pidfd: libc::c_int = -1; + let spawn_res = pidfd_spawnp.get().unwrap()( + &mut pidfd, + self.get_program_cstr().as_ptr(), + file_actions.0.as_ptr(), + attrs.0.as_ptr(), + self.get_argv().as_ptr() as *const _, + envp as *const _, + ); + + let spawn_res = cvt_nz(spawn_res); + if let Err(ref e) = spawn_res + && e.raw_os_error() == Some(libc::ENOSYS) + { + PIDFD_SUPPORTED.store(FORK_EXEC, Ordering::Relaxed); + return Ok(None); + } + spawn_res?; + + let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) { + Ok(pid) => pid, + Err(e) => { + // The child has been spawned and we are holding its pidfd. + // But we cannot obtain its pid even though pidfd_getpid support was verified earlier. + // This might happen if libc can't open procfs because the file descriptor limit has been reached. + libc::close(pidfd); + return Err(Error::new( + e.kind(), + "pidfd_spawnp succeeded but the child's PID could not be obtained", + )); + } + }; + + return Ok(Some(Process::new(pid, pidfd))); + } + + // Safety: -1 indicates we don't have a pidfd. + let mut p = Process::new(0, -1); + let spawn_res = spawn_fn( &mut p.pid, self.get_program_cstr().as_ptr(), @@ -788,6 +889,12 @@ pub struct Process { impl Process { #[cfg(target_os = "linux")] + /// # Safety + /// + /// `pidfd` must either be -1 (representing no file descriptor) or a valid, exclusively owned file + /// descriptor (See [I/O Safety]). + /// + /// [I/O Safety]: crate::io#io-safety unsafe fn new(pid: pid_t, pidfd: pid_t) -> Self { use crate::os::unix::io::FromRawFd; use crate::sys_common::FromInner; @@ -815,16 +922,7 @@ impl Process { #[cfg(target_os = "linux")] if let Some(pid_fd) = self.pidfd.as_ref() { // pidfd_send_signal predates pidfd_open.
so if we were able to get an fd then sending signals will work too - return cvt(unsafe { - libc::syscall( - libc::SYS_pidfd_send_signal, - pid_fd.as_raw_fd(), - libc::SIGKILL, - crate::ptr::null::<()>(), - 0, - ) - }) - .map(drop); + return pid_fd.kill(); } cvt(unsafe { libc::kill(self.pid, libc::SIGKILL) }).map(drop) } @@ -836,12 +934,7 @@ impl Process { } #[cfg(target_os = "linux")] if let Some(pid_fd) = self.pidfd.as_ref() { - let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; - - cvt_r(|| unsafe { - libc::waitid(libc::P_PIDFD, pid_fd.as_raw_fd() as u32, &mut siginfo, libc::WEXITED) - })?; - let status = ExitStatus::from_waitid_siginfo(siginfo); + let status = pid_fd.wait()?; self.status = Some(status); return Ok(status); } @@ -857,22 +950,11 @@ impl Process { } #[cfg(target_os = "linux")] if let Some(pid_fd) = self.pidfd.as_ref() { - let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; - - cvt(unsafe { - libc::waitid( - libc::P_PIDFD, - pid_fd.as_raw_fd() as u32, - &mut siginfo, - libc::WEXITED | libc::WNOHANG, - ) - })?; - if unsafe { siginfo.si_pid() } == 0 { - return Ok(None); + let status = pid_fd.try_wait()?; + if let Some(status) = status { + self.status = Some(status) } - let status = ExitStatus::from_waitid_siginfo(siginfo); - self.status = Some(status); - return Ok(Some(status)); + return Ok(status); } let mut status = 0 as c_int; let pid = cvt(unsafe { libc::waitpid(self.pid, &mut status, libc::WNOHANG) })?; @@ -1053,6 +1135,10 @@ fn signal_string(signal: i32) -> &'static str { libc::SIGINFO => " (SIGINFO)", #[cfg(target_os = "hurd")] libc::SIGLOST => " (SIGLOST)", + #[cfg(target_os = "freebsd")] + libc::SIGTHR => " (SIGTHR)", + #[cfg(target_os = "freebsd")] + libc::SIGLIBRT => " (SIGLIBRT)", _ => "", } } @@ -1101,20 +1187,33 @@ impl ExitStatusError { } #[cfg(target_os = "linux")] -#[unstable(feature = "linux_pidfd", issue = "82971")] -impl crate::os::linux::process::ChildExt for crate::process::Child { - fn pidfd(&self) -> io::Result<&PidFd> { - self.handle - .pidfd - .as_ref() - .ok_or_else(|| Error::new(ErrorKind::Uncategorized, "No pidfd was created.")) - } +mod linux_child_ext { + + use crate::io; + use crate::mem; + use crate::os::linux::process as os; + use crate::sys::pal::unix::linux::pidfd as imp; + use crate::sys::pal::unix::ErrorKind; + use crate::sys_common::FromInner; + + #[unstable(feature = "linux_pidfd", issue = "82971")] + impl crate::os::linux::process::ChildExt for crate::process::Child { + fn pidfd(&self) -> io::Result<&os::PidFd> { + self.handle + .pidfd + .as_ref() + // SAFETY: The os type is a transparent wrapper, therefore we can transmute references + .map(|fd| unsafe { mem::transmute::<&imp::PidFd, &os::PidFd>(fd) }) + .ok_or_else(|| io::Error::new(ErrorKind::Uncategorized, "No pidfd was created.")) + } - fn take_pidfd(&mut self) -> io::Result<PidFd> { - self.handle - .pidfd - .take() - .ok_or_else(|| Error::new(ErrorKind::Uncategorized, "No pidfd was created.")) + fn into_pidfd(mut self) -> Result<os::PidFd, Self> { + self.handle + .pidfd + .take() + .map(|fd| <os::PidFd as FromInner<imp::PidFd>>::from_inner(fd)) + .ok_or_else(|| self) + } } } diff --git a/std/src/sys/pal/unix/process/process_unix/tests.rs b/std/src/sys/pal/unix/process/process_unix/tests.rs index 0a6c6ec19fc7e..e5e1f956bc351 100644 --- a/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -60,57 +60,3 @@ fn test_command_fork_no_unwind() { || signal == libc::SIGSEGV ); } - -#[test] -#[cfg(target_os = "linux")] // pidfds are a linux-specific concept
-fn test_command_pidfd() { - use crate::assert_matches::assert_matches; - use crate::os::fd::{AsRawFd, RawFd}; - use crate::os::linux::process::{ChildExt, CommandExt}; - use crate::process::Command; - - // pidfds require the pidfd_open syscall - let our_pid = crate::process::id(); - let pidfd = unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) }; - let pidfd_open_available = if pidfd >= 0 { - unsafe { libc::close(pidfd as RawFd) }; - true - } else { - false - }; - - // always exercise creation attempts - let mut child = Command::new("false").create_pidfd(true).spawn().unwrap(); - - // but only check if we know that the kernel supports pidfds. - // We don't assert the precise value, since the standard library - // might have opened other file descriptors before our code runs. - if pidfd_open_available { - assert!(child.pidfd().is_ok()); - } - if let Ok(pidfd) = child.pidfd() { - let flags = super::cvt(unsafe { libc::fcntl(pidfd.as_raw_fd(), libc::F_GETFD) }).unwrap(); - assert!(flags & libc::FD_CLOEXEC != 0); - } - let status = child.wait().expect("error waiting on pidfd"); - assert_eq!(status.code(), Some(1)); - - let mut child = Command::new("sleep").arg("1000").create_pidfd(true).spawn().unwrap(); - assert_matches!(child.try_wait(), Ok(None)); - child.kill().expect("failed to kill child"); - let status = child.wait().expect("error waiting on pidfd"); - assert_eq!(status.signal(), Some(libc::SIGKILL)); - - let _ = Command::new("echo") - .create_pidfd(false) - .spawn() - .unwrap() - .pidfd() - .expect_err("pidfd should not have been created when create_pid(false) is set"); - - let _ = Command::new("echo") - .spawn() - .unwrap() - .pidfd() - .expect_err("pidfd should not have been created"); -} diff --git a/std/src/sys/pal/unix/process/process_unsupported/wait_status.rs b/std/src/sys/pal/unix/process/process_unsupported/wait_status.rs index e6dfadcf4a4cf..973188b1f2b27 100644 --- a/std/src/sys/pal/unix/process/process_unsupported/wait_status.rs +++ b/std/src/sys/pal/unix/process/process_unsupported/wait_status.rs @@ -1,6 +1,7 @@ //! Emulated wait status for non-Unix #[cfg(unix)] platforms //! //! Separate module to facilitate testing against a real Unix implementation. + use crate::ffi::c_int; use crate::fmt; use crate::num::NonZero; diff --git a/std/src/sys/pal/unix/process/process_vxworks.rs b/std/src/sys/pal/unix/process/process_vxworks.rs index 76179e0910d9e..5007dbd34b4ab 100644 --- a/std/src/sys/pal/unix/process/process_vxworks.rs +++ b/std/src/sys/pal/unix/process/process_vxworks.rs @@ -1,5 +1,5 @@ use crate::fmt; -use crate::io::{self, Error, ErrorKind}; +use crate::io::{self, ErrorKind}; use crate::num::NonZero; use crate::sys; use crate::sys::cvt; diff --git a/std/src/sys/pal/unix/stack_overflow.rs b/std/src/sys/pal/unix/stack_overflow.rs index 26c49257ad00d..2e5bd85327a19 100644 --- a/std/src/sys/pal/unix/stack_overflow.rs +++ b/std/src/sys/pal/unix/stack_overflow.rs @@ -491,6 +491,14 @@ mod imp { } } +// This is intentionally not enabled on iOS/tvOS/watchOS/visionOS, as it uses +// several symbols that might lead to rejections from the App Store, namely +// `sigaction`, `sigaltstack`, `sysctlbyname`, `mmap`, `munmap` and `mprotect`.
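An aside on the pidfd changes above: kernel support is now probed once per process and cached in `PIDFD_SUPPORTED` (UNKNOWN, then SPAWN, FORK_EXEC or NO), and `ChildExt` trades `take_pidfd` for a consuming `into_pidfd` that hands the `Child` back on failure. A sketch of how the resulting surface is exercised, going by the signatures in this patch (nightly-only, since `linux_pidfd` is unstable; the error handling here is purely illustrative):

```rust
#![feature(linux_pidfd)]
use std::io;
use std::os::fd::AsRawFd;
use std::os::linux::process::{ChildExt, CommandExt};
use std::process::Command;

fn main() -> io::Result<()> {
    // Asking for a pidfd is best-effort: spawning still succeeds on
    // kernels without pidfd support, and `pidfd()` then returns Err.
    let child = Command::new("sleep").arg("1").create_pidfd(true).spawn()?;

    // Borrow the pidfd without giving up the Child.
    if let Ok(fd) = child.pidfd() {
        println!("child pidfd: {}", fd.as_raw_fd());
    }

    // Or consume the Child to take ownership of the fd. On failure the
    // intact Child comes back, which is why the return type is
    // Result<PidFd, Child> rather than io::Result<PidFd>.
    match child.into_pidfd() {
        Ok(fd) => drop(fd), // closing a pidfd does not kill the process
        Err(mut child) => {
            child.wait()?;
        }
    }
    Ok(())
}
```

Returning the `Child` in the error position means a caller that asked for a pidfd but did not get one still holds the handle and can fall back to pid-based waiting.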
+// +// This might be overly cautious, though it is also what Swift does (and they +// usually have fewer qualms about forwards compatibility, since the runtime +// is shipped with the OS): +// #[cfg(not(any( target_os = "linux", target_os = "freebsd", diff --git a/std/src/sys/pal/unix/thread.rs b/std/src/sys/pal/unix/thread.rs index 853ef8736de24..619f4e4121e73 100644 --- a/std/src/sys/pal/unix/thread.rs +++ b/std/src/sys/pal/unix/thread.rs @@ -475,9 +475,10 @@ mod cgroups { //! * cgroup v2 in non-standard mountpoints //! * paths containing control characters or spaces, since those would be escaped in procfs //! output and we don't unescape + use crate::borrow::Cow; use crate::ffi::OsString; - use crate::fs::{try_exists, File}; + use crate::fs::{exists, File}; use crate::io::Read; use crate::io::{BufRead, BufReader}; use crate::os::unix::ffi::OsStringExt; @@ -555,7 +556,7 @@ mod cgroups { path.push("cgroup.controllers"); // skip if we're not looking at cgroup2 - if matches!(try_exists(&path), Err(_) | Ok(false)) { + if matches!(exists(&path), Err(_) | Ok(false)) { return usize::MAX; }; @@ -612,7 +613,7 @@ mod cgroups { path.push(&group_path); // skip if we guessed the mount incorrectly - if matches!(try_exists(&path), Err(_) | Ok(false)) { + if matches!(exists(&path), Err(_) | Ok(false)) { continue; } @@ -717,5 +718,14 @@ unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize { #[cfg(target_os = "netbsd")] unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize { - 2048 // just a guess + static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new(); + + *STACK.get_or_init(|| { + let mut stack = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) }; + if stack < 0 { + stack = 2048; // just a guess + } + + stack as usize + }) } diff --git a/std/src/sys/pal/unix/thread_local_dtor.rs b/std/src/sys/pal/unix/thread_local_dtor.rs deleted file mode 100644 index 75db6e112ed35..0000000000000 --- a/std/src/sys/pal/unix/thread_local_dtor.rs +++ /dev/null @@ -1,126 +0,0 @@ -#![cfg(target_thread_local)] -#![unstable(feature = "thread_local_internals", issue = "none")] - -//! Provides thread-local destructors without an associated "key", which -//! can be more efficient. - -// Since what appears to be glibc 2.18 this symbol has been shipped which -// GCC and clang both use to invoke destructors in thread_local globals, so -// let's do the same! -// -// Note, however, that we run on lots older linuxes, as well as cross -// compiling from a newer linux to an older linux, so we also have a -// fallback implementation to use as well. -#[cfg(any( - target_os = "linux", - target_os = "android", - target_os = "fuchsia", - target_os = "redox", - target_os = "hurd", - target_os = "netbsd", - target_os = "dragonfly" -))] -// FIXME: The Rust compiler currently omits weakly function definitions (i.e., -// __cxa_thread_atexit_impl) and its metadata from LLVM IR. -#[no_sanitize(cfi, kcfi)] -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::mem; - use crate::sys_common::thread_local_dtor::register_dtor_fallback; - - /// This is necessary because the __cxa_thread_atexit_impl implementation - /// std links to by default may be a C or C++ implementation that was not - /// compiled using the Clang integer normalization option.
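One detail of the NetBSD `min_stack_size` change above, isolated: the `sysconf` value is assumed to be stable for the life of the process, which is what makes caching it in a `OnceLock` sound instead of re-querying on every thread spawn. A minimal sketch of the same pattern, assuming the `libc` crate:

```rust
use std::sync::OnceLock;

fn min_thread_stack() -> usize {
    static STACK: OnceLock<usize> = OnceLock::new();
    *STACK.get_or_init(|| {
        // sysconf reports -1 on failure; fall back to the old guess then.
        let stack = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        if stack < 0 { 2048 } else { stack as usize }
    })
}
```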
- #[cfg(sanitizer_cfi_normalize_integers)] - use core::ffi::c_int; - #[cfg(not(sanitizer_cfi_normalize_integers))] - #[cfi_encoding = "i"] - #[repr(transparent)] - pub struct c_int(#[allow(dead_code)] pub libc::c_int); - - extern "C" { - #[linkage = "extern_weak"] - static __dso_handle: *mut u8; - #[linkage = "extern_weak"] - static __cxa_thread_atexit_impl: Option< - extern "C" fn( - unsafe extern "C" fn(*mut libc::c_void), - *mut libc::c_void, - *mut libc::c_void, - ) -> c_int, - >; - } - - if let Some(f) = __cxa_thread_atexit_impl { - unsafe { - f( - mem::transmute::< - unsafe extern "C" fn(*mut u8), - unsafe extern "C" fn(*mut libc::c_void), - >(dtor), - t.cast(), - core::ptr::addr_of!(__dso_handle) as *mut _, - ); - } - return; - } - register_dtor_fallback(t, dtor); -} - -// This implementation is very similar to register_dtor_fallback in -// sys_common/thread_local.rs. The main difference is that we want to hook into -// macOS's analog of the above linux function, _tlv_atexit. OSX will run the -// registered dtors before any TLS slots get freed, and when the main thread -// exits. -// -// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The -// workaround below is to register, via _tlv_atexit, a custom DTOR list once per -// thread. thread_local dtors are pushed to the DTOR list without calling -// _tlv_atexit. -#[cfg(target_vendor = "apple")] -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::cell::{Cell, RefCell}; - use crate::ptr; - - #[thread_local] - static REGISTERED: Cell<bool> = Cell::new(false); - - #[thread_local] - static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new()); - - if !REGISTERED.get() { - _tlv_atexit(run_dtors, ptr::null_mut()); - REGISTERED.set(true); - } - - extern "C" { - fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8); - } - - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } - - unsafe extern "C" fn run_dtors(_: *mut u8) { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - dtor(ptr); - } - list = DTORS.take(); - } - } -} - -#[cfg(any( - target_os = "vxworks", - target_os = "horizon", - target_os = "emscripten", - target_os = "aix", - target_os = "freebsd", -))] -#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g.
wasm32-unknown-emscripten) -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::sys_common::thread_local_dtor::register_dtor_fallback; - register_dtor_fallback(t, dtor); -} diff --git a/std/src/sys/pal/unix/thread_local_key.rs b/std/src/sys/pal/unix/thread_local_key.rs deleted file mode 100644 index 2b2d079ee4d01..0000000000000 --- a/std/src/sys/pal/unix/thread_local_key.rs +++ /dev/null @@ -1,29 +0,0 @@ -#![allow(dead_code)] // not used on all platforms - -use crate::mem; - -pub type Key = libc::pthread_key_t; - -#[inline] -pub unsafe fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key { - let mut key = 0; - assert_eq!(libc::pthread_key_create(&mut key, mem::transmute(dtor)), 0); - key -} - -#[inline] -pub unsafe fn set(key: Key, value: *mut u8) { - let r = libc::pthread_setspecific(key, value as *mut _); - debug_assert_eq!(r, 0); -} - -#[inline] -pub unsafe fn get(key: Key) -> *mut u8 { - libc::pthread_getspecific(key) as *mut u8 -} - -#[inline] -pub unsafe fn destroy(key: Key) { - let r = libc::pthread_key_delete(key); - debug_assert_eq!(r, 0); -} diff --git a/std/src/sys/pal/unsupported/fs.rs b/std/src/sys/pal/unsupported/fs.rs index 6ac1b5d2bcfca..474c9fe97d18d 100644 --- a/std/src/sys/pal/unsupported/fs.rs +++ b/std/src/sys/pal/unsupported/fs.rs @@ -291,7 +291,7 @@ pub fn remove_dir_all(_path: &Path) -> io::Result<()> { unsupported() } -pub fn try_exists(_path: &Path) -> io::Result<bool> { +pub fn exists(_path: &Path) -> io::Result<bool> { unsupported() } diff --git a/std/src/sys/pal/unsupported/mod.rs b/std/src/sys/pal/unsupported/mod.rs index 01f5cfd429753..442e6042ad561 100644 --- a/std/src/sys/pal/unsupported/mod.rs +++ b/std/src/sys/pal/unsupported/mod.rs @@ -11,9 +11,6 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -#[cfg(target_thread_local)] -pub mod thread_local_dtor; -pub mod thread_local_key; pub mod time; mod common; diff --git a/std/src/sys/pal/unsupported/os.rs b/std/src/sys/pal/unsupported/os.rs index 248b34829f2ee..3be98898bbeb9 100644 --- a/std/src/sys/pal/unsupported/os.rs +++ b/std/src/sys/pal/unsupported/os.rs @@ -96,11 +96,11 @@ pub fn getenv(_: &OsStr) -> Option<OsString> { None } -pub fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot set env vars on this platform")) } -pub fn unsetenv(_: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(_: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot unset env vars on this platform")) } diff --git a/std/src/sys/pal/unsupported/thread.rs b/std/src/sys/pal/unsupported/thread.rs index ea939247199c2..89f8bad7026ee 100644 --- a/std/src/sys/pal/unsupported/thread.rs +++ b/std/src/sys/pal/unsupported/thread.rs @@ -6,7 +6,7 @@ use crate::time::Duration; pub struct Thread(!); -pub const DEFAULT_MIN_STACK_SIZE: usize = 4096; +pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements diff --git a/std/src/sys/pal/unsupported/thread_local_dtor.rs b/std/src/sys/pal/unsupported/thread_local_dtor.rs deleted file mode 100644 index 84660ea588156..0000000000000 --- a/std/src/sys/pal/unsupported/thread_local_dtor.rs +++ /dev/null @@ -1,10 +0,0 @@ -#![unstable(feature = "thread_local_internals", issue = "none")] - -#[cfg_attr(target_family = "wasm", allow(unused))] // unused on wasm32-unknown-unknown -pub unsafe fn register_dtor(_t: *mut u8, _dtor: unsafe extern "C" fn(*mut u8)) { - //
FIXME: right now there is no concept of "thread exit", but this is likely - // going to show up at some point in the form of an exported symbol that the - // wasm runtime is going to be expected to call. For now we basically just - // ignore the arguments, but if such a function starts to exist it will - // likely look like the OSX implementation in `unix/fast_thread_local.rs` -} diff --git a/std/src/sys/pal/unsupported/thread_local_key.rs b/std/src/sys/pal/unsupported/thread_local_key.rs deleted file mode 100644 index b6e5e4cd2e197..0000000000000 --- a/std/src/sys/pal/unsupported/thread_local_key.rs +++ /dev/null @@ -1,21 +0,0 @@ -pub type Key = usize; - -#[inline] -pub unsafe fn create(_dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key { - panic!("should not be used on this target"); -} - -#[inline] -pub unsafe fn set(_key: Key, _value: *mut u8) { - panic!("should not be used on this target"); -} - -#[inline] -pub unsafe fn get(_key: Key) -> *mut u8 { - panic!("should not be used on this target"); -} - -#[inline] -pub unsafe fn destroy(_key: Key) { - panic!("should not be used on this target"); -} diff --git a/std/src/sys/pal/wasi/fs.rs b/std/src/sys/pal/wasi/fs.rs index 529b82e019893..c58e6a08b374e 100644 --- a/std/src/sys/pal/wasi/fs.rs +++ b/std/src/sys/pal/wasi/fs.rs @@ -17,7 +17,7 @@ use crate::sys::time::SystemTime; use crate::sys::unsupported; use crate::sys_common::{AsInner, FromInner, IntoInner}; -pub use crate::sys_common::fs::try_exists; +pub use crate::sys_common::fs::exists; pub struct File { fd: WasiFd, diff --git a/std/src/sys/pal/wasi/mod.rs b/std/src/sys/pal/wasi/mod.rs index c1266619b36ab..d8fe06d1973c9 100644 --- a/std/src/sys/pal/wasi/mod.rs +++ b/std/src/sys/pal/wasi/mod.rs @@ -33,22 +33,21 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -#[path = "../unsupported/thread_local_dtor.rs"] -pub mod thread_local_dtor; -#[path = "../unsupported/thread_local_key.rs"] -pub mod thread_local_key; pub mod time; #[path = "../unsupported/common.rs"] #[deny(unsafe_op_in_unsafe_fn)] #[allow(unused)] mod common; + pub use common::*; mod helpers; -// These exports are listed individually to work around Rust's glob import -// conflict rules. If we glob export `helpers` and `common` together, then -// the compiler complains about conflicts. + +// The following exports are listed individually to work around Rust's glob +// import conflict rules. If we glob export `helpers` and `common` together, +// then the compiler complains about conflicts.
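The glob-export comment above refers to a real resolution rule: two glob re-exports may bring the same name into scope, and any use of that name is then rejected as ambiguous (E0659), while a by-name re-export takes precedence over globs. A minimal sketch, reusing the helper names from the wasi module purely for illustration:

```rust
mod helpers {
    pub fn decode_error_kind() {}
    pub fn err2io() {}
}
mod common {
    pub fn decode_error_kind() {}
}

pub use common::*;
// A second glob would also compile on its own:
//     pub use helpers::*;
// but any downstream mention of `decode_error_kind` would then be
// ambiguous (E0659). Named re-exports take precedence over globs, so
// listing the items individually resolves the conflict:
pub use helpers::decode_error_kind;
pub use helpers::err2io;
```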
+ pub use helpers::abort_internal; pub use helpers::decode_error_kind; use helpers::err2io; diff --git a/std/src/sys/pal/wasi/os.rs b/std/src/sys/pal/wasi/os.rs index ee377b6ef791d..e96296997e6a9 100644 --- a/std/src/sys/pal/wasi/os.rs +++ b/std/src/sys/pal/wasi/os.rs @@ -244,7 +244,7 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { .flatten() } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { run_with_cstr(k.as_bytes(), &|k| { run_with_cstr(v.as_bytes(), &|v| unsafe { let _guard = env_write_lock(); @@ -253,7 +253,7 @@ pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { }) } -pub fn unsetenv(n: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(n: &OsStr) -> io::Result<()> { run_with_cstr(n.as_bytes(), &|nbuf| unsafe { let _guard = env_write_lock(); cvt(libc::unsetenv(nbuf.as_ptr())).map(drop) diff --git a/std/src/sys/pal/wasi/thread.rs b/std/src/sys/pal/wasi/thread.rs index d45fb28b67e63..975eef2451f4c 100644 --- a/std/src/sys/pal/wasi/thread.rs +++ b/std/src/sys/pal/wasi/thread.rs @@ -66,7 +66,7 @@ cfg_if::cfg_if! { } } -pub const DEFAULT_MIN_STACK_SIZE: usize = 4096; +pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements diff --git a/std/src/sys/pal/wasip2/mod.rs b/std/src/sys/pal/wasip2/mod.rs index 6787ffb4bed8f..0930d2e22fa8d 100644 --- a/std/src/sys/pal/wasip2/mod.rs +++ b/std/src/sys/pal/wasip2/mod.rs @@ -34,10 +34,6 @@ pub mod process; pub mod stdio; #[path = "../wasi/thread.rs"] pub mod thread; -#[path = "../unsupported/thread_local_dtor.rs"] -pub mod thread_local_dtor; -#[path = "../unsupported/thread_local_key.rs"] -pub mod thread_local_key; #[path = "../wasi/time.rs"] pub mod time; @@ -45,13 +41,16 @@ pub mod time; #[deny(unsafe_op_in_unsafe_fn)] #[allow(unused)] mod common; + pub use common::*; #[path = "../wasi/helpers.rs"] mod helpers; -// These exports are listed individually to work around Rust's glob import -// conflict rules. If we glob export `helpers` and `common` together, then -// the compiler complains about conflicts. + +// The following exports are listed individually to work around Rust's glob +// import conflict rules. If we glob export `helpers` and `common` together, +// then the compiler complains about conflicts. + pub use helpers::abort_internal; pub use helpers::decode_error_kind; use helpers::err2io; diff --git a/std/src/sys/pal/wasm/atomics/futex.rs b/std/src/sys/pal/wasm/atomics/futex.rs index f4fbe9f48554b..3584138ca0447 100644 --- a/std/src/sys/pal/wasm/atomics/futex.rs +++ b/std/src/sys/pal/wasm/atomics/futex.rs @@ -1,7 +1,16 @@ -use crate::arch::wasm32; +#[cfg(target_arch = "wasm32")] +use core::arch::wasm32 as wasm; +#[cfg(target_arch = "wasm64")] +use core::arch::wasm64 as wasm; + use crate::sync::atomic::AtomicU32; use crate::time::Duration; +/// An atomic for use as a futex that is at least 8-bits but may be larger. +pub type SmallAtomic = AtomicU32; +/// Must be the underlying type of SmallAtomic +pub type SmallPrimitive = u32; + /// Wait for a futex_wake operation to wake us. /// /// Returns directly if the futex doesn't hold the expected value.
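For reference while reading the `futex_wait` hunk just below: `memory_atomic_wait32` is the wasm threads-proposal intrinsic, and the `< 2` comparison folds two of its three return codes into success. A small decoding sketch (the codes come from the wasm specification):

```rust
/// Decodes the result of `memory_atomic_wait32` (wasm threads proposal):
/// 0 = "ok" (woken by a notify), 1 = "not-equal" (the futex word already
/// differed from the expected value), 2 = "timed-out".
fn woken_before_timeout(ret: i32) -> bool {
    // The patch spells this `ret < 2`: being woken and finding the value
    // already changed both count as success; only a timeout is false.
    match ret {
        0 | 1 => true,
        _ => false,
    }
}
```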
@@ -10,11 +19,8 @@ use crate::time::Duration; pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) -> bool { let timeout = timeout.and_then(|t| t.as_nanos().try_into().ok()).unwrap_or(-1); unsafe { - wasm32::memory_atomic_wait32( - futex as *const AtomicU32 as *mut i32, - expected as i32, - timeout, - ) < 2 + wasm::memory_atomic_wait32(futex as *const AtomicU32 as *mut i32, expected as i32, timeout) + < 2 } } @@ -23,12 +29,12 @@ pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) - /// Returns true if this actually woke up such a thread, /// or false if no thread was waiting on this futex. pub fn futex_wake(futex: &AtomicU32) -> bool { - unsafe { wasm32::memory_atomic_notify(futex as *const AtomicU32 as *mut i32, 1) > 0 } + unsafe { wasm::memory_atomic_notify(futex as *const AtomicU32 as *mut i32, 1) > 0 } } /// Wake up all threads that are waiting on futex_wait on this futex. pub fn futex_wake_all(futex: &AtomicU32) { unsafe { - wasm32::memory_atomic_notify(futex as *const AtomicU32 as *mut i32, i32::MAX as u32); + wasm::memory_atomic_notify(futex as *const AtomicU32 as *mut i32, i32::MAX as u32); } } diff --git a/std/src/sys/pal/wasm/atomics/thread.rs b/std/src/sys/pal/wasm/atomics/thread.rs index 49f936f14498c..afdb159fe6f8b 100644 --- a/std/src/sys/pal/wasm/atomics/thread.rs +++ b/std/src/sys/pal/wasm/atomics/thread.rs @@ -6,7 +6,7 @@ use crate::time::Duration; pub struct Thread(!); -pub const DEFAULT_MIN_STACK_SIZE: usize = 4096; +pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements @@ -19,7 +19,11 @@ impl Thread { pub fn set_name(_name: &CStr) {} pub fn sleep(dur: Duration) { - use crate::arch::wasm32; + #[cfg(target_arch = "wasm32")] + use core::arch::wasm32 as wasm; + #[cfg(target_arch = "wasm64")] + use core::arch::wasm64 as wasm; + use crate::cmp; // Use an atomic wait to block the current thread artificially with a @@ -31,7 +35,7 @@ impl Thread { while nanos > 0 { let amt = cmp::min(i64::MAX as u128, nanos); let mut x = 0; - let val = unsafe { wasm32::memory_atomic_wait32(&mut x, 0, amt as i64) }; + let val = unsafe { wasm::memory_atomic_wait32(&mut x, 0, amt as i64) }; debug_assert_eq!(val, 2); nanos -= amt; } diff --git a/std/src/sys/pal/wasm/mod.rs b/std/src/sys/pal/wasm/mod.rs index 75dd10826cc04..4c34859e918bb 100644 --- a/std/src/sys/pal/wasm/mod.rs +++ b/std/src/sys/pal/wasm/mod.rs @@ -34,10 +34,6 @@ pub mod pipe; pub mod process; #[path = "../unsupported/stdio.rs"] pub mod stdio; -#[path = "../unsupported/thread_local_dtor.rs"] -pub mod thread_local_dtor; -#[path = "../unsupported/thread_local_key.rs"] -pub mod thread_local_key; #[path = "../unsupported/time.rs"] pub mod time; diff --git a/std/src/sys/pal/windows/alloc.rs b/std/src/sys/pal/windows/alloc.rs index 681d1a5efe932..987be6b69eec9 100644 --- a/std/src/sys/pal/windows/alloc.rs +++ b/std/src/sys/pal/windows/alloc.rs @@ -1,10 +1,8 @@ -#![deny(unsafe_op_in_unsafe_fn)] - use crate::alloc::{GlobalAlloc, Layout, System}; use crate::ffi::c_void; use crate::ptr; use crate::sync::atomic::{AtomicPtr, Ordering}; -use crate::sys::c; +use crate::sys::c::{self, windows_targets}; use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN}; use core::mem::MaybeUninit; @@ -15,76 +13,73 @@ mod tests; // See https://docs.microsoft.com/windows/win32/api/heapapi/ // Flag to indicate that the memory returned by `HeapAlloc` should be zeroed.
-const HEAP_ZERO_MEMORY: c::DWORD = 0x00000008; +const HEAP_ZERO_MEMORY: u32 = 0x00000008; -#[link(name = "kernel32")] -extern "system" { - // Get a handle to the default heap of the current process, or null if the operation fails. - // - // SAFETY: Successful calls to this function within the same process are assumed to - // always return the same handle, which remains valid for the entire lifetime of the process. - // - // See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-getprocessheap - fn GetProcessHeap() -> c::HANDLE; +// Get a handle to the default heap of the current process, or null if the operation fails. +// +// SAFETY: Successful calls to this function within the same process are assumed to +// always return the same handle, which remains valid for the entire lifetime of the process. +// +// See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-getprocessheap +windows_targets::link!("kernel32.dll" "system" fn GetProcessHeap() -> c::HANDLE); - // Allocate a block of `dwBytes` bytes of memory from a given heap `hHeap`. - // The allocated memory may be uninitialized, or zeroed if `dwFlags` is - // set to `HEAP_ZERO_MEMORY`. - // - // Returns a pointer to the newly-allocated memory or null if the operation fails. - // The returned pointer will be aligned to at least `MIN_ALIGN`. - // - // SAFETY: - // - `hHeap` must be a non-null handle returned by `GetProcessHeap`. - // - `dwFlags` must be set to either zero or `HEAP_ZERO_MEMORY`. - // - // Note that `dwBytes` is allowed to be zero, contrary to some other allocators. - // - // See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heapalloc - fn HeapAlloc(hHeap: c::HANDLE, dwFlags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID; +// Allocate a block of `dwBytes` bytes of memory from a given heap `hHeap`. +// The allocated memory may be uninitialized, or zeroed if `dwFlags` is +// set to `HEAP_ZERO_MEMORY`. +// +// Returns a pointer to the newly-allocated memory or null if the operation fails. +// The returned pointer will be aligned to at least `MIN_ALIGN`. +// +// SAFETY: +// - `hHeap` must be a non-null handle returned by `GetProcessHeap`. +// - `dwFlags` must be set to either zero or `HEAP_ZERO_MEMORY`. +// +// Note that `dwBytes` is allowed to be zero, contrary to some other allocators. +// +// See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heapalloc +windows_targets::link!("kernel32.dll" "system" fn HeapAlloc(hheap: c::HANDLE, dwflags: u32, dwbytes: usize) -> *mut core::ffi::c_void); - // Reallocate a block of memory behind a given pointer `lpMem` from a given heap `hHeap`, - // to a block of at least `dwBytes` bytes, either shrinking the block in place, - // or allocating at a new location, copying memory, and freeing the original location. - // - // Returns a pointer to the reallocated memory or null if the operation fails. - // The returned pointer will be aligned to at least `MIN_ALIGN`. - // If the operation fails the given block will never have been freed. - // - // SAFETY: - // - `hHeap` must be a non-null handle returned by `GetProcessHeap`. - // - `dwFlags` must be set to zero. - // - `lpMem` must be a non-null pointer to an allocated block returned by `HeapAlloc` or - // `HeapReAlloc`, that has not already been freed. - // If the block was successfully reallocated at a new location, pointers pointing to - // the freed memory, such as `lpMem`, must not be dereferenced ever again. 
- // - // Note that `dwBytes` is allowed to be zero, contrary to some other allocators. - // - // See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heaprealloc - fn HeapReAlloc( - hHeap: c::HANDLE, - dwFlags: c::DWORD, - lpMem: c::LPVOID, - dwBytes: c::SIZE_T, - ) -> c::LPVOID; +// Reallocate a block of memory behind a given pointer `lpMem` from a given heap `hHeap`, +// to a block of at least `dwBytes` bytes, either shrinking the block in place, +// or allocating at a new location, copying memory, and freeing the original location. +// +// Returns a pointer to the reallocated memory or null if the operation fails. +// The returned pointer will be aligned to at least `MIN_ALIGN`. +// If the operation fails the given block will never have been freed. +// +// SAFETY: +// - `hHeap` must be a non-null handle returned by `GetProcessHeap`. +// - `dwFlags` must be set to zero. +// - `lpMem` must be a non-null pointer to an allocated block returned by `HeapAlloc` or +// `HeapReAlloc`, that has not already been freed. +// If the block was successfully reallocated at a new location, pointers pointing to +// the freed memory, such as `lpMem`, must not be dereferenced ever again. +// +// Note that `dwBytes` is allowed to be zero, contrary to some other allocators. +// +// See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heaprealloc +windows_targets::link!("kernel32.dll" "system" fn HeapReAlloc( + hheap: c::HANDLE, + dwflags : u32, + lpmem: *const core::ffi::c_void, + dwbytes: usize +) -> *mut core::ffi::c_void); - // Free a block of memory behind a given pointer `lpMem` from a given heap `hHeap`. - // Returns a nonzero value if the operation is successful, and zero if the operation fails. - // - // SAFETY: - // - `hHeap` must be a non-null handle returned by `GetProcessHeap`. - // - `dwFlags` must be set to zero. - // - `lpMem` must be a pointer to an allocated block returned by `HeapAlloc` or `HeapReAlloc`, - // that has not already been freed. - // If the block was successfully freed, pointers pointing to the freed memory, such as `lpMem`, - // must not be dereferenced ever again. - // - // Note that `lpMem` is allowed to be null, which will not cause the operation to fail. - // - // See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heapfree - fn HeapFree(hHeap: c::HANDLE, dwFlags: c::DWORD, lpMem: c::LPVOID) -> c::BOOL; -} +// Free a block of memory behind a given pointer `lpMem` from a given heap `hHeap`. +// Returns a nonzero value if the operation is successful, and zero if the operation fails. +// +// SAFETY: +// - `hHeap` must be a non-null handle returned by `GetProcessHeap`. +// - `dwFlags` must be set to zero. +// - `lpMem` must be a pointer to an allocated block returned by `HeapAlloc` or `HeapReAlloc`, +// that has not already been freed. +// If the block was successfully freed, pointers pointing to the freed memory, such as `lpMem`, +// must not be dereferenced ever again. +// +// Note that `lpMem` is allowed to be null, which will not cause the operation to fail. +// +// See https://docs.microsoft.com/windows/win32/api/heapapi/nf-heapapi-heapfree +windows_targets::link!("kernel32.dll" "system" fn HeapFree(hheap: c::HANDLE, dwflags: u32, lpmem: *const core::ffi::c_void) -> c::BOOL); // Cached handle to the default heap of the current process. // Either a non-null handle returned by `GetProcessHeap`, or null when not yet initialized or `GetProcessHeap` failed. 
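Context for the `allocate` hunk that follows: when the requested alignment exceeds `MIN_ALIGN`, the Windows allocator over-allocates and writes a `Header` holding the true block start immediately before the aligned pointer it hands out, so `dealloc` can rediscover the block. The same trick in isolation, a sketch with `malloc`/`free` standing in for `HeapAlloc`/`HeapFree` (it relies, as std does, on this path only being taken for power-of-two `align > MIN_ALIGN >= size_of::<Header>()`, so the bump always leaves room for the header):

```rust
use std::alloc::Layout;
use std::ptr;

#[repr(C)]
struct Header(*mut u8); // points back at the start of the real allocation

unsafe fn alloc_overaligned(layout: Layout) -> *mut u8 {
    // Over-allocate by `align` so a suitably aligned address exists inside
    // the block with at least MIN_ALIGN (>= size_of::<Header>()) bytes of
    // slack in front of it.
    let raw = libc::malloc(layout.size() + layout.align()).cast::<u8>();
    if raw.is_null() {
        return raw;
    }
    // Bump to the next aligned address. `raw` is itself MIN_ALIGN-aligned,
    // so `offset` is a non-zero multiple of MIN_ALIGN.
    let offset = layout.align() - (raw as usize & (layout.align() - 1));
    let aligned = raw.add(offset);
    ptr::write(aligned.cast::<Header>().sub(1), Header(raw));
    aligned
}

unsafe fn dealloc_overaligned(aligned: *mut u8) {
    // Read the header stored just before the user pointer to recover the
    // block start, then free the whole block.
    let start = ptr::read(aligned.cast::<Header>().sub(1)).0;
    libc::free(start.cast());
}
```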
@@ -116,9 +111,9 @@ fn init_or_get_process_heap() -> c::HANDLE { #[cold] extern "C" fn process_heap_init_and_alloc( _heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc` - flags: c::DWORD, - dwBytes: c::SIZE_T, -) -> c::LPVOID { + flags: u32, + dwBytes: usize, +) -> *mut c_void { let heap = init_or_get_process_heap(); if core::intrinsics::unlikely(heap.is_null()) { return ptr::null_mut(); @@ -130,9 +125,9 @@ extern "C" fn process_heap_init_and_alloc( #[inline(never)] fn process_heap_alloc( _heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`, - flags: c::DWORD, - dwBytes: c::SIZE_T, -) -> c::LPVOID { + flags: u32, + dwBytes: usize, +) -> *mut c_void { let heap = HEAP.load(Ordering::Relaxed); if core::intrinsics::likely(!heap.is_null()) { // SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`. @@ -190,7 +185,7 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 { // it, it is safe to write a header directly before it. unsafe { ptr::write((aligned as *mut Header).sub(1), Header(ptr)) }; - // SAFETY: The returned pointer does not point to the to the start of an allocated block, + // SAFETY: The returned pointer does not point to the start of an allocated block, // but there is a header readable directly before it containing the location of the start // of the block. aligned @@ -243,7 +238,7 @@ unsafe impl GlobalAlloc for System { // SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`, // `block` is a pointer to the start of an allocated block. - unsafe { HeapFree(heap, 0, block as c::LPVOID) }; + unsafe { HeapFree(heap, 0, block.cast::<c_void>()) }; } #[inline] @@ -256,7 +251,7 @@ unsafe impl GlobalAlloc for System { // SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`, // `ptr` is a pointer to the start of an allocated block. // The returned pointer points to the start of an allocated block. - unsafe { HeapReAlloc(heap, 0, ptr as c::LPVOID, new_size) as *mut u8 } + unsafe { HeapReAlloc(heap, 0, ptr.cast::<c_void>(), new_size).cast::<u8>() } } else { // SAFETY: `realloc_fallback` is implemented using `dealloc` and `alloc`, which will // correctly handle `ptr` and return a pointer satisfying the guarantees of `System` diff --git a/std/src/sys/pal/windows/api.rs b/std/src/sys/pal/windows/api.rs index 555ad581b8568..00c816a6c09b8 100644 --- a/std/src/sys/pal/windows/api.rs +++ b/std/src/sys/pal/windows/api.rs @@ -227,8 +227,10 @@ pub fn set_file_information_by_handle( info: *const c_void, size: u32, ) -> Result<(), WinError> { - let result = c::SetFileInformationByHandle(handle, class, info, size); - (result != 0).then_some(()).ok_or_else(get_last_error) + unsafe { + let result = c::SetFileInformationByHandle(handle, class, info, size); + (result != 0).then_some(()).ok_or_else(get_last_error) + } } // SAFETY: The `SetFileInformation` trait ensures that this is safe. unsafe { set_info(handle, T::CLASS, info.as_ptr(), info.size()) } @@ -251,3 +253,39 @@ pub fn get_last_error() -> WinError { pub struct WinError { pub code: u32, } +impl WinError { + const fn new(code: u32) -> Self { + Self { code } + } +} + +// Error code constants. // The constant names should be the same as the winapi constants except for the leading `ERROR_`. // Due to the sheer number of codes, error codes should only be added here on an as-needed basis. // However, they should never be removed as the assumption is they may be useful again in the future. +#[allow(unused)] +impl WinError { + /// Success is not an error.
+ /// Some Windows APIs do use this to distinguish between a zero return and an error return + /// but we should never return this to users as an error. + pub const SUCCESS: Self = Self::new(c::ERROR_SUCCESS); + // tidy-alphabetical-start + pub const ACCESS_DENIED: Self = Self::new(c::ERROR_ACCESS_DENIED); + pub const ALREADY_EXISTS: Self = Self::new(c::ERROR_ALREADY_EXISTS); + pub const CANT_ACCESS_FILE: Self = Self::new(c::ERROR_CANT_ACCESS_FILE); + pub const DELETE_PENDING: Self = Self::new(c::ERROR_DELETE_PENDING); + pub const DIRECTORY: Self = Self::new(c::ERROR_DIRECTORY); + pub const FILE_NOT_FOUND: Self = Self::new(c::ERROR_FILE_NOT_FOUND); + pub const INSUFFICIENT_BUFFER: Self = Self::new(c::ERROR_INSUFFICIENT_BUFFER); + pub const INVALID_FUNCTION: Self = Self::new(c::ERROR_INVALID_FUNCTION); + pub const INVALID_HANDLE: Self = Self::new(c::ERROR_INVALID_HANDLE); + pub const INVALID_PARAMETER: Self = Self::new(c::ERROR_INVALID_PARAMETER); + pub const NO_MORE_FILES: Self = Self::new(c::ERROR_NO_MORE_FILES); + pub const NOT_FOUND: Self = Self::new(c::ERROR_NOT_FOUND); + pub const NOT_SUPPORTED: Self = Self::new(c::ERROR_NOT_SUPPORTED); + pub const OPERATION_ABORTED: Self = Self::new(c::ERROR_OPERATION_ABORTED); + pub const PATH_NOT_FOUND: Self = Self::new(c::ERROR_PATH_NOT_FOUND); + pub const SHARING_VIOLATION: Self = Self::new(c::ERROR_SHARING_VIOLATION); + pub const TIMEOUT: Self = Self::new(c::ERROR_TIMEOUT); + // tidy-alphabetical-end +} diff --git a/std/src/sys/pal/windows/c.rs b/std/src/sys/pal/windows/c.rs index 9d58ce05f018b..296d19a926d96 100644 --- a/std/src/sys/pal/windows/c.rs +++ b/std/src/sys/pal/windows/c.rs @@ -4,42 +4,23 @@ #![cfg_attr(test, allow(dead_code))] #![unstable(issue = "none", feature = "windows_c")] #![allow(clippy::style)] +#![allow(unsafe_op_in_unsafe_fn)] use crate::ffi::CStr; use crate::mem; -use crate::num::NonZero; -pub use crate::os::raw::c_int; -use crate::os::raw::{c_char, c_long, c_longlong, c_uint, c_ulong, c_ushort, c_void}; +use crate::os::raw::{c_char, c_int, c_uint, c_ulong, c_ushort, c_void}; use crate::os::windows::io::{AsRawHandle, BorrowedHandle}; use crate::ptr; +pub(super) mod windows_targets; + mod windows_sys; pub use windows_sys::*; -pub type DWORD = c_ulong; -pub type NonZeroDWORD = NonZero<c_ulong>; -pub type LARGE_INTEGER = c_longlong; -#[cfg_attr(target_vendor = "uwp", allow(unused))] -pub type LONG = c_long; -pub type UINT = c_uint; pub type WCHAR = u16; -pub type USHORT = c_ushort; -pub type SIZE_T = usize; -pub type CHAR = c_char; -pub type ULONG = c_ulong; - -pub type LPCVOID = *const c_void; -pub type LPOVERLAPPED = *mut OVERLAPPED; -pub type LPSECURITY_ATTRIBUTES = *mut SECURITY_ATTRIBUTES; -pub type LPVOID = *mut c_void; -pub type LPWCH = *mut WCHAR; -pub type LPWSTR = *mut WCHAR; - -#[cfg(target_vendor = "win7")] -pub type PSRWLOCK = *mut SRWLOCK; pub type socklen_t = c_int; -pub type ADDRESS_FAMILY = USHORT; +pub type ADDRESS_FAMILY = c_ushort; pub use FD_SET as fd_set; pub use LINGER as linger; pub use TIMEVAL as timeval; @@ -54,6 +35,7 @@ pub const EXIT_FAILURE: u32 = 1; pub const CONDITION_VARIABLE_INIT: CONDITION_VARIABLE = CONDITION_VARIABLE { Ptr: ptr::null_mut() }; #[cfg(target_vendor = "win7")] pub const SRWLOCK_INIT: SRWLOCK = SRWLOCK { Ptr: ptr::null_mut() }; +#[cfg(not(target_thread_local))] pub const INIT_ONCE_STATIC_INIT: INIT_ONCE = INIT_ONCE { Ptr: ptr::null_mut() }; // Some windows_sys types have different signs than the types we use.
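The `WinError` constants above give call sites a typed vocabulary for raw Win32 error codes. Something like the following hypothetical helpers (not code from the patch; the comparison goes through the public `code` field so it does not assume a `PartialEq` impl):

```rust
// Hypothetical call-site helpers, assuming the patch's
// `pub struct WinError { pub code: u32 }`.
fn is_not_found(e: WinError) -> bool {
    e.code == WinError::FILE_NOT_FOUND.code || e.code == WinError::PATH_NOT_FOUND.code
}

fn to_io_error(e: WinError) -> std::io::Error {
    // Win32 error codes are exactly what `from_raw_os_error`
    // expects on Windows targets.
    std::io::Error::from_raw_os_error(e.code as i32)
}
```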
@@ -148,25 +130,25 @@ pub struct MOUNT_POINT_REPARSE_BUFFER { #[repr(C)] pub struct SOCKADDR_STORAGE_LH { pub ss_family: ADDRESS_FAMILY, - pub __ss_pad1: [CHAR; 6], + pub __ss_pad1: [c_char; 6], pub __ss_align: i64, - pub __ss_pad2: [CHAR; 112], + pub __ss_pad2: [c_char; 112], } #[repr(C)] #[derive(Copy, Clone)] pub struct sockaddr_in { pub sin_family: ADDRESS_FAMILY, - pub sin_port: USHORT, + pub sin_port: c_ushort, pub sin_addr: in_addr, - pub sin_zero: [CHAR; 8], + pub sin_zero: [c_char; 8], } #[repr(C)] #[derive(Copy, Clone)] pub struct sockaddr_in6 { pub sin6_family: ADDRESS_FAMILY, - pub sin6_port: USHORT, + pub sin6_port: c_ushort, pub sin6_flowinfo: c_ulong, pub sin6_addr: in6_addr, pub sin6_scope_id: c_ulong, @@ -268,9 +250,9 @@ pub unsafe fn NtReadFile( apccontext: *mut c_void, iostatusblock: &mut IO_STATUS_BLOCK, buffer: *mut crate::mem::MaybeUninit<u8>, - length: ULONG, - byteoffset: Option<&LARGE_INTEGER>, - key: Option<&ULONG>, + length: u32, + byteoffset: Option<&i64>, + key: Option<&u32>, ) -> NTSTATUS { windows_sys::NtReadFile( filehandle.as_raw_handle(), @@ -291,9 +273,9 @@ pub unsafe fn NtWriteFile( apccontext: *mut c_void, iostatusblock: &mut IO_STATUS_BLOCK, buffer: *const u8, - length: ULONG, - byteoffset: Option<&LARGE_INTEGER>, - key: Option<&ULONG>, + length: u32, + byteoffset: Option<&i64>, + key: Option<&u32>, ) -> NTSTATUS { windows_sys::NtWriteFile( filehandle.as_raw_handle(), @@ -333,13 +315,13 @@ compat_fn_with_fallback! { // >= Win10 1607 // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreaddescription pub fn SetThreadDescription(hthread: HANDLE, lpthreaddescription: PCWSTR) -> HRESULT { - SetLastError(ERROR_CALL_NOT_IMPLEMENTED as DWORD); E_NOTIMPL + SetLastError(ERROR_CALL_NOT_IMPLEMENTED as u32); E_NOTIMPL } // >= Win10 1607 // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreaddescription pub fn GetThreadDescription(hthread: HANDLE, lpthreaddescription: *mut PWSTR) -> HRESULT { - SetLastError(ERROR_CALL_NOT_IMPLEMENTED as DWORD); E_NOTIMPL + SetLastError(ERROR_CALL_NOT_IMPLEMENTED as u32); E_NOTIMPL } // >= Win8 / Server 2012 @@ -400,27 +382,27 @@ compat_fn_with_fallback! { #[cfg(target_vendor = "win7")] pub fn NtCreateKeyedEvent( KeyedEventHandle: *mut HANDLE, - DesiredAccess: DWORD, - ObjectAttributes: LPVOID, - Flags: ULONG + DesiredAccess: u32, + ObjectAttributes: *mut c_void, + Flags: u32 ) -> NTSTATUS { panic!("keyed events not available") } #[cfg(target_vendor = "win7")] pub fn NtReleaseKeyedEvent( EventHandle: HANDLE, - Key: LPVOID, + Key: *const c_void, Alertable: BOOLEAN, - Timeout: *mut c_longlong + Timeout: *mut i64 ) -> NTSTATUS { panic!("keyed events not available") } #[cfg(target_vendor = "win7")] pub fn NtWaitForKeyedEvent( EventHandle: HANDLE, - Key: LPVOID, + Key: *const c_void, Alertable: BOOLEAN, - Timeout: *mut c_longlong + Timeout: *mut i64 ) -> NTSTATUS { panic!("keyed events not available") } @@ -450,9 +432,9 @@ compat_fn_with_fallback! { apccontext: *mut c_void, iostatusblock: &mut IO_STATUS_BLOCK, buffer: *mut crate::mem::MaybeUninit<u8>, - length: ULONG, - byteoffset: Option<&LARGE_INTEGER>, - key: Option<&ULONG> + length: u32, + byteoffset: Option<&i64>, + key: Option<&u32> ) -> NTSTATUS { STATUS_NOT_IMPLEMENTED } @@ -464,9 +446,9 @@ compat_fn_with_fallback!
{ apccontext: *mut c_void, iostatusblock: &mut IO_STATUS_BLOCK, buffer: *const u8, - length: ULONG, - byteoffset: Option<&LARGE_INTEGER>, - key: Option<&ULONG> + length: u32, + byteoffset: Option<&i64>, + key: Option<&u32> ) -> NTSTATUS { STATUS_NOT_IMPLEMENTED } @@ -503,11 +485,8 @@ if #[cfg(not(target_vendor = "uwp"))] { #[cfg(target_arch = "arm")] pub enum CONTEXT {} }} - -#[link(name = "ws2_32")] -extern "system" { - pub fn WSAStartup(wversionrequested: u16, lpwsadata: *mut WSADATA) -> i32; -} +// WSAStartup is only redefined here so that we can override WSADATA for Arm32 +windows_targets::link!("ws2_32.dll" "system" fn WSAStartup(wversionrequested: u16, lpwsadata: *mut WSADATA) -> i32); #[cfg(target_arch = "arm")] #[repr(C)] pub struct WSADATA { diff --git a/std/src/sys/pal/windows/c/README.md b/std/src/sys/pal/windows/c/README.md index d458e55efbcdd..efefc5faba7a4 100644 --- a/std/src/sys/pal/windows/c/README.md +++ b/std/src/sys/pal/windows/c/README.md @@ -3,7 +3,7 @@ be edited manually. To add bindings, edit `bindings.txt` then regenerate using the following command: - ./x run generate-windows-sys && ./x fmt library/std + ./x run generate-windows-sys && ./x fmt If you need to override generated functions or types then add them to `library/std/src/sys/pal/windows/c.rs`. diff --git a/std/src/sys/pal/windows/c/bindings.txt b/std/src/sys/pal/windows/c/bindings.txt index 849e64ac59135..5ad4a3731d822 100644 --- a/std/src/sys/pal/windows/c/bindings.txt +++ b/std/src/sys/pal/windows/c/bindings.txt @@ -1,5 +1,5 @@ --out windows_sys.rs ---config flatten std +--config flatten sys --filter !Windows.Win32.Foundation.INVALID_HANDLE_VALUE Windows.Wdk.Storage.FileSystem.FILE_COMPLETE_IF_OPLOCKED diff --git a/std/src/sys/pal/windows/c/windows_sys.rs b/std/src/sys/pal/windows/c/windows_sys.rs index 1da8871ae44eb..fea00fec9ae59 100644 --- a/std/src/sys/pal/windows/c/windows_sys.rs +++ b/std/src/sys/pal/windows/c/windows_sys.rs @@ -1,846 +1,140 @@ -// Bindings generated by `windows-bindgen` 0.56.0 +// Bindings generated by `windows-bindgen` 0.58.0 #![allow(non_snake_case, non_upper_case_globals, non_camel_case_types, dead_code, clippy::all)] -#[link(name = "advapi32")] -extern "system" { - pub fn OpenProcessToken( - processhandle: HANDLE, - desiredaccess: TOKEN_ACCESS_MASK, - tokenhandle: *mut HANDLE, - ) -> BOOL; -} -#[link(name = "advapi32")] -extern "system" { - #[link_name = "SystemFunction036"] - pub fn RtlGenRandom(randombuffer: *mut core::ffi::c_void, randombufferlength: u32) -> BOOLEAN; -} -#[link(name = "kernel32")] -extern "system" { - pub fn AcquireSRWLockExclusive(srwlock: *mut SRWLOCK); -} -#[link(name = "kernel32")] -extern "system" { - pub fn AcquireSRWLockShared(srwlock: *mut SRWLOCK); -} -#[link(name = "kernel32")] -extern "system" { - pub fn CancelIo(hfile: HANDLE) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CloseHandle(hobject: HANDLE) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CompareStringOrdinal( - lpstring1: PCWSTR, - cchcount1: i32, - lpstring2: PCWSTR, - cchcount2: i32, - bignorecase: BOOL, - ) -> COMPARESTRING_RESULT; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CopyFileExW( - lpexistingfilename: PCWSTR, - lpnewfilename: PCWSTR, - lpprogressroutine: LPPROGRESS_ROUTINE, - lpdata: *const core::ffi::c_void, - pbcancel: *mut BOOL, - dwcopyflags: u32, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateDirectoryW( - lppathname: PCWSTR, - lpsecurityattributes: *const SECURITY_ATTRIBUTES, - ) -> 
BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateEventW( - lpeventattributes: *const SECURITY_ATTRIBUTES, - bmanualreset: BOOL, - binitialstate: BOOL, - lpname: PCWSTR, - ) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateFileW( - lpfilename: PCWSTR, - dwdesiredaccess: u32, - dwsharemode: FILE_SHARE_MODE, - lpsecurityattributes: *const SECURITY_ATTRIBUTES, - dwcreationdisposition: FILE_CREATION_DISPOSITION, - dwflagsandattributes: FILE_FLAGS_AND_ATTRIBUTES, - htemplatefile: HANDLE, - ) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateHardLinkW( - lpfilename: PCWSTR, - lpexistingfilename: PCWSTR, - lpsecurityattributes: *const SECURITY_ATTRIBUTES, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateNamedPipeW( - lpname: PCWSTR, - dwopenmode: FILE_FLAGS_AND_ATTRIBUTES, - dwpipemode: NAMED_PIPE_MODE, - nmaxinstances: u32, - noutbuffersize: u32, - ninbuffersize: u32, - ndefaulttimeout: u32, - lpsecurityattributes: *const SECURITY_ATTRIBUTES, - ) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateProcessW( - lpapplicationname: PCWSTR, - lpcommandline: PWSTR, - lpprocessattributes: *const SECURITY_ATTRIBUTES, - lpthreadattributes: *const SECURITY_ATTRIBUTES, - binherithandles: BOOL, - dwcreationflags: PROCESS_CREATION_FLAGS, - lpenvironment: *const core::ffi::c_void, - lpcurrentdirectory: PCWSTR, - lpstartupinfo: *const STARTUPINFOW, - lpprocessinformation: *mut PROCESS_INFORMATION, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateSymbolicLinkW( - lpsymlinkfilename: PCWSTR, - lptargetfilename: PCWSTR, - dwflags: SYMBOLIC_LINK_FLAGS, - ) -> BOOLEAN; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateThread( - lpthreadattributes: *const SECURITY_ATTRIBUTES, - dwstacksize: usize, - lpstartaddress: LPTHREAD_START_ROUTINE, - lpparameter: *const core::ffi::c_void, - dwcreationflags: THREAD_CREATION_FLAGS, - lpthreadid: *mut u32, - ) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn CreateWaitableTimerExW( - lptimerattributes: *const SECURITY_ATTRIBUTES, - lptimername: PCWSTR, - dwflags: u32, - dwdesiredaccess: u32, - ) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn DeleteFileW(lpfilename: PCWSTR) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn DeleteProcThreadAttributeList(lpattributelist: LPPROC_THREAD_ATTRIBUTE_LIST); -} -#[link(name = "kernel32")] -extern "system" { - pub fn DeviceIoControl( - hdevice: HANDLE, - dwiocontrolcode: u32, - lpinbuffer: *const core::ffi::c_void, - ninbuffersize: u32, - lpoutbuffer: *mut core::ffi::c_void, - noutbuffersize: u32, - lpbytesreturned: *mut u32, - lpoverlapped: *mut OVERLAPPED, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn DuplicateHandle( - hsourceprocesshandle: HANDLE, - hsourcehandle: HANDLE, - htargetprocesshandle: HANDLE, - lptargethandle: *mut HANDLE, - dwdesiredaccess: u32, - binherithandle: BOOL, - dwoptions: DUPLICATE_HANDLE_OPTIONS, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn ExitProcess(uexitcode: u32) -> !; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FindClose(hfindfile: HANDLE) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FindFirstFileW(lpfilename: PCWSTR, lpfindfiledata: *mut WIN32_FIND_DATAW) -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FindNextFileW(hfindfile: HANDLE, lpfindfiledata: *mut 
WIN32_FIND_DATAW) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FlushFileBuffers(hfile: HANDLE) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FormatMessageW( - dwflags: FORMAT_MESSAGE_OPTIONS, - lpsource: *const core::ffi::c_void, - dwmessageid: u32, - dwlanguageid: u32, - lpbuffer: PWSTR, - nsize: u32, - arguments: *const *const i8, - ) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn FreeEnvironmentStringsW(penv: PCWSTR) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetActiveProcessorCount(groupnumber: u16) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetCommandLineW() -> PCWSTR; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetConsoleMode(hconsolehandle: HANDLE, lpmode: *mut CONSOLE_MODE) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetCurrentDirectoryW(nbufferlength: u32, lpbuffer: PWSTR) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetCurrentProcess() -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetCurrentProcessId() -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetCurrentThread() -> HANDLE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetEnvironmentStringsW() -> PWSTR; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetEnvironmentVariableW(lpname: PCWSTR, lpbuffer: PWSTR, nsize: u32) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetExitCodeProcess(hprocess: HANDLE, lpexitcode: *mut u32) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFileAttributesW(lpfilename: PCWSTR) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFileInformationByHandle( - hfile: HANDLE, - lpfileinformation: *mut BY_HANDLE_FILE_INFORMATION, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFileInformationByHandleEx( - hfile: HANDLE, - fileinformationclass: FILE_INFO_BY_HANDLE_CLASS, - lpfileinformation: *mut core::ffi::c_void, - dwbuffersize: u32, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFileType(hfile: HANDLE) -> FILE_TYPE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFinalPathNameByHandleW( - hfile: HANDLE, - lpszfilepath: PWSTR, - cchfilepath: u32, - dwflags: GETFINALPATHNAMEBYHANDLE_FLAGS, - ) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetFullPathNameW( - lpfilename: PCWSTR, - nbufferlength: u32, - lpbuffer: PWSTR, - lpfilepart: *mut PWSTR, - ) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetLastError() -> WIN32_ERROR; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetModuleFileNameW(hmodule: HMODULE, lpfilename: PWSTR, nsize: u32) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetModuleHandleA(lpmodulename: PCSTR) -> HMODULE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetModuleHandleW(lpmodulename: PCWSTR) -> HMODULE; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetOverlappedResult( - hfile: HANDLE, - lpoverlapped: *const OVERLAPPED, - lpnumberofbytestransferred: *mut u32, - bwait: BOOL, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetProcAddress(hmodule: HMODULE, lpprocname: PCSTR) -> FARPROC; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetProcessId(process: HANDLE) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetStdHandle(nstdhandle: STD_HANDLE) -> HANDLE; -} 
-#[link(name = "kernel32")] -extern "system" { - pub fn GetSystemDirectoryW(lpbuffer: PWSTR, usize: u32) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetSystemInfo(lpsysteminfo: *mut SYSTEM_INFO); -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetSystemTimeAsFileTime(lpsystemtimeasfiletime: *mut FILETIME); -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetSystemTimePreciseAsFileTime(lpsystemtimeasfiletime: *mut FILETIME); -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetTempPathW(nbufferlength: u32, lpbuffer: PWSTR) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn GetWindowsDirectoryW(lpbuffer: PWSTR, usize: u32) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn InitOnceBeginInitialize( - lpinitonce: *mut INIT_ONCE, - dwflags: u32, - fpending: *mut BOOL, - lpcontext: *mut *mut core::ffi::c_void, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn InitOnceComplete( - lpinitonce: *mut INIT_ONCE, - dwflags: u32, - lpcontext: *const core::ffi::c_void, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn InitializeProcThreadAttributeList( - lpattributelist: LPPROC_THREAD_ATTRIBUTE_LIST, - dwattributecount: u32, - dwflags: u32, - lpsize: *mut usize, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn LocalFree(hmem: HLOCAL) -> HLOCAL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn MoveFileExW( - lpexistingfilename: PCWSTR, - lpnewfilename: PCWSTR, - dwflags: MOVE_FILE_FLAGS, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn MultiByteToWideChar( - codepage: u32, - dwflags: MULTI_BYTE_TO_WIDE_CHAR_FLAGS, - lpmultibytestr: PCSTR, - cbmultibyte: i32, - lpwidecharstr: PWSTR, - cchwidechar: i32, - ) -> i32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn QueryPerformanceCounter(lpperformancecount: *mut i64) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn QueryPerformanceFrequency(lpfrequency: *mut i64) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn ReadConsoleW( - hconsoleinput: HANDLE, - lpbuffer: *mut core::ffi::c_void, - nnumberofcharstoread: u32, - lpnumberofcharsread: *mut u32, - pinputcontrol: *const CONSOLE_READCONSOLE_CONTROL, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn ReadFile( - hfile: HANDLE, - lpbuffer: *mut u8, - nnumberofbytestoread: u32, - lpnumberofbytesread: *mut u32, - lpoverlapped: *mut OVERLAPPED, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn ReadFileEx( - hfile: HANDLE, - lpbuffer: *mut u8, - nnumberofbytestoread: u32, - lpoverlapped: *mut OVERLAPPED, - lpcompletionroutine: LPOVERLAPPED_COMPLETION_ROUTINE, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn ReleaseSRWLockExclusive(srwlock: *mut SRWLOCK); -} -#[link(name = "kernel32")] -extern "system" { - pub fn ReleaseSRWLockShared(srwlock: *mut SRWLOCK); -} -#[link(name = "kernel32")] -extern "system" { - pub fn RemoveDirectoryW(lppathname: PCWSTR) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetCurrentDirectoryW(lppathname: PCWSTR) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetEnvironmentVariableW(lpname: PCWSTR, lpvalue: PCWSTR) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetFileAttributesW( - lpfilename: PCWSTR, - dwfileattributes: FILE_FLAGS_AND_ATTRIBUTES, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn 
SetFileInformationByHandle( - hfile: HANDLE, - fileinformationclass: FILE_INFO_BY_HANDLE_CLASS, - lpfileinformation: *const core::ffi::c_void, - dwbuffersize: u32, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetFilePointerEx( - hfile: HANDLE, - lidistancetomove: i64, - lpnewfilepointer: *mut i64, - dwmovemethod: SET_FILE_POINTER_MOVE_METHOD, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetFileTime( - hfile: HANDLE, - lpcreationtime: *const FILETIME, - lplastaccesstime: *const FILETIME, - lplastwritetime: *const FILETIME, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetHandleInformation(hobject: HANDLE, dwmask: u32, dwflags: HANDLE_FLAGS) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetLastError(dwerrcode: WIN32_ERROR); -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetThreadStackGuarantee(stacksizeinbytes: *mut u32) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SetWaitableTimer( - htimer: HANDLE, - lpduetime: *const i64, - lperiod: i32, - pfncompletionroutine: PTIMERAPCROUTINE, - lpargtocompletionroutine: *const core::ffi::c_void, - fresume: BOOL, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn Sleep(dwmilliseconds: u32); -} -#[link(name = "kernel32")] -extern "system" { - pub fn SleepConditionVariableSRW( - conditionvariable: *mut CONDITION_VARIABLE, - srwlock: *mut SRWLOCK, - dwmilliseconds: u32, - flags: u32, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SleepEx(dwmilliseconds: u32, balertable: BOOL) -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn SwitchToThread() -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TerminateProcess(hprocess: HANDLE, uexitcode: u32) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TlsAlloc() -> u32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TlsFree(dwtlsindex: u32) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TlsGetValue(dwtlsindex: u32) -> *mut core::ffi::c_void; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TlsSetValue(dwtlsindex: u32, lptlsvalue: *const core::ffi::c_void) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TryAcquireSRWLockExclusive(srwlock: *mut SRWLOCK) -> BOOLEAN; -} -#[link(name = "kernel32")] -extern "system" { - pub fn TryAcquireSRWLockShared(srwlock: *mut SRWLOCK) -> BOOLEAN; -} -#[link(name = "kernel32")] -extern "system" { - pub fn UpdateProcThreadAttribute( - lpattributelist: LPPROC_THREAD_ATTRIBUTE_LIST, - dwflags: u32, - attribute: usize, - lpvalue: *const core::ffi::c_void, - cbsize: usize, - lppreviousvalue: *mut core::ffi::c_void, - lpreturnsize: *const usize, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn WaitForMultipleObjects( - ncount: u32, - lphandles: *const HANDLE, - bwaitall: BOOL, - dwmilliseconds: u32, - ) -> WAIT_EVENT; -} -#[link(name = "kernel32")] -extern "system" { - pub fn WaitForSingleObject(hhandle: HANDLE, dwmilliseconds: u32) -> WAIT_EVENT; -} -#[link(name = "kernel32")] -extern "system" { - pub fn WakeAllConditionVariable(conditionvariable: *mut CONDITION_VARIABLE); -} -#[link(name = "kernel32")] -extern "system" { - pub fn WakeConditionVariable(conditionvariable: *mut CONDITION_VARIABLE); -} -#[link(name = "kernel32")] -extern "system" { - pub fn WideCharToMultiByte( - codepage: u32, - dwflags: u32, - lpwidecharstr: PCWSTR, - cchwidechar: i32, - lpmultibytestr: PSTR, 
- cbmultibyte: i32, - lpdefaultchar: PCSTR, - lpuseddefaultchar: *mut BOOL, - ) -> i32; -} -#[link(name = "kernel32")] -extern "system" { - pub fn WriteConsoleW( - hconsoleoutput: HANDLE, - lpbuffer: *const core::ffi::c_void, - nnumberofcharstowrite: u32, - lpnumberofcharswritten: *mut u32, - lpreserved: *const core::ffi::c_void, - ) -> BOOL; -} -#[link(name = "kernel32")] -extern "system" { - pub fn WriteFileEx( - hfile: HANDLE, - lpbuffer: *const u8, - nnumberofbytestowrite: u32, - lpoverlapped: *mut OVERLAPPED, - lpcompletionroutine: LPOVERLAPPED_COMPLETION_ROUTINE, - ) -> BOOL; -} -#[link(name = "ntdll")] -extern "system" { - pub fn NtCreateFile( - filehandle: *mut HANDLE, - desiredaccess: FILE_ACCESS_RIGHTS, - objectattributes: *const OBJECT_ATTRIBUTES, - iostatusblock: *mut IO_STATUS_BLOCK, - allocationsize: *const i64, - fileattributes: FILE_FLAGS_AND_ATTRIBUTES, - shareaccess: FILE_SHARE_MODE, - createdisposition: NTCREATEFILE_CREATE_DISPOSITION, - createoptions: NTCREATEFILE_CREATE_OPTIONS, - eabuffer: *const core::ffi::c_void, - ealength: u32, - ) -> NTSTATUS; -} -#[link(name = "ntdll")] -extern "system" { - pub fn NtReadFile( - filehandle: HANDLE, - event: HANDLE, - apcroutine: PIO_APC_ROUTINE, - apccontext: *const core::ffi::c_void, - iostatusblock: *mut IO_STATUS_BLOCK, - buffer: *mut core::ffi::c_void, - length: u32, - byteoffset: *const i64, - key: *const u32, - ) -> NTSTATUS; -} -#[link(name = "ntdll")] -extern "system" { - pub fn NtWriteFile( - filehandle: HANDLE, - event: HANDLE, - apcroutine: PIO_APC_ROUTINE, - apccontext: *const core::ffi::c_void, - iostatusblock: *mut IO_STATUS_BLOCK, - buffer: *const core::ffi::c_void, - length: u32, - byteoffset: *const i64, - key: *const u32, - ) -> NTSTATUS; -} -#[link(name = "ntdll")] -extern "system" { - pub fn RtlNtStatusToDosError(status: NTSTATUS) -> u32; -} -#[link(name = "userenv")] -extern "system" { - pub fn GetUserProfileDirectoryW( - htoken: HANDLE, - lpprofiledir: PWSTR, - lpcchsize: *mut u32, - ) -> BOOL; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSACleanup() -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSADuplicateSocketW( - s: SOCKET, - dwprocessid: u32, - lpprotocolinfo: *mut WSAPROTOCOL_INFOW, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSAGetLastError() -> WSA_ERROR; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSARecv( - s: SOCKET, - lpbuffers: *const WSABUF, - dwbuffercount: u32, - lpnumberofbytesrecvd: *mut u32, - lpflags: *mut u32, - lpoverlapped: *mut OVERLAPPED, - lpcompletionroutine: LPWSAOVERLAPPED_COMPLETION_ROUTINE, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSASend( - s: SOCKET, - lpbuffers: *const WSABUF, - dwbuffercount: u32, - lpnumberofbytessent: *mut u32, - dwflags: u32, - lpoverlapped: *mut OVERLAPPED, - lpcompletionroutine: LPWSAOVERLAPPED_COMPLETION_ROUTINE, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn WSASocketW( - af: i32, - r#type: i32, - protocol: i32, - lpprotocolinfo: *const WSAPROTOCOL_INFOW, - g: u32, - dwflags: u32, - ) -> SOCKET; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn accept(s: SOCKET, addr: *mut SOCKADDR, addrlen: *mut i32) -> SOCKET; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn bind(s: SOCKET, name: *const SOCKADDR, namelen: i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn closesocket(s: SOCKET) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn connect(s: SOCKET, name: *const SOCKADDR, namelen: i32) 
-> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn freeaddrinfo(paddrinfo: *const ADDRINFOA); -} -#[link(name = "ws2_32")] -extern "system" { - pub fn getaddrinfo( - pnodename: PCSTR, - pservicename: PCSTR, - phints: *const ADDRINFOA, - ppresult: *mut *mut ADDRINFOA, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn getpeername(s: SOCKET, name: *mut SOCKADDR, namelen: *mut i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn getsockname(s: SOCKET, name: *mut SOCKADDR, namelen: *mut i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn getsockopt(s: SOCKET, level: i32, optname: i32, optval: PSTR, optlen: *mut i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn ioctlsocket(s: SOCKET, cmd: i32, argp: *mut u32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn listen(s: SOCKET, backlog: i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn recv(s: SOCKET, buf: PSTR, len: i32, flags: SEND_RECV_FLAGS) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn recvfrom( - s: SOCKET, - buf: PSTR, - len: i32, - flags: i32, - from: *mut SOCKADDR, - fromlen: *mut i32, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn select( - nfds: i32, - readfds: *mut FD_SET, - writefds: *mut FD_SET, - exceptfds: *mut FD_SET, - timeout: *const TIMEVAL, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn send(s: SOCKET, buf: PCSTR, len: i32, flags: SEND_RECV_FLAGS) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn sendto( - s: SOCKET, - buf: PCSTR, - len: i32, - flags: i32, - to: *const SOCKADDR, - tolen: i32, - ) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn setsockopt(s: SOCKET, level: i32, optname: i32, optval: PCSTR, optlen: i32) -> i32; -} -#[link(name = "ws2_32")] -extern "system" { - pub fn shutdown(s: SOCKET, how: WINSOCK_SHUTDOWN_HOW) -> i32; -} +windows_targets::link!("advapi32.dll" "system" fn OpenProcessToken(processhandle : HANDLE, desiredaccess : TOKEN_ACCESS_MASK, tokenhandle : *mut HANDLE) -> BOOL); +windows_targets::link!("advapi32.dll" "system" "SystemFunction036" fn RtlGenRandom(randombuffer : *mut core::ffi::c_void, randombufferlength : u32) -> BOOLEAN); +windows_targets::link!("kernel32.dll" "system" fn AcquireSRWLockExclusive(srwlock : *mut SRWLOCK)); +windows_targets::link!("kernel32.dll" "system" fn AcquireSRWLockShared(srwlock : *mut SRWLOCK)); +windows_targets::link!("kernel32.dll" "system" fn CancelIo(hfile : HANDLE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CloseHandle(hobject : HANDLE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CompareStringOrdinal(lpstring1 : PCWSTR, cchcount1 : i32, lpstring2 : PCWSTR, cchcount2 : i32, bignorecase : BOOL) -> COMPARESTRING_RESULT); +windows_targets::link!("kernel32.dll" "system" fn CopyFileExW(lpexistingfilename : PCWSTR, lpnewfilename : PCWSTR, lpprogressroutine : LPPROGRESS_ROUTINE, lpdata : *const core::ffi::c_void, pbcancel : *mut BOOL, dwcopyflags : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CreateDirectoryW(lppathname : PCWSTR, lpsecurityattributes : *const SECURITY_ATTRIBUTES) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CreateEventW(lpeventattributes : *const SECURITY_ATTRIBUTES, bmanualreset : BOOL, binitialstate : BOOL, lpname : PCWSTR) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn CreateFileW(lpfilename : PCWSTR, dwdesiredaccess : u32, dwsharemode : 
FILE_SHARE_MODE, lpsecurityattributes : *const SECURITY_ATTRIBUTES, dwcreationdisposition : FILE_CREATION_DISPOSITION, dwflagsandattributes : FILE_FLAGS_AND_ATTRIBUTES, htemplatefile : HANDLE) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn CreateHardLinkW(lpfilename : PCWSTR, lpexistingfilename : PCWSTR, lpsecurityattributes : *const SECURITY_ATTRIBUTES) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CreateNamedPipeW(lpname : PCWSTR, dwopenmode : FILE_FLAGS_AND_ATTRIBUTES, dwpipemode : NAMED_PIPE_MODE, nmaxinstances : u32, noutbuffersize : u32, ninbuffersize : u32, ndefaulttimeout : u32, lpsecurityattributes : *const SECURITY_ATTRIBUTES) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn CreateProcessW(lpapplicationname : PCWSTR, lpcommandline : PWSTR, lpprocessattributes : *const SECURITY_ATTRIBUTES, lpthreadattributes : *const SECURITY_ATTRIBUTES, binherithandles : BOOL, dwcreationflags : PROCESS_CREATION_FLAGS, lpenvironment : *const core::ffi::c_void, lpcurrentdirectory : PCWSTR, lpstartupinfo : *const STARTUPINFOW, lpprocessinformation : *mut PROCESS_INFORMATION) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn CreateSymbolicLinkW(lpsymlinkfilename : PCWSTR, lptargetfilename : PCWSTR, dwflags : SYMBOLIC_LINK_FLAGS) -> BOOLEAN); +windows_targets::link!("kernel32.dll" "system" fn CreateThread(lpthreadattributes : *const SECURITY_ATTRIBUTES, dwstacksize : usize, lpstartaddress : LPTHREAD_START_ROUTINE, lpparameter : *const core::ffi::c_void, dwcreationflags : THREAD_CREATION_FLAGS, lpthreadid : *mut u32) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn CreateWaitableTimerExW(lptimerattributes : *const SECURITY_ATTRIBUTES, lptimername : PCWSTR, dwflags : u32, dwdesiredaccess : u32) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn DeleteFileW(lpfilename : PCWSTR) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn DeleteProcThreadAttributeList(lpattributelist : LPPROC_THREAD_ATTRIBUTE_LIST)); +windows_targets::link!("kernel32.dll" "system" fn DeviceIoControl(hdevice : HANDLE, dwiocontrolcode : u32, lpinbuffer : *const core::ffi::c_void, ninbuffersize : u32, lpoutbuffer : *mut core::ffi::c_void, noutbuffersize : u32, lpbytesreturned : *mut u32, lpoverlapped : *mut OVERLAPPED) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn DuplicateHandle(hsourceprocesshandle : HANDLE, hsourcehandle : HANDLE, htargetprocesshandle : HANDLE, lptargethandle : *mut HANDLE, dwdesiredaccess : u32, binherithandle : BOOL, dwoptions : DUPLICATE_HANDLE_OPTIONS) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn ExitProcess(uexitcode : u32) -> !); +windows_targets::link!("kernel32.dll" "system" fn FindClose(hfindfile : HANDLE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn FindFirstFileW(lpfilename : PCWSTR, lpfindfiledata : *mut WIN32_FIND_DATAW) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn FindNextFileW(hfindfile : HANDLE, lpfindfiledata : *mut WIN32_FIND_DATAW) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn FlushFileBuffers(hfile : HANDLE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn FormatMessageW(dwflags : FORMAT_MESSAGE_OPTIONS, lpsource : *const core::ffi::c_void, dwmessageid : u32, dwlanguageid : u32, lpbuffer : PWSTR, nsize : u32, arguments : *const *const i8) -> u32); +windows_targets::link!("kernel32.dll" "system" fn FreeEnvironmentStringsW(penv : PCWSTR) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn 
GetActiveProcessorCount(groupnumber : u16) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetCommandLineW() -> PCWSTR); +windows_targets::link!("kernel32.dll" "system" fn GetConsoleMode(hconsolehandle : HANDLE, lpmode : *mut CONSOLE_MODE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetCurrentDirectoryW(nbufferlength : u32, lpbuffer : PWSTR) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetCurrentProcess() -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn GetCurrentProcessId() -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetCurrentThread() -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn GetEnvironmentStringsW() -> PWSTR); +windows_targets::link!("kernel32.dll" "system" fn GetEnvironmentVariableW(lpname : PCWSTR, lpbuffer : PWSTR, nsize : u32) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetExitCodeProcess(hprocess : HANDLE, lpexitcode : *mut u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetFileAttributesW(lpfilename : PCWSTR) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandle(hfile : HANDLE, lpfileinformation : *mut BY_HANDLE_FILE_INFORMATION) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandleEx(hfile : HANDLE, fileinformationclass : FILE_INFO_BY_HANDLE_CLASS, lpfileinformation : *mut core::ffi::c_void, dwbuffersize : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetFileType(hfile : HANDLE) -> FILE_TYPE); +windows_targets::link!("kernel32.dll" "system" fn GetFinalPathNameByHandleW(hfile : HANDLE, lpszfilepath : PWSTR, cchfilepath : u32, dwflags : GETFINALPATHNAMEBYHANDLE_FLAGS) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetFullPathNameW(lpfilename : PCWSTR, nbufferlength : u32, lpbuffer : PWSTR, lpfilepart : *mut PWSTR) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetLastError() -> WIN32_ERROR); +windows_targets::link!("kernel32.dll" "system" fn GetModuleFileNameW(hmodule : HMODULE, lpfilename : PWSTR, nsize : u32) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetModuleHandleA(lpmodulename : PCSTR) -> HMODULE); +windows_targets::link!("kernel32.dll" "system" fn GetModuleHandleW(lpmodulename : PCWSTR) -> HMODULE); +windows_targets::link!("kernel32.dll" "system" fn GetOverlappedResult(hfile : HANDLE, lpoverlapped : *const OVERLAPPED, lpnumberofbytestransferred : *mut u32, bwait : BOOL) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetProcAddress(hmodule : HMODULE, lpprocname : PCSTR) -> FARPROC); +windows_targets::link!("kernel32.dll" "system" fn GetProcessId(process : HANDLE) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetStdHandle(nstdhandle : STD_HANDLE) -> HANDLE); +windows_targets::link!("kernel32.dll" "system" fn GetSystemDirectoryW(lpbuffer : PWSTR, usize : u32) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetSystemInfo(lpsysteminfo : *mut SYSTEM_INFO)); +windows_targets::link!("kernel32.dll" "system" fn GetSystemTimeAsFileTime(lpsystemtimeasfiletime : *mut FILETIME)); +windows_targets::link!("kernel32.dll" "system" fn GetSystemTimePreciseAsFileTime(lpsystemtimeasfiletime : *mut FILETIME)); +windows_targets::link!("kernel32.dll" "system" fn GetTempPathW(nbufferlength : u32, lpbuffer : PWSTR) -> u32); +windows_targets::link!("kernel32.dll" "system" fn GetWindowsDirectoryW(lpbuffer : PWSTR, usize : u32) -> u32); +windows_targets::link!("kernel32.dll" "system" fn 
InitOnceBeginInitialize(lpinitonce : *mut INIT_ONCE, dwflags : u32, fpending : *mut BOOL, lpcontext : *mut *mut core::ffi::c_void) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn InitOnceComplete(lpinitonce : *mut INIT_ONCE, dwflags : u32, lpcontext : *const core::ffi::c_void) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn InitializeProcThreadAttributeList(lpattributelist : LPPROC_THREAD_ATTRIBUTE_LIST, dwattributecount : u32, dwflags : u32, lpsize : *mut usize) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn LocalFree(hmem : HLOCAL) -> HLOCAL); +windows_targets::link!("kernel32.dll" "system" fn MoveFileExW(lpexistingfilename : PCWSTR, lpnewfilename : PCWSTR, dwflags : MOVE_FILE_FLAGS) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn MultiByteToWideChar(codepage : u32, dwflags : MULTI_BYTE_TO_WIDE_CHAR_FLAGS, lpmultibytestr : PCSTR, cbmultibyte : i32, lpwidecharstr : PWSTR, cchwidechar : i32) -> i32); +windows_targets::link!("kernel32.dll" "system" fn QueryPerformanceCounter(lpperformancecount : *mut i64) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn QueryPerformanceFrequency(lpfrequency : *mut i64) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn ReadConsoleW(hconsoleinput : HANDLE, lpbuffer : *mut core::ffi::c_void, nnumberofcharstoread : u32, lpnumberofcharsread : *mut u32, pinputcontrol : *const CONSOLE_READCONSOLE_CONTROL) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn ReadFile(hfile : HANDLE, lpbuffer : *mut u8, nnumberofbytestoread : u32, lpnumberofbytesread : *mut u32, lpoverlapped : *mut OVERLAPPED) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn ReadFileEx(hfile : HANDLE, lpbuffer : *mut u8, nnumberofbytestoread : u32, lpoverlapped : *mut OVERLAPPED, lpcompletionroutine : LPOVERLAPPED_COMPLETION_ROUTINE) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn ReleaseSRWLockExclusive(srwlock : *mut SRWLOCK)); +windows_targets::link!("kernel32.dll" "system" fn ReleaseSRWLockShared(srwlock : *mut SRWLOCK)); +windows_targets::link!("kernel32.dll" "system" fn RemoveDirectoryW(lppathname : PCWSTR) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetCurrentDirectoryW(lppathname : PCWSTR) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetEnvironmentVariableW(lpname : PCWSTR, lpvalue : PCWSTR) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetFileAttributesW(lpfilename : PCWSTR, dwfileattributes : FILE_FLAGS_AND_ATTRIBUTES) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetFileInformationByHandle(hfile : HANDLE, fileinformationclass : FILE_INFO_BY_HANDLE_CLASS, lpfileinformation : *const core::ffi::c_void, dwbuffersize : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetFilePointerEx(hfile : HANDLE, lidistancetomove : i64, lpnewfilepointer : *mut i64, dwmovemethod : SET_FILE_POINTER_MOVE_METHOD) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetFileTime(hfile : HANDLE, lpcreationtime : *const FILETIME, lplastaccesstime : *const FILETIME, lplastwritetime : *const FILETIME) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetHandleInformation(hobject : HANDLE, dwmask : u32, dwflags : HANDLE_FLAGS) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetLastError(dwerrcode : WIN32_ERROR)); +windows_targets::link!("kernel32.dll" "system" fn SetThreadStackGuarantee(stacksizeinbytes : *mut u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SetWaitableTimer(htimer : 
HANDLE, lpduetime : *const i64, lperiod : i32, pfncompletionroutine : PTIMERAPCROUTINE, lpargtocompletionroutine : *const core::ffi::c_void, fresume : BOOL) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn Sleep(dwmilliseconds : u32)); +windows_targets::link!("kernel32.dll" "system" fn SleepConditionVariableSRW(conditionvariable : *mut CONDITION_VARIABLE, srwlock : *mut SRWLOCK, dwmilliseconds : u32, flags : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn SleepEx(dwmilliseconds : u32, balertable : BOOL) -> u32); +windows_targets::link!("kernel32.dll" "system" fn SwitchToThread() -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn TerminateProcess(hprocess : HANDLE, uexitcode : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn TlsAlloc() -> u32); +windows_targets::link!("kernel32.dll" "system" fn TlsFree(dwtlsindex : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn TlsGetValue(dwtlsindex : u32) -> *mut core::ffi::c_void); +windows_targets::link!("kernel32.dll" "system" fn TlsSetValue(dwtlsindex : u32, lptlsvalue : *const core::ffi::c_void) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn TryAcquireSRWLockExclusive(srwlock : *mut SRWLOCK) -> BOOLEAN); +windows_targets::link!("kernel32.dll" "system" fn TryAcquireSRWLockShared(srwlock : *mut SRWLOCK) -> BOOLEAN); +windows_targets::link!("kernel32.dll" "system" fn UpdateProcThreadAttribute(lpattributelist : LPPROC_THREAD_ATTRIBUTE_LIST, dwflags : u32, attribute : usize, lpvalue : *const core::ffi::c_void, cbsize : usize, lppreviousvalue : *mut core::ffi::c_void, lpreturnsize : *const usize) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn WaitForMultipleObjects(ncount : u32, lphandles : *const HANDLE, bwaitall : BOOL, dwmilliseconds : u32) -> WAIT_EVENT); +windows_targets::link!("kernel32.dll" "system" fn WaitForSingleObject(hhandle : HANDLE, dwmilliseconds : u32) -> WAIT_EVENT); +windows_targets::link!("kernel32.dll" "system" fn WakeAllConditionVariable(conditionvariable : *mut CONDITION_VARIABLE)); +windows_targets::link!("kernel32.dll" "system" fn WakeConditionVariable(conditionvariable : *mut CONDITION_VARIABLE)); +windows_targets::link!("kernel32.dll" "system" fn WideCharToMultiByte(codepage : u32, dwflags : u32, lpwidecharstr : PCWSTR, cchwidechar : i32, lpmultibytestr : PSTR, cbmultibyte : i32, lpdefaultchar : PCSTR, lpuseddefaultchar : *mut BOOL) -> i32); +windows_targets::link!("kernel32.dll" "system" fn WriteConsoleW(hconsoleoutput : HANDLE, lpbuffer : PCWSTR, nnumberofcharstowrite : u32, lpnumberofcharswritten : *mut u32, lpreserved : *const core::ffi::c_void) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn WriteFileEx(hfile : HANDLE, lpbuffer : *const u8, nnumberofbytestowrite : u32, lpoverlapped : *mut OVERLAPPED, lpcompletionroutine : LPOVERLAPPED_COMPLETION_ROUTINE) -> BOOL); +windows_targets::link!("ntdll.dll" "system" fn NtCreateFile(filehandle : *mut HANDLE, desiredaccess : FILE_ACCESS_RIGHTS, objectattributes : *const OBJECT_ATTRIBUTES, iostatusblock : *mut IO_STATUS_BLOCK, allocationsize : *const i64, fileattributes : FILE_FLAGS_AND_ATTRIBUTES, shareaccess : FILE_SHARE_MODE, createdisposition : NTCREATEFILE_CREATE_DISPOSITION, createoptions : NTCREATEFILE_CREATE_OPTIONS, eabuffer : *const core::ffi::c_void, ealength : u32) -> NTSTATUS); +windows_targets::link!("ntdll.dll" "system" fn NtReadFile(filehandle : HANDLE, event : HANDLE, apcroutine : PIO_APC_ROUTINE, apccontext : *const core::ffi::c_void, iostatusblock : 
*mut IO_STATUS_BLOCK, buffer : *mut core::ffi::c_void, length : u32, byteoffset : *const i64, key : *const u32) -> NTSTATUS); +windows_targets::link!("ntdll.dll" "system" fn NtWriteFile(filehandle : HANDLE, event : HANDLE, apcroutine : PIO_APC_ROUTINE, apccontext : *const core::ffi::c_void, iostatusblock : *mut IO_STATUS_BLOCK, buffer : *const core::ffi::c_void, length : u32, byteoffset : *const i64, key : *const u32) -> NTSTATUS); +windows_targets::link!("ntdll.dll" "system" fn RtlNtStatusToDosError(status : NTSTATUS) -> u32); +windows_targets::link!("userenv.dll" "system" fn GetUserProfileDirectoryW(htoken : HANDLE, lpprofiledir : PWSTR, lpcchsize : *mut u32) -> BOOL); +windows_targets::link!("ws2_32.dll" "system" fn WSACleanup() -> i32); +windows_targets::link!("ws2_32.dll" "system" fn WSADuplicateSocketW(s : SOCKET, dwprocessid : u32, lpprotocolinfo : *mut WSAPROTOCOL_INFOW) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn WSAGetLastError() -> WSA_ERROR); +windows_targets::link!("ws2_32.dll" "system" fn WSARecv(s : SOCKET, lpbuffers : *const WSABUF, dwbuffercount : u32, lpnumberofbytesrecvd : *mut u32, lpflags : *mut u32, lpoverlapped : *mut OVERLAPPED, lpcompletionroutine : LPWSAOVERLAPPED_COMPLETION_ROUTINE) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn WSASend(s : SOCKET, lpbuffers : *const WSABUF, dwbuffercount : u32, lpnumberofbytessent : *mut u32, dwflags : u32, lpoverlapped : *mut OVERLAPPED, lpcompletionroutine : LPWSAOVERLAPPED_COMPLETION_ROUTINE) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn WSASocketW(af : i32, r#type : i32, protocol : i32, lpprotocolinfo : *const WSAPROTOCOL_INFOW, g : u32, dwflags : u32) -> SOCKET); +windows_targets::link!("ws2_32.dll" "system" fn accept(s : SOCKET, addr : *mut SOCKADDR, addrlen : *mut i32) -> SOCKET); +windows_targets::link!("ws2_32.dll" "system" fn bind(s : SOCKET, name : *const SOCKADDR, namelen : i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn closesocket(s : SOCKET) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn connect(s : SOCKET, name : *const SOCKADDR, namelen : i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn freeaddrinfo(paddrinfo : *const ADDRINFOA)); +windows_targets::link!("ws2_32.dll" "system" fn getaddrinfo(pnodename : PCSTR, pservicename : PCSTR, phints : *const ADDRINFOA, ppresult : *mut *mut ADDRINFOA) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn getpeername(s : SOCKET, name : *mut SOCKADDR, namelen : *mut i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn getsockname(s : SOCKET, name : *mut SOCKADDR, namelen : *mut i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn getsockopt(s : SOCKET, level : i32, optname : i32, optval : PSTR, optlen : *mut i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn ioctlsocket(s : SOCKET, cmd : i32, argp : *mut u32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn listen(s : SOCKET, backlog : i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn recv(s : SOCKET, buf : PSTR, len : i32, flags : SEND_RECV_FLAGS) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn recvfrom(s : SOCKET, buf : PSTR, len : i32, flags : i32, from : *mut SOCKADDR, fromlen : *mut i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn select(nfds : i32, readfds : *mut FD_SET, writefds : *mut FD_SET, exceptfds : *mut FD_SET, timeout : *const TIMEVAL) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn send(s : SOCKET, buf : PCSTR, len : i32, flags : 
SEND_RECV_FLAGS) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn sendto(s : SOCKET, buf : PCSTR, len : i32, flags : i32, to : *const SOCKADDR, tolen : i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn setsockopt(s : SOCKET, level : i32, optname : i32, optval : PCSTR, optlen : i32) -> i32); +windows_targets::link!("ws2_32.dll" "system" fn shutdown(s : SOCKET, how : WINSOCK_SHUTDOWN_HOW) -> i32); pub const ABOVE_NORMAL_PRIORITY_CLASS: PROCESS_CREATION_FLAGS = 32768u32; pub type ADDRESS_FAMILY = u16; #[repr(C)] +#[derive(Clone, Copy)] pub struct ADDRINFOA { pub ai_flags: i32, pub ai_family: i32, @@ -851,18 +145,13 @@ pub struct ADDRINFOA { pub ai_addr: *mut SOCKADDR, pub ai_next: *mut ADDRINFOA, } -impl Copy for ADDRINFOA {} -impl Clone for ADDRINFOA { - fn clone(&self) -> Self { - *self - } -} pub const AF_INET: ADDRESS_FAMILY = 2u16; pub const AF_INET6: ADDRESS_FAMILY = 23u16; pub const AF_UNIX: u16 = 1u16; pub const AF_UNSPEC: ADDRESS_FAMILY = 0u16; pub const ALL_PROCESSOR_GROUPS: u16 = 65535u16; #[repr(C)] +#[derive(Clone, Copy)] pub union ARM64_NT_NEON128 { pub Anonymous: ARM64_NT_NEON128_0, pub D: [f64; 2], @@ -870,27 +159,17 @@ pub union ARM64_NT_NEON128 { pub H: [u16; 8], pub B: [u8; 16], } -impl Copy for ARM64_NT_NEON128 {} -impl Clone for ARM64_NT_NEON128 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct ARM64_NT_NEON128_0 { pub Low: u64, pub High: i64, } -impl Copy for ARM64_NT_NEON128_0 {} -impl Clone for ARM64_NT_NEON128_0 { - fn clone(&self) -> Self { - *self - } -} pub const BELOW_NORMAL_PRIORITY_CLASS: PROCESS_CREATION_FLAGS = 16384u32; pub type BOOL = i32; pub type BOOLEAN = u8; #[repr(C)] +#[derive(Clone, Copy)] pub struct BY_HANDLE_FILE_INFORMATION { pub dwFileAttributes: u32, pub ftCreationTime: FILETIME, @@ -903,41 +182,26 @@ pub struct BY_HANDLE_FILE_INFORMATION { pub nFileIndexHigh: u32, pub nFileIndexLow: u32, } -impl Copy for BY_HANDLE_FILE_INFORMATION {} -impl Clone for BY_HANDLE_FILE_INFORMATION { - fn clone(&self) -> Self { - *self - } -} pub const CALLBACK_CHUNK_FINISHED: LPPROGRESS_ROUTINE_CALLBACK_REASON = 0u32; pub const CALLBACK_STREAM_SWITCH: LPPROGRESS_ROUTINE_CALLBACK_REASON = 1u32; pub type COMPARESTRING_RESULT = i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct CONDITION_VARIABLE { pub Ptr: *mut core::ffi::c_void, } -impl Copy for CONDITION_VARIABLE {} -impl Clone for CONDITION_VARIABLE { - fn clone(&self) -> Self { - *self - } -} pub type CONSOLE_MODE = u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct CONSOLE_READCONSOLE_CONTROL { pub nLength: u32, pub nInitialChars: u32, pub dwCtrlWakeupMask: u32, pub dwControlKeyState: u32, } -impl Copy for CONSOLE_READCONSOLE_CONTROL {} -impl Clone for CONSOLE_READCONSOLE_CONTROL { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "aarch64")] +#[derive(Clone, Copy)] pub struct CONTEXT { pub ContextFlags: CONTEXT_FLAGS, pub Cpsr: u32, @@ -952,30 +216,16 @@ pub struct CONTEXT { pub Wcr: [u32; 2], pub Wvr: [u64; 2], } -#[cfg(target_arch = "aarch64")] -impl Copy for CONTEXT {} -#[cfg(target_arch = "aarch64")] -impl Clone for CONTEXT { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "aarch64")] +#[derive(Clone, Copy)] pub union CONTEXT_0 { pub Anonymous: CONTEXT_0_0, pub X: [u64; 31], } -#[cfg(target_arch = "aarch64")] -impl Copy for CONTEXT_0 {} -#[cfg(target_arch = "aarch64")] -impl Clone for CONTEXT_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "aarch64")] +#[derive(Clone, 
Copy)] pub struct CONTEXT_0_0 { pub X0: u64, pub X1: u64, @@ -1009,16 +259,9 @@ pub struct CONTEXT_0_0 { pub Fp: u64, pub Lr: u64, } -#[cfg(target_arch = "aarch64")] -impl Copy for CONTEXT_0_0 {} -#[cfg(target_arch = "aarch64")] -impl Clone for CONTEXT_0_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub struct CONTEXT { pub P1Home: u64, pub P2Home: u64, @@ -1067,30 +310,16 @@ pub struct CONTEXT { pub LastExceptionToRip: u64, pub LastExceptionFromRip: u64, } -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for CONTEXT {} -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for CONTEXT { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub union CONTEXT_0 { pub FltSave: XSAVE_FORMAT, pub Anonymous: CONTEXT_0_0, } -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for CONTEXT_0 {} -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for CONTEXT_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub struct CONTEXT_0_0 { pub Header: [M128A; 2], pub Legacy: [M128A; 8], @@ -1111,16 +340,9 @@ pub struct CONTEXT_0_0 { pub Xmm14: M128A, pub Xmm15: M128A, } -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for CONTEXT_0_0 {} -#[cfg(any(target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for CONTEXT_0_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "x86")] +#[derive(Clone, Copy)] pub struct CONTEXT { pub ContextFlags: CONTEXT_FLAGS, pub Dr0: u32, @@ -1148,14 +370,6 @@ pub struct CONTEXT { pub SegSs: u32, pub ExtendedRegisters: [u8; 512], } -#[cfg(target_arch = "x86")] -impl Copy for CONTEXT {} -#[cfg(target_arch = "x86")] -impl Clone for CONTEXT { - fn clone(&self) -> Self { - *self - } -} pub type CONTEXT_FLAGS = u32; pub const CP_UTF8: u32 = 65001u32; pub const CREATE_ALWAYS: FILE_CREATION_DISPOSITION = 2u32; @@ -3068,6 +2282,7 @@ pub const ERROR_XML_PARSE_ERROR: WIN32_ERROR = 1465u32; pub type EXCEPTION_DISPOSITION = i32; pub const EXCEPTION_MAXIMUM_PARAMETERS: u32 = 15u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct EXCEPTION_RECORD { pub ExceptionCode: NTSTATUS, pub ExceptionFlags: u32, @@ -3076,12 +2291,6 @@ pub struct EXCEPTION_RECORD { pub NumberParameters: u32, pub ExceptionInformation: [usize; 15], } -impl Copy for EXCEPTION_RECORD {} -impl Clone for EXCEPTION_RECORD { - fn clone(&self) -> Self { - *self - } -} pub const EXCEPTION_STACK_OVERFLOW: NTSTATUS = 0xC00000FD_u32 as _; pub const EXTENDED_STARTUPINFO_PRESENT: PROCESS_CREATION_FLAGS = 524288u32; pub const E_NOTIMPL: HRESULT = 0x80004001_u32 as _; @@ -3095,40 +2304,25 @@ pub const FALSE: BOOL = 0i32; pub type FARPROC = Option<unsafe extern "system" fn() -> isize>; pub const FAST_FAIL_FATAL_APP_EXIT: u32 = 7u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FD_SET { pub fd_count: u32, pub fd_array: [SOCKET; 64], } -impl Copy for FD_SET {} -impl Clone for FD_SET { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct FILETIME { pub dwLowDateTime: u32, pub dwHighDateTime: u32, } -impl Copy for FILETIME {} -impl Clone for FILETIME { - fn clone(&self) -> Self { - *self - } -} pub type FILE_ACCESS_RIGHTS = u32; pub const FILE_ADD_FILE: FILE_ACCESS_RIGHTS = 2u32; pub const FILE_ADD_SUBDIRECTORY: FILE_ACCESS_RIGHTS
= 4u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_ALLOCATION_INFO { pub AllocationSize: i64, } -impl Copy for FILE_ALLOCATION_INFO {} -impl Clone for FILE_ALLOCATION_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_ALL_ACCESS: FILE_ACCESS_RIGHTS = 2032127u32; pub const FILE_APPEND_DATA: FILE_ACCESS_RIGHTS = 4u32; pub const FILE_ATTRIBUTE_ARCHIVE: FILE_FLAGS_AND_ATTRIBUTES = 32u32; @@ -3151,20 +2345,16 @@ pub const FILE_ATTRIBUTE_REPARSE_POINT: FILE_FLAGS_AND_ATTRIBUTES = 1024u32; pub const FILE_ATTRIBUTE_SPARSE_FILE: FILE_FLAGS_AND_ATTRIBUTES = 512u32; pub const FILE_ATTRIBUTE_SYSTEM: FILE_FLAGS_AND_ATTRIBUTES = 4u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_ATTRIBUTE_TAG_INFO { pub FileAttributes: u32, pub ReparseTag: u32, } -impl Copy for FILE_ATTRIBUTE_TAG_INFO {} -impl Clone for FILE_ATTRIBUTE_TAG_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_ATTRIBUTE_TEMPORARY: FILE_FLAGS_AND_ATTRIBUTES = 256u32; pub const FILE_ATTRIBUTE_UNPINNED: FILE_FLAGS_AND_ATTRIBUTES = 1048576u32; pub const FILE_ATTRIBUTE_VIRTUAL: FILE_FLAGS_AND_ATTRIBUTES = 65536u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_BASIC_INFO { pub CreationTime: i64, pub LastAccessTime: i64, @@ -3172,12 +2362,6 @@ pub struct FILE_BASIC_INFO { pub ChangeTime: i64, pub FileAttributes: u32, } -impl Copy for FILE_BASIC_INFO {} -impl Clone for FILE_BASIC_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_BEGIN: SET_FILE_POINTER_MOVE_METHOD = 0u32; pub const FILE_COMPLETE_IF_OPLOCKED: NTCREATEFILE_CREATE_OPTIONS = 256u32; pub const FILE_CONTAINS_EXTENDED_CREATE_INFORMATION: NTCREATEFILE_CREATE_OPTIONS = 268435456u32; @@ -3197,37 +2381,22 @@ pub const FILE_DISPOSITION_FLAG_IGNORE_READONLY_ATTRIBUTE: FILE_DISPOSITION_INFO pub const FILE_DISPOSITION_FLAG_ON_CLOSE: FILE_DISPOSITION_INFO_EX_FLAGS = 8u32; pub const FILE_DISPOSITION_FLAG_POSIX_SEMANTICS: FILE_DISPOSITION_INFO_EX_FLAGS = 2u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_DISPOSITION_INFO { pub DeleteFile: BOOLEAN, } -impl Copy for FILE_DISPOSITION_INFO {} -impl Clone for FILE_DISPOSITION_INFO { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_DISPOSITION_INFO_EX { pub Flags: FILE_DISPOSITION_INFO_EX_FLAGS, } -impl Copy for FILE_DISPOSITION_INFO_EX {} -impl Clone for FILE_DISPOSITION_INFO_EX { - fn clone(&self) -> Self { - *self - } -} pub type FILE_DISPOSITION_INFO_EX_FLAGS = u32; pub const FILE_END: SET_FILE_POINTER_MOVE_METHOD = 2u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_END_OF_FILE_INFO { pub EndOfFile: i64, } -impl Copy for FILE_END_OF_FILE_INFO {} -impl Clone for FILE_END_OF_FILE_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_EXECUTE: FILE_ACCESS_RIGHTS = 32u32; pub type FILE_FLAGS_AND_ATTRIBUTES = u32; pub const FILE_FLAG_BACKUP_SEMANTICS: FILE_FLAGS_AND_ATTRIBUTES = 33554432u32; @@ -3246,6 +2415,7 @@ pub const FILE_GENERIC_EXECUTE: FILE_ACCESS_RIGHTS = 1179808u32; pub const FILE_GENERIC_READ: FILE_ACCESS_RIGHTS = 1179785u32; pub const FILE_GENERIC_WRITE: FILE_ACCESS_RIGHTS = 1179926u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_ID_BOTH_DIR_INFO { pub NextEntryOffset: u32, pub FileIndex: u32, @@ -3263,23 +2433,12 @@ pub struct FILE_ID_BOTH_DIR_INFO { pub FileId: i64, pub FileName: [u16; 1], } -impl Copy for FILE_ID_BOTH_DIR_INFO {} -impl Clone for FILE_ID_BOTH_DIR_INFO { - fn clone(&self) -> Self { - *self - } -} pub type FILE_INFO_BY_HANDLE_CLASS = i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct 
FILE_IO_PRIORITY_HINT_INFO { pub PriorityHint: PRIORITY_HINT, } -impl Copy for FILE_IO_PRIORITY_HINT_INFO {} -impl Clone for FILE_IO_PRIORITY_HINT_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_LIST_DIRECTORY: FILE_ACCESS_RIGHTS = 1u32; pub const FILE_NAME_NORMALIZED: GETFINALPATHNAMEBYHANDLE_FLAGS = 0u32; pub const FILE_NAME_OPENED: GETFINALPATHNAMEBYHANDLE_FLAGS = 8u32; @@ -3310,6 +2469,7 @@ pub const FILE_SHARE_NONE: FILE_SHARE_MODE = 0u32; pub const FILE_SHARE_READ: FILE_SHARE_MODE = 1u32; pub const FILE_SHARE_WRITE: FILE_SHARE_MODE = 2u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct FILE_STANDARD_INFO { pub AllocationSize: i64, pub EndOfFile: i64, @@ -3317,12 +2477,6 @@ pub struct FILE_STANDARD_INFO { pub DeletePending: BOOLEAN, pub Directory: BOOLEAN, } -impl Copy for FILE_STANDARD_INFO {} -impl Clone for FILE_STANDARD_INFO { - fn clone(&self) -> Self { - *self - } -} pub const FILE_SUPERSEDE: NTCREATEFILE_CREATE_DISPOSITION = 0u32; pub const FILE_SYNCHRONOUS_IO_ALERT: NTCREATEFILE_CREATE_OPTIONS = 16u32; pub const FILE_SYNCHRONOUS_IO_NONALERT: NTCREATEFILE_CREATE_OPTIONS = 32u32; @@ -3340,6 +2494,7 @@ pub const FILE_WRITE_THROUGH: NTCREATEFILE_CREATE_OPTIONS = 2u32; pub const FIONBIO: i32 = -2147195266i32; #[repr(C)] #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub struct FLOATING_SAVE_AREA { pub ControlWord: u32, pub StatusWord: u32, @@ -3351,16 +2506,9 @@ pub struct FLOATING_SAVE_AREA { pub RegisterArea: [u8; 80], pub Cr0NpxState: u32, } -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for FLOATING_SAVE_AREA {} -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for FLOATING_SAVE_AREA { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "x86")] +#[derive(Clone, Copy)] pub struct FLOATING_SAVE_AREA { pub ControlWord: u32, pub StatusWord: u32, @@ -3372,14 +2520,6 @@ pub struct FLOATING_SAVE_AREA { pub RegisterArea: [u8; 80], pub Spare0: u32, } -#[cfg(target_arch = "x86")] -impl Copy for FLOATING_SAVE_AREA {} -#[cfg(target_arch = "x86")] -impl Clone for FLOATING_SAVE_AREA { - fn clone(&self) -> Self { - *self - } -} pub const FORMAT_MESSAGE_ALLOCATE_BUFFER: FORMAT_MESSAGE_OPTIONS = 256u32; pub const FORMAT_MESSAGE_ARGUMENT_ARRAY: FORMAT_MESSAGE_OPTIONS = 8192u32; pub const FORMAT_MESSAGE_FROM_HMODULE: FORMAT_MESSAGE_OPTIONS = 2048u32; @@ -3422,18 +2562,13 @@ pub const GENERIC_READ: GENERIC_ACCESS_RIGHTS = 2147483648u32; pub const GENERIC_WRITE: GENERIC_ACCESS_RIGHTS = 1073741824u32; pub type GETFINALPATHNAMEBYHANDLE_FLAGS = u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct GUID { pub data1: u32, pub data2: u16, pub data3: u16, pub data4: [u8; 8], } -impl Copy for GUID {} -impl Clone for GUID { - fn clone(&self) -> Self { - *self - } -} impl GUID { pub const fn from_u128(uuid: u128) -> Self { Self { @@ -3454,112 +2589,67 @@ pub type HMODULE = *mut core::ffi::c_void; pub type HRESULT = i32; pub const IDLE_PRIORITY_CLASS: PROCESS_CREATION_FLAGS = 64u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct IN6_ADDR { pub u: IN6_ADDR_0, } -impl Copy for IN6_ADDR {} -impl Clone for IN6_ADDR { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub union IN6_ADDR_0 { pub Byte: [u8; 16], pub Word: [u16; 8], } -impl Copy for IN6_ADDR_0 {} -impl Clone for IN6_ADDR_0 { - fn clone(&self) -> Self { - *self - } -} pub const INFINITE: u32 = 4294967295u32; pub const 
INHERIT_CALLER_PRIORITY: PROCESS_CREATION_FLAGS = 131072u32; pub const INHERIT_PARENT_AFFINITY: PROCESS_CREATION_FLAGS = 65536u32; #[repr(C)] +#[derive(Clone, Copy)] pub union INIT_ONCE { pub Ptr: *mut core::ffi::c_void, } -impl Copy for INIT_ONCE {} -impl Clone for INIT_ONCE { - fn clone(&self) -> Self { - *self - } -} pub const INIT_ONCE_INIT_FAILED: u32 = 4u32; pub const INVALID_FILE_ATTRIBUTES: u32 = 4294967295u32; pub const INVALID_SOCKET: SOCKET = -1i32 as _; #[repr(C)] +#[derive(Clone, Copy)] pub struct IN_ADDR { pub S_un: IN_ADDR_0, } -impl Copy for IN_ADDR {} -impl Clone for IN_ADDR { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub union IN_ADDR_0 { pub S_un_b: IN_ADDR_0_0, pub S_un_w: IN_ADDR_0_1, pub S_addr: u32, } -impl Copy for IN_ADDR_0 {} -impl Clone for IN_ADDR_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct IN_ADDR_0_0 { pub s_b1: u8, pub s_b2: u8, pub s_b3: u8, pub s_b4: u8, } -impl Copy for IN_ADDR_0_0 {} -impl Clone for IN_ADDR_0_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct IN_ADDR_0_1 { pub s_w1: u16, pub s_w2: u16, } -impl Copy for IN_ADDR_0_1 {} -impl Clone for IN_ADDR_0_1 { - fn clone(&self) -> Self { - *self - } -} pub const IO_REPARSE_TAG_MOUNT_POINT: u32 = 2684354563u32; pub const IO_REPARSE_TAG_SYMLINK: u32 = 2684354572u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct IO_STATUS_BLOCK { pub Anonymous: IO_STATUS_BLOCK_0, pub Information: usize, } -impl Copy for IO_STATUS_BLOCK {} -impl Clone for IO_STATUS_BLOCK { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub union IO_STATUS_BLOCK_0 { pub Status: NTSTATUS, pub Pointer: *mut core::ffi::c_void, } -impl Copy for IO_STATUS_BLOCK_0 {} -impl Clone for IO_STATUS_BLOCK_0 { - fn clone(&self) -> Self { - *self - } -} pub type IPPROTO = i32; pub const IPPROTO_AH: IPPROTO = 51i32; pub const IPPROTO_CBT: IPPROTO = 7i32; @@ -3601,45 +2691,30 @@ pub const IPPROTO_UDP: IPPROTO = 17i32; pub const IPV6_ADD_MEMBERSHIP: i32 = 12i32; pub const IPV6_DROP_MEMBERSHIP: i32 = 13i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct IPV6_MREQ { pub ipv6mr_multiaddr: IN6_ADDR, pub ipv6mr_interface: u32, } -impl Copy for IPV6_MREQ {} -impl Clone for IPV6_MREQ { - fn clone(&self) -> Self { - *self - } -} pub const IPV6_MULTICAST_LOOP: i32 = 11i32; pub const IPV6_V6ONLY: i32 = 27i32; pub const IP_ADD_MEMBERSHIP: i32 = 12i32; pub const IP_DROP_MEMBERSHIP: i32 = 13i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct IP_MREQ { pub imr_multiaddr: IN_ADDR, pub imr_interface: IN_ADDR, } -impl Copy for IP_MREQ {} -impl Clone for IP_MREQ { - fn clone(&self) -> Self { - *self - } -} pub const IP_MULTICAST_LOOP: i32 = 11i32; pub const IP_MULTICAST_TTL: i32 = 10i32; pub const IP_TTL: i32 = 4i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct LINGER { pub l_onoff: u16, pub l_linger: u16, } -impl Copy for LINGER {} -impl Clone for LINGER { - fn clone(&self) -> Self { - *self - } -} pub type LPOVERLAPPED_COMPLETION_ROUTINE = Option< unsafe extern "system" fn( dwerrorcode: u32, @@ -3673,16 +2748,11 @@ pub type LPWSAOVERLAPPED_COMPLETION_ROUTINE = Option< ), >; #[repr(C)] +#[derive(Clone, Copy)] pub struct M128A { pub Low: u64, pub High: i64, } -impl Copy for M128A {} -impl Clone for M128A { - fn clone(&self) -> Self { - *self - } -} pub const MAXIMUM_REPARSE_DATA_BUFFER_SIZE: u32 = 16384u32; pub const MAX_PATH: u32 = 260u32; pub const MB_COMPOSITE: MULTI_BYTE_TO_WIDE_CHAR_FLAGS = 2u32; @@ -3710,6 +2780,7 
@@ pub type NTCREATEFILE_CREATE_DISPOSITION = u32; pub type NTCREATEFILE_CREATE_OPTIONS = u32; pub type NTSTATUS = i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct OBJECT_ATTRIBUTES { pub Length: u32, pub RootDirectory: HANDLE, @@ -3718,50 +2789,29 @@ pub struct OBJECT_ATTRIBUTES { pub SecurityDescriptor: *const core::ffi::c_void, pub SecurityQualityOfService: *const core::ffi::c_void, } -impl Copy for OBJECT_ATTRIBUTES {} -impl Clone for OBJECT_ATTRIBUTES { - fn clone(&self) -> Self { - *self - } -} pub const OBJ_DONT_REPARSE: i32 = 4096i32; pub const OPEN_ALWAYS: FILE_CREATION_DISPOSITION = 4u32; pub const OPEN_EXISTING: FILE_CREATION_DISPOSITION = 3u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct OVERLAPPED { pub Internal: usize, pub InternalHigh: usize, pub Anonymous: OVERLAPPED_0, pub hEvent: HANDLE, } -impl Copy for OVERLAPPED {} -impl Clone for OVERLAPPED { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub union OVERLAPPED_0 { pub Anonymous: OVERLAPPED_0_0, pub Pointer: *mut core::ffi::c_void, } -impl Copy for OVERLAPPED_0 {} -impl Clone for OVERLAPPED_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct OVERLAPPED_0_0 { pub Offset: u32, pub OffsetHigh: u32, } -impl Copy for OVERLAPPED_0_0 {} -impl Clone for OVERLAPPED_0_0 { - fn clone(&self) -> Self { - *self - } -} pub type PCSTR = *const u8; pub type PCWSTR = *const u16; pub type PIO_APC_ROUTINE = Option< @@ -3788,18 +2838,13 @@ pub type PRIORITY_HINT = i32; pub type PROCESSOR_ARCHITECTURE = u16; pub type PROCESS_CREATION_FLAGS = u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct PROCESS_INFORMATION { pub hProcess: HANDLE, pub hThread: HANDLE, pub dwProcessId: u32, pub dwThreadId: u32, } -impl Copy for PROCESS_INFORMATION {} -impl Clone for PROCESS_INFORMATION { - fn clone(&self) -> Self { - *self - } -} pub const PROCESS_MODE_BACKGROUND_BEGIN: PROCESS_CREATION_FLAGS = 1048576u32; pub const PROCESS_MODE_BACKGROUND_END: PROCESS_CREATION_FLAGS = 2097152u32; pub const PROFILE_KERNEL: PROCESS_CREATION_FLAGS = 536870912u32; @@ -3822,17 +2867,12 @@ pub const SD_RECEIVE: WINSOCK_SHUTDOWN_HOW = 0i32; pub const SD_SEND: WINSOCK_SHUTDOWN_HOW = 1i32; pub const SECURITY_ANONYMOUS: FILE_FLAGS_AND_ATTRIBUTES = 0u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct SECURITY_ATTRIBUTES { pub nLength: u32, pub lpSecurityDescriptor: *mut core::ffi::c_void, pub bInheritHandle: BOOL, } -impl Copy for SECURITY_ATTRIBUTES {} -impl Clone for SECURITY_ATTRIBUTES { - fn clone(&self) -> Self { - *self - } -} pub const SECURITY_CONTEXT_TRACKING: FILE_FLAGS_AND_ATTRIBUTES = 262144u32; pub const SECURITY_DELEGATION: FILE_FLAGS_AND_ATTRIBUTES = 196608u32; pub const SECURITY_EFFECTIVE_ONLY: FILE_FLAGS_AND_ATTRIBUTES = 524288u32; @@ -3843,27 +2883,17 @@ pub const SECURITY_VALID_SQOS_FLAGS: FILE_FLAGS_AND_ATTRIBUTES = 2031616u32; pub type SEND_RECV_FLAGS = i32; pub type SET_FILE_POINTER_MOVE_METHOD = u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct SOCKADDR { pub sa_family: ADDRESS_FAMILY, pub sa_data: [i8; 14], } -impl Copy for SOCKADDR {} -impl Clone for SOCKADDR { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct SOCKADDR_UN { pub sun_family: ADDRESS_FAMILY, pub sun_path: [i8; 108], } -impl Copy for SOCKADDR_UN {} -impl Clone for SOCKADDR_UN { - fn clone(&self) -> Self { - *self - } -} pub type SOCKET = usize; pub const SOCKET_ERROR: i32 = -1i32; pub const SOCK_DGRAM: WINSOCK_SOCKET_TYPE = 2i32; @@ -3879,15 +2909,10 @@ pub const SO_RCVTIMEO: i32 = 
4102i32; pub const SO_SNDTIMEO: i32 = 4101i32; pub const SPECIFIC_RIGHTS_ALL: FILE_ACCESS_RIGHTS = 65535u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct SRWLOCK { pub Ptr: *mut core::ffi::c_void, } -impl Copy for SRWLOCK {} -impl Clone for SRWLOCK { - fn clone(&self) -> Self { - *self - } -} pub const STACK_SIZE_PARAM_IS_A_RESERVATION: THREAD_CREATION_FLAGS = 65536u32; pub const STANDARD_RIGHTS_ALL: FILE_ACCESS_RIGHTS = 2031616u32; pub const STANDARD_RIGHTS_EXECUTE: FILE_ACCESS_RIGHTS = 131072u32; @@ -3909,17 +2934,13 @@ pub const STARTF_USESHOWWINDOW: STARTUPINFOW_FLAGS = 1u32; pub const STARTF_USESIZE: STARTUPINFOW_FLAGS = 2u32; pub const STARTF_USESTDHANDLES: STARTUPINFOW_FLAGS = 256u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct STARTUPINFOEXW { pub StartupInfo: STARTUPINFOW, pub lpAttributeList: LPPROC_THREAD_ATTRIBUTE_LIST, } -impl Copy for STARTUPINFOEXW {} -impl Clone for STARTUPINFOEXW { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct STARTUPINFOW { pub cb: u32, pub lpReserved: PWSTR, @@ -3940,12 +2961,6 @@ pub struct STARTUPINFOW { pub hStdOutput: HANDLE, pub hStdError: HANDLE, } -impl Copy for STARTUPINFOW {} -impl Clone for STARTUPINFOW { - fn clone(&self) -> Self { - *self - } -} pub type STARTUPINFOW_FLAGS = u32; pub const STATUS_DELETE_PENDING: NTSTATUS = 0xC0000056_u32 as _; pub const STATUS_END_OF_FILE: NTSTATUS = 0xC0000011_u32 as _; @@ -3964,6 +2979,7 @@ pub const SYMLINK_FLAG_RELATIVE: u32 = 1u32; pub type SYNCHRONIZATION_ACCESS_RIGHTS = u32; pub const SYNCHRONIZE: FILE_ACCESS_RIGHTS = 1048576u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct SYSTEM_INFO { pub Anonymous: SYSTEM_INFO_0, pub dwPageSize: u32, @@ -3976,34 +2992,18 @@ pub struct SYSTEM_INFO { pub wProcessorLevel: u16, pub wProcessorRevision: u16, } -impl Copy for SYSTEM_INFO {} -impl Clone for SYSTEM_INFO { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub union SYSTEM_INFO_0 { pub dwOemId: u32, pub Anonymous: SYSTEM_INFO_0_0, } -impl Copy for SYSTEM_INFO_0 {} -impl Clone for SYSTEM_INFO_0 { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct SYSTEM_INFO_0_0 { pub wProcessorArchitecture: PROCESSOR_ARCHITECTURE, pub wReserved: u16, } -impl Copy for SYSTEM_INFO_0_0 {} -impl Clone for SYSTEM_INFO_0_0 { - fn clone(&self) -> Self { - *self - } -} pub const TCP_NODELAY: i32 = 1i32; pub const THREAD_CREATE_RUN_IMMEDIATELY: THREAD_CREATION_FLAGS = 0u32; pub const THREAD_CREATE_SUSPENDED: THREAD_CREATION_FLAGS = 4u32; @@ -4011,16 +3011,11 @@ pub type THREAD_CREATION_FLAGS = u32; pub const TIMER_ALL_ACCESS: SYNCHRONIZATION_ACCESS_RIGHTS = 2031619u32; pub const TIMER_MODIFY_STATE: SYNCHRONIZATION_ACCESS_RIGHTS = 2u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct TIMEVAL { pub tv_sec: i32, pub tv_usec: i32, } -impl Copy for TIMEVAL {} -impl Clone for TIMEVAL { - fn clone(&self) -> Self { - *self - } -} pub const TLS_OUT_OF_INDEXES: u32 = 4294967295u32; pub type TOKEN_ACCESS_MASK = u32; pub const TOKEN_ACCESS_PSEUDO_HANDLE: TOKEN_ACCESS_MASK = 24u32; @@ -4047,17 +3042,12 @@ pub const TOKEN_WRITE_OWNER: TOKEN_ACCESS_MASK = 524288u32; pub const TRUE: BOOL = 1i32; pub const TRUNCATE_EXISTING: FILE_CREATION_DISPOSITION = 5u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct UNICODE_STRING { pub Length: u16, pub MaximumLength: u16, pub Buffer: PWSTR, } -impl Copy for UNICODE_STRING {} -impl Clone for UNICODE_STRING { - fn clone(&self) -> Self { - *self - } -} pub const VOLUME_NAME_DOS: 
GETFINALPATHNAMEBYHANDLE_FLAGS = 0u32; pub const VOLUME_NAME_GUID: GETFINALPATHNAMEBYHANDLE_FLAGS = 1u32; pub const VOLUME_NAME_NONE: GETFINALPATHNAMEBYHANDLE_FLAGS = 4u32; @@ -4071,6 +3061,7 @@ pub const WAIT_TIMEOUT: WAIT_EVENT = 258u32; pub const WC_ERR_INVALID_CHARS: u32 = 128u32; pub type WIN32_ERROR = u32; #[repr(C)] +#[derive(Clone, Copy)] pub struct WIN32_FIND_DATAW { pub dwFileAttributes: u32, pub ftCreationTime: FILETIME, @@ -4083,30 +3074,20 @@ pub struct WIN32_FIND_DATAW { pub cFileName: [u16; 260], pub cAlternateFileName: [u16; 14], } -impl Copy for WIN32_FIND_DATAW {} -impl Clone for WIN32_FIND_DATAW { - fn clone(&self) -> Self { - *self - } -} pub type WINSOCK_SHUTDOWN_HOW = i32; pub type WINSOCK_SOCKET_TYPE = i32; pub const WRITE_DAC: FILE_ACCESS_RIGHTS = 262144u32; pub const WRITE_OWNER: FILE_ACCESS_RIGHTS = 524288u32; pub const WSABASEERR: WSA_ERROR = 10000i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct WSABUF { pub len: u32, pub buf: PSTR, } -impl Copy for WSABUF {} -impl Clone for WSABUF { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub struct WSADATA { pub wVersion: u16, pub wHighVersion: u16, @@ -4116,16 +3097,9 @@ pub struct WSADATA { pub szDescription: [i8; 257], pub szSystemStatus: [i8; 129], } -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for WSADATA {} -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for WSADATA { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "x86")] +#[derive(Clone, Copy)] pub struct WSADATA { pub wVersion: u16, pub wHighVersion: u16, @@ -4135,14 +3109,6 @@ pub struct WSADATA { pub iMaxUdpDg: u16, pub lpVendorInfo: PSTR, } -#[cfg(target_arch = "x86")] -impl Copy for WSADATA {} -#[cfg(target_arch = "x86")] -impl Clone for WSADATA { - fn clone(&self) -> Self { - *self - } -} pub const WSAEACCES: WSA_ERROR = 10013i32; pub const WSAEADDRINUSE: WSA_ERROR = 10048i32; pub const WSAEADDRNOTAVAIL: WSA_ERROR = 10049i32; @@ -4198,17 +3164,13 @@ pub const WSANOTINITIALISED: WSA_ERROR = 10093i32; pub const WSANO_DATA: WSA_ERROR = 11004i32; pub const WSANO_RECOVERY: WSA_ERROR = 11003i32; #[repr(C)] +#[derive(Clone, Copy)] pub struct WSAPROTOCOLCHAIN { pub ChainLen: i32, pub ChainEntries: [u32; 7], } -impl Copy for WSAPROTOCOLCHAIN {} -impl Clone for WSAPROTOCOLCHAIN { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] +#[derive(Clone, Copy)] pub struct WSAPROTOCOL_INFOW { pub dwServiceFlags1: u32, pub dwServiceFlags2: u32, @@ -4231,12 +3193,6 @@ pub struct WSAPROTOCOL_INFOW { pub dwProviderReserved: u32, pub szProtocol: [u16; 256], } -impl Copy for WSAPROTOCOL_INFOW {} -impl Clone for WSAPROTOCOL_INFOW { - fn clone(&self) -> Self { - *self - } -} pub const WSASERVICE_NOT_FOUND: WSA_ERROR = 10108i32; pub const WSASYSCALLFAILURE: WSA_ERROR = 10107i32; pub const WSASYSNOTREADY: WSA_ERROR = 10091i32; @@ -4287,6 +3243,7 @@ pub const WSA_WAIT_EVENT_0: WSA_ERROR = 0i32; pub const WSA_WAIT_IO_COMPLETION: WSA_ERROR = 192i32; #[repr(C)] #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] +#[derive(Clone, Copy)] pub struct XSAVE_FORMAT { pub ControlWord: u16, pub StatusWord: u16, @@ -4305,16 +3262,9 @@ pub struct XSAVE_FORMAT { pub XmmRegisters: [M128A; 16], pub Reserved4: [u8; 96], } -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Copy for 
XSAVE_FORMAT {} -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "x86_64"))] -impl Clone for XSAVE_FORMAT { - fn clone(&self) -> Self { - *self - } -} #[repr(C)] #[cfg(target_arch = "x86")] +#[derive(Clone, Copy)] pub struct XSAVE_FORMAT { pub ControlWord: u16, pub StatusWord: u16, @@ -4333,12 +3283,5 @@ pub struct XSAVE_FORMAT { pub XmmRegisters: [M128A; 8], pub Reserved4: [u8; 224], } -#[cfg(target_arch = "x86")] -impl Copy for XSAVE_FORMAT {} -#[cfg(target_arch = "x86")] -impl Clone for XSAVE_FORMAT { - fn clone(&self) -> Self { - *self - } -} // ignore-tidy-filelength +use super::windows_targets; diff --git a/std/src/sys/pal/windows/c/windows_targets.rs b/std/src/sys/pal/windows/c/windows_targets.rs new file mode 100644 index 0000000000000..252bceb70942b --- /dev/null +++ b/std/src/sys/pal/windows/c/windows_targets.rs @@ -0,0 +1,37 @@ +//! Provides the `link!` macro used by the generated windows bindings. +//! +//! This is a simple wrapper around an `extern` block with a `#[link]` attribute. +//! It's very roughly equivalent to the windows-targets crate. + +#[cfg(feature = "windows_raw_dylib")] +pub macro link { + ($library:literal $abi:literal $($link_name:literal)? $(#[$doc:meta])? fn $($function:tt)*) => ( + #[cfg_attr(not(target_arch = "x86"), link(name = $library, kind = "raw-dylib", modifiers = "+verbatim"))] + #[cfg_attr(target_arch = "x86", link(name = $library, kind = "raw-dylib", modifiers = "+verbatim", import_name_type = "undecorated"))] + extern $abi { + $(#[link_name=$link_name])? + pub fn $($function)*; + } + ) +} +#[cfg(not(feature = "windows_raw_dylib"))] +pub macro link { + ($library:literal $abi:literal $($link_name:literal)? $(#[$doc:meta])? fn $($function:tt)*) => ( + // Note: the windows-targets crate uses a pre-built Windows.lib import library which we don't + // have in this repo. So instead we always link kernel32.lib and add the rest of the import + // libraries below by using an empty extern block. This works because extern blocks are not + // connected to the library given in the #[link] attribute. + #[link(name = "kernel32")] + extern $abi { + $(#[link_name=$link_name])? + pub fn $($function)*; + } + ) +} + +#[cfg(not(feature = "windows_raw_dylib"))] +#[link(name = "advapi32")] +#[link(name = "ntdll")] +#[link(name = "userenv")] +#[link(name = "ws2_32")] +extern "C" {} diff --git a/std/src/sys/pal/windows/compat.rs b/std/src/sys/pal/windows/compat.rs index f5d57a28db69a..49fa1603f3e1e 100644 --- a/std/src/sys/pal/windows/compat.rs +++ b/std/src/sys/pal/windows/compat.rs @@ -112,8 +112,10 @@ impl Module { /// (e.g. kernel32 and ntdll). pub unsafe fn new(name: &CStr) -> Option<Self> { // SAFETY: A CStr is always null terminated. - let module = c::GetModuleHandleA(name.as_ptr().cast::<u8>()); - NonNull::new(module).map(Self) + unsafe { + let module = c::GetModuleHandleA(name.as_ptr().cast::<u8>()); + NonNull::new(module).map(Self) + } } // Try to get the address of a function.
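To make the `link!` macro defined in windows_targets.rs above concrete, here is a sketch (not part of the patch) of what one generated invocation expands to under each configuration, using `GetLastError` and the `WIN32_ERROR` alias (`u32`) from c.rs:

// The generated call in c.rs:
windows_targets::link!("kernel32.dll" "system" fn GetLastError() -> WIN32_ERROR);

// With the "windows_raw_dylib" feature disabled, this expands to roughly:
#[link(name = "kernel32")]
extern "system" {
    pub fn GetLastError() -> WIN32_ERROR;
}
// advapi32, ntdll, userenv, and ws2_32 are then linked once via the empty
// extern "C" {} block at the bottom of windows_targets.rs.

// With the feature enabled (and not on x86), it instead expands to roughly:
#[link(name = "kernel32.dll", kind = "raw-dylib", modifiers = "+verbatim")]
extern "system" {
    pub fn GetLastError() -> WIN32_ERROR;
}
// raw-dylib binds the import directly to the named DLL, so no import library
// is required; on x86 the macro also passes import_name_type = "undecorated"
// to avoid stdcall name decoration.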
diff --git a/std/src/sys/pal/windows/fs.rs b/std/src/sys/pal/windows/fs.rs index e92c5e80eac9c..48c39392047f0 100644 --- a/std/src/sys/pal/windows/fs.rs +++ b/std/src/sys/pal/windows/fs.rs @@ -18,7 +18,8 @@ use crate::sys::{c, cvt, Align8}; use crate::sys_common::{AsInner, FromInner, IntoInner}; use crate::thread; -use super::{api, to_u16s, IoResult}; +use super::api::{self, WinError}; +use super::{to_u16s, IoResult}; use crate::sys::path::maybe_verbatim; pub struct File { @@ -27,12 +28,12 @@ pub struct File { #[derive(Clone)] pub struct FileAttr { - attributes: c::DWORD, + attributes: u32, creation_time: c::FILETIME, last_access_time: c::FILETIME, last_write_time: c::FILETIME, file_size: u64, - reparse_tag: c::DWORD, + reparse_tag: u32, volume_serial_number: Option, number_of_links: Option, file_index: Option, @@ -40,8 +41,8 @@ pub struct FileAttr { #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub struct FileType { - attributes: c::DWORD, - reparse_tag: c::DWORD, + attributes: u32, + reparse_tag: u32, } pub struct ReadDir { @@ -74,16 +75,16 @@ pub struct OpenOptions { create_new: bool, // system-specific custom_flags: u32, - access_mode: Option, - attributes: c::DWORD, - share_mode: c::DWORD, - security_qos_flags: c::DWORD, - security_attributes: c::LPSECURITY_ATTRIBUTES, + access_mode: Option, + attributes: u32, + share_mode: u32, + security_qos_flags: u32, + security_attributes: *mut c::SECURITY_ATTRIBUTES, } #[derive(Clone, PartialEq, Eq, Debug)] pub struct FilePermissions { - attrs: c::DWORD, + attrs: u32, } #[derive(Copy, Clone, Debug, Default)] @@ -130,10 +131,11 @@ impl Iterator for ReadDir { let mut wfd = mem::zeroed(); loop { if c::FindNextFileW(self.handle.0, &mut wfd) == 0 { - if api::get_last_error().code == c::ERROR_NO_MORE_FILES { - return None; - } else { - return Some(Err(Error::last_os_error())); + match api::get_last_error() { + WinError::NO_MORE_FILES => return None, + WinError { code } => { + return Some(Err(Error::from_raw_os_error(code as i32))); + } } } if let Some(e) = DirEntry::new(&self.root, &wfd) { @@ -239,13 +241,11 @@ impl OpenOptions { // receive is `SECURITY_ANONYMOUS = 0x0`, which we can't check for later on. 
self.security_qos_flags = flags | c::SECURITY_SQOS_PRESENT; } - pub fn security_attributes(&mut self, attrs: c::LPSECURITY_ATTRIBUTES) { + pub fn security_attributes(&mut self, attrs: *mut c::SECURITY_ATTRIBUTES) { self.security_attributes = attrs; } - fn get_access_mode(&self) -> io::Result { - const ERROR_INVALID_PARAMETER: i32 = 87; - + fn get_access_mode(&self) -> io::Result { match (self.read, self.write, self.append, self.access_mode) { (.., Some(mode)) => Ok(mode), (true, false, false, None) => Ok(c::GENERIC_READ), @@ -255,23 +255,23 @@ impl OpenOptions { (true, _, true, None) => { Ok(c::GENERIC_READ | (c::FILE_GENERIC_WRITE & !c::FILE_WRITE_DATA)) } - (false, false, false, None) => Err(Error::from_raw_os_error(ERROR_INVALID_PARAMETER)), + (false, false, false, None) => { + Err(Error::from_raw_os_error(c::ERROR_INVALID_PARAMETER as i32)) + } } } - fn get_creation_mode(&self) -> io::Result { - const ERROR_INVALID_PARAMETER: i32 = 87; - + fn get_creation_mode(&self) -> io::Result { match (self.write, self.append) { (true, false) => {} (false, false) => { if self.truncate || self.create || self.create_new { - return Err(Error::from_raw_os_error(ERROR_INVALID_PARAMETER)); + return Err(Error::from_raw_os_error(c::ERROR_INVALID_PARAMETER as i32)); } } (_, true) => { if self.truncate && !self.create_new { - return Err(Error::from_raw_os_error(ERROR_INVALID_PARAMETER)); + return Err(Error::from_raw_os_error(c::ERROR_INVALID_PARAMETER as i32)); } } } @@ -287,7 +287,7 @@ impl OpenOptions { }) } - fn get_flags_and_attributes(&self) -> c::DWORD { + fn get_flags_and_attributes(&self) -> u32 { self.custom_flags | self.attributes | self.security_qos_flags @@ -315,7 +315,7 @@ impl File { // Manual truncation. See #115745. if opts.truncate && creation == c::OPEN_ALWAYS - && unsafe { c::GetLastError() } == c::ERROR_ALREADY_EXISTS + && api::get_last_error() == WinError::ALREADY_EXISTS { unsafe { // This originally used `FileAllocationInfo` instead of @@ -397,21 +397,21 @@ impl File { self.handle.as_raw_handle(), c::FileBasicInfo, core::ptr::addr_of_mut!(info) as *mut c_void, - size as c::DWORD, + size as u32, ))?; let mut attr = FileAttr { attributes: info.FileAttributes, creation_time: c::FILETIME { - dwLowDateTime: info.CreationTime as c::DWORD, - dwHighDateTime: (info.CreationTime >> 32) as c::DWORD, + dwLowDateTime: info.CreationTime as u32, + dwHighDateTime: (info.CreationTime >> 32) as u32, }, last_access_time: c::FILETIME { - dwLowDateTime: info.LastAccessTime as c::DWORD, - dwHighDateTime: (info.LastAccessTime >> 32) as c::DWORD, + dwLowDateTime: info.LastAccessTime as u32, + dwHighDateTime: (info.LastAccessTime >> 32) as u32, }, last_write_time: c::FILETIME { - dwLowDateTime: info.LastWriteTime as c::DWORD, - dwHighDateTime: (info.LastWriteTime >> 32) as c::DWORD, + dwLowDateTime: info.LastWriteTime as u32, + dwHighDateTime: (info.LastWriteTime >> 32) as u32, }, file_size: 0, reparse_tag: 0, @@ -425,7 +425,7 @@ impl File { self.handle.as_raw_handle(), c::FileStandardInfo, core::ptr::addr_of_mut!(info) as *mut c_void, - size as c::DWORD, + size as u32, ))?; attr.file_size = info.AllocationSize as u64; attr.number_of_links = Some(info.NumberOfLinks); @@ -495,7 +495,7 @@ impl File { SeekFrom::End(n) => (c::FILE_END, n), SeekFrom::Current(n) => (c::FILE_CURRENT, n), }; - let pos = pos as c::LARGE_INTEGER; + let pos = pos as i64; let mut newpos = 0; cvt(unsafe { c::SetFilePointerEx(self.handle.as_raw_handle(), pos, &mut newpos, whence) })?; Ok(newpos as u64) @@ -511,7 +511,7 @@ impl File { fn 
reparse_point( &self, space: &mut Align8<[MaybeUninit]>, - ) -> io::Result<(c::DWORD, *mut c::REPARSE_DATA_BUFFER)> { + ) -> io::Result<(u32, *mut c::REPARSE_DATA_BUFFER)> { unsafe { let mut bytes = 0; cvt({ @@ -524,7 +524,7 @@ impl File { ptr::null_mut(), 0, space.0.as_mut_ptr().cast(), - len as c::DWORD, + len as u32, &mut bytes, ptr::null_mut(), ) @@ -609,8 +609,7 @@ impl File { "Cannot set file timestamp to 0", )); } - let is_max = - |t: c::FILETIME| t.dwLowDateTime == c::DWORD::MAX && t.dwHighDateTime == c::DWORD::MAX; + let is_max = |t: c::FILETIME| t.dwLowDateTime == u32::MAX && t.dwHighDateTime == u32::MAX; if times.accessed.map_or(false, is_max) || times.modified.map_or(false, is_max) || times.created.map_or(false, is_max) @@ -641,7 +640,7 @@ impl File { self.handle.as_raw_handle(), c::FileBasicInfo, core::ptr::addr_of_mut!(info) as *mut c_void, - size as c::DWORD, + size as u32, ))?; Ok(info) } @@ -795,10 +794,12 @@ impl<'a> Iterator for DirBuffIter<'a> { } unsafe fn from_maybe_unaligned<'a>(p: *const u16, len: usize) -> Cow<'a, [u16]> { - if p.is_aligned() { - Cow::Borrowed(crate::slice::from_raw_parts(p, len)) - } else { - Cow::Owned((0..len).map(|i| p.add(i).read_unaligned()).collect()) + unsafe { + if p.is_aligned() { + Cow::Borrowed(crate::slice::from_raw_parts(p, len)) + } else { + Cow::Owned((0..len).map(|i| p.add(i).read_unaligned()).collect()) + } } } @@ -845,7 +846,7 @@ fn open_link_no_reparse(parent: &File, name: &[u16], access: u32) -> io::Result< // We make a special exception for `STATUS_DELETE_PENDING` because // otherwise this will be mapped to `ERROR_ACCESS_DENIED` which is // very unhelpful. - Err(io::Error::from_raw_os_error(c::ERROR_DELETE_PENDING as _)) + Err(io::Error::from_raw_os_error(c::ERROR_DELETE_PENDING as i32)) } else if status == c::STATUS_INVALID_PARAMETER && ATTRIBUTES.load(Ordering::Relaxed) == c::OBJ_DONT_REPARSE { @@ -897,7 +898,9 @@ impl IntoRawHandle for File { impl FromRawHandle for File { unsafe fn from_raw_handle(raw_handle: RawHandle) -> Self { - Self { handle: FromInner::from_inner(FromRawHandle::from_raw_handle(raw_handle)) } + unsafe { + Self { handle: FromInner::from_inner(FromRawHandle::from_raw_handle(raw_handle)) } + } } } @@ -1020,7 +1023,7 @@ impl FileTimes { } impl FileType { - fn new(attrs: c::DWORD, reparse_tag: c::DWORD) -> FileType { + fn new(attrs: u32, reparse_tag: u32) -> FileType { FileType { attributes: attrs, reparse_tag } } pub fn is_dir(&self) -> bool { @@ -1097,7 +1100,7 @@ pub fn readdir(p: &Path) -> io::Result { // // See issue #120040: https://github.com/rust-lang/rust/issues/120040. 
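The error-handling hunks in this file (and in several files below) replace comparisons against raw `c::ERROR_*` codes with a `WinError` value returned by `api::get_last_error()`. The actual definition lives in `api.rs`, outside this excerpt; a minimal sketch of the shape these call sites assume, using the standard Win32 error codes, would be:

    // Sketch only: inferred from the call sites in this patch, not copied
    // from `api.rs`. Deriving structural equality lets the constants be used
    // both in `match` patterns and with `==`/`!=`.
    #[derive(Clone, Copy, PartialEq, Eq)]
    pub struct WinError {
        pub code: u32,
    }

    impl WinError {
        pub const FILE_NOT_FOUND: Self = Self { code: 2 }; // ERROR_FILE_NOT_FOUND
        pub const ACCESS_DENIED: Self = Self { code: 5 }; // ERROR_ACCESS_DENIED
        pub const NO_MORE_FILES: Self = Self { code: 18 }; // ERROR_NO_MORE_FILES
        pub const INSUFFICIENT_BUFFER: Self = Self { code: 122 }; // ERROR_INSUFFICIENT_BUFFER
    }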
let last_error = api::get_last_error(); - if last_error.code == c::ERROR_FILE_NOT_FOUND { + if last_error == WinError::FILE_NOT_FOUND { return Ok(ReadDir { handle: FindNextFileHandle(find_handle), root: Arc::new(root), @@ -1417,20 +1420,22 @@ pub fn canonicalize(p: &Path) -> io::Result { pub fn copy(from: &Path, to: &Path) -> io::Result { unsafe extern "system" fn callback( - _TotalFileSize: c::LARGE_INTEGER, - _TotalBytesTransferred: c::LARGE_INTEGER, - _StreamSize: c::LARGE_INTEGER, - StreamBytesTransferred: c::LARGE_INTEGER, - dwStreamNumber: c::DWORD, - _dwCallbackReason: c::DWORD, + _TotalFileSize: i64, + _TotalBytesTransferred: i64, + _StreamSize: i64, + StreamBytesTransferred: i64, + dwStreamNumber: u32, + _dwCallbackReason: u32, _hSourceFile: c::HANDLE, _hDestinationFile: c::HANDLE, - lpData: c::LPCVOID, - ) -> c::DWORD { - if dwStreamNumber == 1 { - *(lpData as *mut i64) = StreamBytesTransferred; + lpData: *const c_void, + ) -> u32 { + unsafe { + if dwStreamNumber == 1 { + *(lpData as *mut i64) = StreamBytesTransferred; + } + c::PROGRESS_CONTINUE } - c::PROGRESS_CONTINUE } let pfrom = maybe_verbatim(from)?; let pto = maybe_verbatim(to)?; @@ -1531,7 +1536,7 @@ pub fn junction_point(original: &Path, link: &Path) -> io::Result<()> { } // Try to see if a file exists but, unlike `exists`, report I/O errors. -pub fn try_exists(path: &Path) -> io::Result { +pub fn exists(path: &Path) -> io::Result { // Open the file to ensure any symlinks are followed to their target. let mut opts = OpenOptions::new(); // No read, write, etc access rights are needed. diff --git a/std/src/sys/pal/windows/futex.rs b/std/src/sys/pal/windows/futex.rs index bc19c402d9c12..c54810e06cdd6 100644 --- a/std/src/sys/pal/windows/futex.rs +++ b/std/src/sys/pal/windows/futex.rs @@ -1,4 +1,4 @@ -use super::api; +use super::api::{self, WinError}; use crate::sys::c; use crate::sys::dur2timeout; use core::ffi::c_void; @@ -10,6 +10,12 @@ use core::sync::atomic::{ }; use core::time::Duration; +/// An atomic for use as a futex that is at least 8-bits but may be larger. +pub type SmallAtomic = AtomicU8; +/// Must be the underlying type of SmallAtomic +pub type SmallPrimitive = u8; + +pub unsafe trait Futex {} pub unsafe trait Waitable { type Atomic; } @@ -19,6 +25,7 @@ macro_rules! 
unsafe_waitable_int { unsafe impl Waitable for $int { type Atomic = $atomic; } + unsafe impl Futex for $atomic {} )* }; } @@ -41,6 +48,7 @@ unsafe impl Waitable for *const T { unsafe impl Waitable for *mut T { type Atomic = AtomicPtr; } +unsafe impl Futex for AtomicPtr {} pub fn wait_on_address( address: &W::Atomic, @@ -56,14 +64,14 @@ pub fn wait_on_address( } } -pub fn wake_by_address_single(address: &T) { +pub fn wake_by_address_single(address: &T) { unsafe { let addr = ptr::from_ref(address).cast::(); c::WakeByAddressSingle(addr); } } -pub fn wake_by_address_all(address: &T) { +pub fn wake_by_address_all(address: &T) { unsafe { let addr = ptr::from_ref(address).cast::(); c::WakeByAddressAll(addr); @@ -72,14 +80,14 @@ pub fn wake_by_address_all(address: &T) { pub fn futex_wait(futex: &W::Atomic, expected: W, timeout: Option) -> bool { // return false only on timeout - wait_on_address(futex, expected, timeout) || api::get_last_error().code != c::ERROR_TIMEOUT + wait_on_address(futex, expected, timeout) || api::get_last_error() != WinError::TIMEOUT } -pub fn futex_wake(futex: &T) -> bool { +pub fn futex_wake(futex: &T) -> bool { wake_by_address_single(futex); false } -pub fn futex_wake_all(futex: &T) { +pub fn futex_wake_all(futex: &T) { wake_by_address_all(futex) } diff --git a/std/src/sys/pal/windows/handle.rs b/std/src/sys/pal/windows/handle.rs index 3f85bb0a099a9..aaa1831dcc24d 100644 --- a/std/src/sys/pal/windows/handle.rs +++ b/std/src/sys/pal/windows/handle.rs @@ -72,7 +72,7 @@ impl IntoRawHandle for Handle { impl FromRawHandle for Handle { unsafe fn from_raw_handle(raw_handle: RawHandle) -> Self { - Self(FromRawHandle::from_raw_handle(raw_handle)) + unsafe { Self(FromRawHandle::from_raw_handle(raw_handle)) } } } @@ -138,13 +138,23 @@ impl Handle { pub unsafe fn read_overlapped( &self, - buf: &mut [u8], + buf: &mut [mem::MaybeUninit], overlapped: *mut c::OVERLAPPED, ) -> io::Result> { - let len = cmp::min(buf.len(), ::MAX as usize) as c::DWORD; - let mut amt = 0; - let res = - cvt(c::ReadFile(self.as_raw_handle(), buf.as_mut_ptr(), len, &mut amt, overlapped)); + // SAFETY: We have exclusive access to the buffer and it's up to the caller to + // ensure the OVERLAPPED pointer is valid for the lifetime of this function. + let (res, amt) = unsafe { + let len = cmp::min(buf.len(), u32::MAX as usize) as u32; + let mut amt = 0; + let res = cvt(c::ReadFile( + self.as_raw_handle(), + buf.as_mut_ptr().cast::(), + len, + &mut amt, + overlapped, + )); + (res, amt) + }; match res { Ok(_) => Ok(Some(amt as usize)), Err(e) => { @@ -209,12 +219,7 @@ impl Handle { Ok(Self(self.0.try_clone()?)) } - pub fn duplicate( - &self, - access: c::DWORD, - inherit: bool, - options: c::DWORD, - ) -> io::Result { + pub fn duplicate(&self, access: u32, inherit: bool, options: u32) -> io::Result { Ok(Self(self.0.as_handle().duplicate(access, inherit, options)?)) } @@ -233,21 +238,25 @@ impl Handle { let mut io_status = c::IO_STATUS_BLOCK::PENDING; // The length is clamped at u32::MAX. - let len = cmp::min(len, c::DWORD::MAX as usize) as c::DWORD; - let status = c::NtReadFile( - self.as_handle(), - ptr::null_mut(), - None, - ptr::null_mut(), - &mut io_status, - buf, - len, - offset.map(|n| n as _).as_ref(), - None, - ); + let len = cmp::min(len, u32::MAX as usize) as u32; + // SAFETY: It's up to the caller to ensure `buf` is writeable up to + // the provided `len`. 
+ let status = unsafe { + c::NtReadFile( + self.as_handle(), + ptr::null_mut(), + None, + ptr::null_mut(), + &mut io_status, + buf, + len, + offset.map(|n| n as _).as_ref(), + None, + ) + }; let status = if status == c::STATUS_PENDING { - c::WaitForSingleObject(self.as_raw_handle(), c::INFINITE); + unsafe { c::WaitForSingleObject(self.as_raw_handle(), c::INFINITE) }; io_status.status() } else { status @@ -265,7 +274,7 @@ impl Handle { status if c::nt_success(status) => Ok(io_status.Information), status => { - let error = c::RtlNtStatusToDosError(status); + let error = unsafe { c::RtlNtStatusToDosError(status) }; Err(io::Error::from_raw_os_error(error as _)) } } @@ -281,7 +290,7 @@ impl Handle { let mut io_status = c::IO_STATUS_BLOCK::PENDING; // The length is clamped at u32::MAX. - let len = cmp::min(buf.len(), c::DWORD::MAX as usize) as c::DWORD; + let len = cmp::min(buf.len(), u32::MAX as usize) as u32; let status = unsafe { c::NtWriteFile( self.as_handle(), diff --git a/std/src/sys/pal/windows/io.rs b/std/src/sys/pal/windows/io.rs index 77b8f3c410eb8..bf3dfdfdd3e7d 100644 --- a/std/src/sys/pal/windows/io.rs +++ b/std/src/sys/pal/windows/io.rs @@ -15,9 +15,9 @@ pub struct IoSlice<'a> { impl<'a> IoSlice<'a> { #[inline] pub fn new(buf: &'a [u8]) -> IoSlice<'a> { - assert!(buf.len() <= c::ULONG::MAX as usize); + assert!(buf.len() <= u32::MAX as usize); IoSlice { - vec: c::WSABUF { len: buf.len() as c::ULONG, buf: buf.as_ptr() as *mut u8 }, + vec: c::WSABUF { len: buf.len() as u32, buf: buf.as_ptr() as *mut u8 }, _p: PhantomData, } } @@ -29,7 +29,7 @@ impl<'a> IoSlice<'a> { } unsafe { - self.vec.len -= n as c::ULONG; + self.vec.len -= n as u32; self.vec.buf = self.vec.buf.add(n); } } @@ -49,9 +49,9 @@ pub struct IoSliceMut<'a> { impl<'a> IoSliceMut<'a> { #[inline] pub fn new(buf: &'a mut [u8]) -> IoSliceMut<'a> { - assert!(buf.len() <= c::ULONG::MAX as usize); + assert!(buf.len() <= u32::MAX as usize); IoSliceMut { - vec: c::WSABUF { len: buf.len() as c::ULONG, buf: buf.as_mut_ptr() }, + vec: c::WSABUF { len: buf.len() as u32, buf: buf.as_mut_ptr() }, _p: PhantomData, } } @@ -63,7 +63,7 @@ impl<'a> IoSliceMut<'a> { } unsafe { - self.vec.len -= n as c::ULONG; + self.vec.len -= n as u32; self.vec.buf = self.vec.buf.add(n); } } @@ -80,19 +80,17 @@ impl<'a> IoSliceMut<'a> { } pub fn is_terminal(h: &impl AsHandle) -> bool { - unsafe { handle_is_console(h.as_handle()) } + handle_is_console(h.as_handle()) } -unsafe fn handle_is_console(handle: BorrowedHandle<'_>) -> bool { - let handle = handle.as_raw_handle(); - +fn handle_is_console(handle: BorrowedHandle<'_>) -> bool { // A null handle means the process has no console. - if handle.is_null() { + if handle.as_raw_handle().is_null() { return false; } let mut out = 0; - if c::GetConsoleMode(handle, &mut out) != 0 { + if unsafe { c::GetConsoleMode(handle.as_raw_handle(), &mut out) != 0 } { // False positives aren't possible. If we got a console then we definitely have a console. return true; } @@ -101,9 +99,9 @@ unsafe fn handle_is_console(handle: BorrowedHandle<'_>) -> bool { msys_tty_on(handle) } -unsafe fn msys_tty_on(handle: c::HANDLE) -> bool { +fn msys_tty_on(handle: BorrowedHandle<'_>) -> bool { // Early return if the handle is not a pipe. 
- if c::GetFileType(handle) != c::FILE_TYPE_PIPE { + if unsafe { c::GetFileType(handle.as_raw_handle()) != c::FILE_TYPE_PIPE } { return false; } @@ -119,12 +117,14 @@ unsafe fn msys_tty_on(handle: c::HANDLE) -> bool { } let mut name_info = FILE_NAME_INFO { FileNameLength: 0, FileName: [0; c::MAX_PATH as usize] }; // Safety: buffer length is fixed. - let res = c::GetFileInformationByHandleEx( - handle, - c::FileNameInfo, - core::ptr::addr_of_mut!(name_info) as *mut c_void, - size_of::() as u32, - ); + let res = unsafe { + c::GetFileInformationByHandleEx( + handle.as_raw_handle(), + c::FileNameInfo, + core::ptr::addr_of_mut!(name_info) as *mut c_void, + size_of::() as u32, + ) + }; if res == 0 { return false; } diff --git a/std/src/sys/pal/windows/mod.rs b/std/src/sys/pal/windows/mod.rs index 402a205977b07..b85a8318bcbbd 100644 --- a/std/src/sys/pal/windows/mod.rs +++ b/std/src/sys/pal/windows/mod.rs @@ -1,4 +1,5 @@ #![allow(missing_docs, nonstandard_style)] +#![deny(unsafe_op_in_unsafe_fn)] use crate::ffi::{OsStr, OsString}; use crate::io::ErrorKind; @@ -31,8 +32,6 @@ pub mod process; pub mod rand; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; -pub mod thread_local_key; pub mod time; cfg_if::cfg_if! { if #[cfg(not(target_vendor = "uwp"))] { @@ -56,11 +55,13 @@ impl IoResult for Result { // SAFETY: must be called only once during runtime initialization. // NOTE: this is not guaranteed to run, for example when Rust code is called externally. pub unsafe fn init(_argc: isize, _argv: *const *const u8, _sigpipe: u8) { - stack_overflow::init(); + unsafe { + stack_overflow::init(); - // Normally, `thread::spawn` will call `Thread::set_name` but since this thread already - // exists, we have to call it ourselves. - thread::Thread::set_name_wide(wide_str!("main")); + // Normally, `thread::spawn` will call `Thread::set_name` but since this thread already + // exists, we have to call it ourselves. + thread::Thread::set_name_wide(wide_str!("main")); + } } // SAFETY: must be called only once during runtime cleanup. @@ -77,7 +78,7 @@ pub fn is_interrupted(_errno: i32) -> bool { pub fn decode_error_kind(errno: i32) -> ErrorKind { use ErrorKind::*; - match errno as c::DWORD { + match errno as u32 { c::ERROR_ACCESS_DENIED => return PermissionDenied, c::ERROR_ALREADY_EXISTS => return AlreadyExists, c::ERROR_FILE_EXISTS => return AlreadyExists, @@ -218,7 +219,7 @@ pub fn to_u16s>(s: S) -> crate::io::Result> { // from this closure is then the return value of the function. pub fn fill_utf16_buf(mut f1: F1, f2: F2) -> crate::io::Result where - F1: FnMut(*mut u16, c::DWORD) -> c::DWORD, + F1: FnMut(*mut u16, u32) -> u32, F2: FnOnce(&[u16]) -> T, { // Start off with a stack buf but then spill over to the heap if we end up @@ -227,7 +228,7 @@ where // This initial size also works around `GetFullPathNameW` returning // incorrect size hints for some short paths: // https://github.com/dylni/normpath/issues/5 - let mut stack_buf: [MaybeUninit; 512] = MaybeUninit::uninit_array(); + let mut stack_buf: [MaybeUninit; 512] = [MaybeUninit::uninit(); 512]; let mut heap_buf: Vec> = Vec::new(); unsafe { let mut n = stack_buf.len(); @@ -240,7 +241,7 @@ where // We used `reserve` and not `reserve_exact`, so in theory we // may have gotten more than requested. If so, we'd like to use // it... so long as we won't cause overflow. 
- n = heap_buf.capacity().min(c::DWORD::MAX as usize); + n = heap_buf.capacity().min(u32::MAX as usize); // Safety: MaybeUninit does not need initialization heap_buf.set_len(n); &mut heap_buf[..] @@ -256,13 +257,13 @@ where // error" is still 0 then we interpret it as a 0 length buffer and // not an actual error. c::SetLastError(0); - let k = match f1(buf.as_mut_ptr().cast::(), n as c::DWORD) { + let k = match f1(buf.as_mut_ptr().cast::(), n as u32) { 0 if api::get_last_error().code == 0 => 0, 0 => return Err(crate::io::Error::last_os_error()), n => n, } as usize; if k == n && api::get_last_error().code == c::ERROR_INSUFFICIENT_BUFFER { - n = n.saturating_mul(2).min(c::DWORD::MAX as usize); + n = n.saturating_mul(2).min(u32::MAX as usize); } else if k > n { n = k; } else if k == n { @@ -310,7 +311,7 @@ pub fn cvt(i: I) -> crate::io::Result { if i.is_zero() { Err(crate::io::Error::last_os_error()) } else { Ok(i) } } -pub fn dur2timeout(dur: Duration) -> c::DWORD { +pub fn dur2timeout(dur: Duration) -> u32 { // Note that a duration is a (u64, u32) (seconds, nanoseconds) pair, and the // timeouts in windows APIs are typically u32 milliseconds. To translate, we // have two pieces to take care of: @@ -322,7 +323,7 @@ pub fn dur2timeout(dur: Duration) -> c::DWORD { .checked_mul(1000) .and_then(|ms| ms.checked_add((dur.subsec_nanos() as u64) / 1_000_000)) .and_then(|ms| ms.checked_add(if dur.subsec_nanos() % 1_000_000 > 0 { 1 } else { 0 })) - .map(|ms| if ms > ::MAX as u64 { c::INFINITE } else { ms as c::DWORD }) + .map(|ms| if ms > ::MAX as u64 { c::INFINITE } else { ms as u32 }) .unwrap_or(c::INFINITE) } diff --git a/std/src/sys/pal/windows/net.rs b/std/src/sys/pal/windows/net.rs index 9e15b15a3513a..d51fb56238f2c 100644 --- a/std/src/sys/pal/windows/net.rs +++ b/std/src/sys/pal/windows/net.rs @@ -250,7 +250,7 @@ impl Socket { pub fn read_vectored(&self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { // On unix when a socket is shut down all further reads return 0, so we // do the same on windows to map a shut down socket to returning EOF. 
- let length = cmp::min(bufs.len(), c::DWORD::MAX as usize) as c::DWORD; + let length = cmp::min(bufs.len(), u32::MAX as usize) as u32; let mut nread = 0; let mut flags = 0; let result = unsafe { @@ -335,7 +335,7 @@ impl Socket { } pub fn write_vectored(&self, bufs: &[IoSlice<'_>]) -> io::Result<usize> { - let length = cmp::min(bufs.len(), c::DWORD::MAX as usize) as c::DWORD; + let length = cmp::min(bufs.len(), u32::MAX as usize) as u32; let mut nwritten = 0; let result = unsafe { c::WSASend( @@ -371,7 +371,7 @@ impl Socket { } pub fn timeout(&self, kind: c_int) -> io::Result<Option<Duration>> { - let raw: c::DWORD = net::getsockopt(self, c::SOL_SOCKET, kind)?; + let raw: u32 = net::getsockopt(self, c::SOL_SOCKET, kind)?; if raw == 0 { Ok(None) } else { @@ -436,7 +436,7 @@ impl Socket { pub unsafe fn from_raw(raw: c::SOCKET) -> Self { debug_assert_eq!(mem::size_of::<c::SOCKET>(), mem::size_of::<RawSocket>()); debug_assert_eq!(mem::align_of::<c::SOCKET>(), mem::align_of::<RawSocket>()); - Self::from_raw_socket(raw as RawSocket) + unsafe { Self::from_raw_socket(raw as RawSocket) } } } @@ -486,6 +486,6 @@ impl IntoRawSocket for Socket { impl FromRawSocket for Socket { unsafe fn from_raw_socket(raw_socket: RawSocket) -> Self { - Self(FromRawSocket::from_raw_socket(raw_socket)) + unsafe { Self(FromRawSocket::from_raw_socket(raw_socket)) } } } diff --git a/std/src/sys/pal/windows/os.rs b/std/src/sys/pal/windows/os.rs index 64d8b72aed282..f1f4d3a5d26ef 100644 --- a/std/src/sys/pal/windows/os.rs +++ b/std/src/sys/pal/windows/os.rs @@ -17,7 +17,8 @@ use crate::ptr; use crate::slice; use crate::sys::{c, cvt}; -use super::{api, to_u16s}; +use super::api::{self, WinError}; +use super::to_u16s; pub fn errno() -> i32 { api::get_last_error().code as i32 @@ -51,10 +52,10 @@ pub fn error_string(mut errnum: i32) -> String { let res = c::FormatMessageW( flags | c::FORMAT_MESSAGE_FROM_SYSTEM | c::FORMAT_MESSAGE_IGNORE_INSERTS, module, - errnum as c::DWORD, + errnum as u32, 0, buf.as_mut_ptr(), - buf.len() as c::DWORD, + buf.len() as u32, ptr::null(), ) as usize; if res == 0 { @@ -80,7 +81,7 @@ pub fn error_string(mut errnum: i32) -> String { } pub struct Env { - base: c::LPWCH, + base: *mut c::WCHAR, iter: EnvIterator, } @@ -125,7 +126,7 @@ impl Iterator for Env { } #[derive(Clone)] -struct EnvIterator(c::LPWCH); +struct EnvIterator(*mut c::WCHAR); impl Iterator for EnvIterator { type Item = (OsString, OsString); @@ -302,16 +303,22 @@ pub fn getenv(k: &OsStr) -> Option<OsString> { .ok() } -pub fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { - let k = to_u16s(k)?; - let v = to_u16s(v)?; +pub unsafe fn setenv(k: &OsStr, v: &OsStr) -> io::Result<()> { + // SAFETY: We ensure that k and v are null-terminated wide strings. + unsafe { + let k = to_u16s(k)?; + let v = to_u16s(v)?; - cvt(unsafe { c::SetEnvironmentVariableW(k.as_ptr(), v.as_ptr()) }).map(drop) + cvt(c::SetEnvironmentVariableW(k.as_ptr(), v.as_ptr())).map(drop) + } } -pub fn unsetenv(n: &OsStr) -> io::Result<()> { - let v = to_u16s(n)?; - cvt(unsafe { c::SetEnvironmentVariableW(v.as_ptr(), ptr::null()) }).map(drop) +pub unsafe fn unsetenv(n: &OsStr) -> io::Result<()> { + // SAFETY: We ensure that v is a null-terminated wide string.
+ unsafe { + let v = to_u16s(n)?; + cvt(c::SetEnvironmentVariableW(v.as_ptr(), ptr::null())).map(drop) + } } pub fn temp_dir() -> PathBuf { @@ -333,7 +340,7 @@ fn home_dir_crt() -> Option { buf, &mut sz, ) { - 0 if api::get_last_error().code != c::ERROR_INSUFFICIENT_BUFFER => 0, + 0 if api::get_last_error() != WinError::INSUFFICIENT_BUFFER => 0, 0 => sz, _ => sz - 1, // sz includes the null terminator } @@ -358,7 +365,7 @@ fn home_dir_crt() -> Option { super::fill_utf16_buf( |buf, mut sz| { match c::GetUserProfileDirectoryW(token, buf, &mut sz) { - 0 if api::get_last_error().code != c::ERROR_INSUFFICIENT_BUFFER => 0, + 0 if api::get_last_error() != WinError::INSUFFICIENT_BUFFER => 0, 0 => sz, _ => sz - 1, // sz includes the null terminator } @@ -382,7 +389,7 @@ pub fn home_dir() -> Option { } pub fn exit(code: i32) -> ! { - unsafe { c::ExitProcess(code as c::UINT) } + unsafe { c::ExitProcess(code as u32) } } pub fn getpid() -> u32 { diff --git a/std/src/sys/pal/windows/pipe.rs b/std/src/sys/pal/windows/pipe.rs index dfa938d4d5769..7a309b9638bd2 100644 --- a/std/src/sys/pal/windows/pipe.rs +++ b/std/src/sys/pal/windows/pipe.rs @@ -5,13 +5,13 @@ use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut, Read}; use crate::mem; use crate::path::Path; use crate::ptr; -use crate::slice; use crate::sync::atomic::AtomicUsize; use crate::sync::atomic::Ordering::Relaxed; use crate::sys::c; use crate::sys::fs::{File, OpenOptions}; use crate::sys::handle::Handle; use crate::sys::hashmap_random_keys; +use crate::sys::pal::windows::api::{self, WinError}; use crate::sys_common::{FromInner, IntoInner}; //////////////////////////////////////////////////////////////////////////////// @@ -124,20 +124,19 @@ pub fn anon_pipe(ours_readable: bool, their_handle_inheritable: bool) -> io::Res // testing strategy // For more info, see https://github.com/rust-lang/rust/pull/37677. 
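One note on the `os.rs` hunk above before the pipe logic continues: `setenv` and `unsetenv` are now `unsafe fn`s, so the soundness argument (the SAFETY comments on null termination, plus whatever synchronization the caller provides) moves up to the call site. The public wrapper presumably discharges that obligation internally; on this toolchain `std::env::set_var` remains a safe function, so user-visible behavior is unchanged:

    fn main() {
        // Still a safe call at this point; the unsafety introduced by the
        // patch is internal to `sys::os` (`set_var` is only slated to become
        // `unsafe` in a later edition).
        std::env::set_var("RUST_LOG", "debug");
        assert_eq!(std::env::var("RUST_LOG").as_deref(), Ok("debug"));
    }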
if handle == c::INVALID_HANDLE_VALUE { - let err = io::Error::last_os_error(); - let raw_os_err = err.raw_os_error(); + let error = api::get_last_error(); if tries < 10 { - if raw_os_err == Some(c::ERROR_ACCESS_DENIED as i32) { + if error == WinError::ACCESS_DENIED { continue; } else if reject_remote_clients_flag != 0 - && raw_os_err == Some(c::ERROR_INVALID_PARAMETER as i32) + && error == WinError::INVALID_PARAMETER { reject_remote_clients_flag = 0; tries -= 1; continue; } } - return Err(err); + return Err(io::Error::from_raw_os_error(error.code as i32)); } ours = Handle::from_raw_handle(handle); break; @@ -156,7 +155,7 @@ pub fn anon_pipe(ours_readable: bool, their_handle_inheritable: bool) -> io::Res opts.share_mode(0); let size = mem::size_of::(); let mut sa = c::SECURITY_ATTRIBUTES { - nLength: size as c::DWORD, + nLength: size as u32, lpSecurityDescriptor: ptr::null_mut(), bInheritHandle: their_handle_inheritable as i32, }; @@ -225,9 +224,9 @@ fn random_number() -> usize { // Abstracts over `ReadFileEx` and `WriteFileEx` type AlertableIoFn = unsafe extern "system" fn( BorrowedHandle<'_>, - c::LPVOID, - c::DWORD, - c::LPOVERLAPPED, + *mut core::ffi::c_void, + u32, + *mut c::OVERLAPPED, c::LPOVERLAPPED_COMPLETION_ROUTINE, ) -> c::BOOL; @@ -244,7 +243,7 @@ impl AnonPipe { pub fn read(&self, buf: &mut [u8]) -> io::Result { let result = unsafe { - let len = crate::cmp::min(buf.len(), c::DWORD::MAX as usize) as c::DWORD; + let len = crate::cmp::min(buf.len(), u32::MAX as usize) as u32; self.alertable_io_internal(c::ReadFileEx, buf.as_mut_ptr() as _, len) }; @@ -260,7 +259,7 @@ impl AnonPipe { pub fn read_buf(&self, mut buf: BorrowedCursor<'_>) -> io::Result<()> { let result = unsafe { - let len = crate::cmp::min(buf.capacity(), c::DWORD::MAX as usize) as c::DWORD; + let len = crate::cmp::min(buf.capacity(), u32::MAX as usize) as u32; self.alertable_io_internal(c::ReadFileEx, buf.as_mut().as_mut_ptr() as _, len) }; @@ -295,7 +294,7 @@ impl AnonPipe { pub fn write(&self, buf: &[u8]) -> io::Result { unsafe { - let len = crate::cmp::min(buf.len(), c::DWORD::MAX as usize) as c::DWORD; + let len = crate::cmp::min(buf.len(), u32::MAX as usize) as u32; self.alertable_io_internal(c::WriteFileEx, buf.as_ptr() as _, len) } } @@ -324,11 +323,12 @@ impl AnonPipe { /// [`ReadFileEx`]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfileex /// [`WriteFileEx`]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-writefileex /// [Asynchronous Procedure Call]: https://docs.microsoft.com/en-us/windows/win32/sync/asynchronous-procedure-calls + #[allow(unsafe_op_in_unsafe_fn)] unsafe fn alertable_io_internal( &self, io: AlertableIoFn, - buf: c::LPVOID, - len: c::DWORD, + buf: *mut core::ffi::c_void, + len: u32, ) -> io::Result { // Use "alertable I/O" to synchronize the pipe I/O. // This has four steps. @@ -478,8 +478,11 @@ impl<'a> AsyncPipe<'a> { fn schedule_read(&mut self) -> io::Result { assert_eq!(self.state, State::NotReading); let amt = unsafe { - let slice = slice_to_end(self.dst); - self.pipe.read_overlapped(slice, &mut *self.overlapped)? + if self.dst.capacity() == self.dst.len() { + let additional = if self.dst.capacity() == 0 { 16 } else { 1 }; + self.dst.reserve(additional); + } + self.pipe.read_overlapped(self.dst.spare_capacity_mut(), &mut *self.overlapped)? 
}; // If this read finished immediately then our overlapped event will @@ -559,13 +562,3 @@ impl<'a> Drop for AsyncPipe<'a> { } } } - -unsafe fn slice_to_end(v: &mut Vec) -> &mut [u8] { - if v.capacity() == 0 { - v.reserve(16); - } - if v.capacity() == v.len() { - v.reserve(1); - } - slice::from_raw_parts_mut(v.as_mut_ptr().add(v.len()), v.capacity() - v.len()) -} diff --git a/std/src/sys/pal/windows/process.rs b/std/src/sys/pal/windows/process.rs index e4ab2ca7da1ce..76d2cb77d474c 100644 --- a/std/src/sys/pal/windows/process.rs +++ b/std/src/sys/pal/windows/process.rs @@ -19,7 +19,7 @@ use crate::path::{Path, PathBuf}; use crate::ptr; use crate::sync::Mutex; use crate::sys::args::{self, Arg}; -use crate::sys::c::{self, NonZeroDWORD, EXIT_FAILURE, EXIT_SUCCESS}; +use crate::sys::c::{self, EXIT_FAILURE, EXIT_SUCCESS}; use crate::sys::cvt; use crate::sys::fs::{File, OpenOptions}; use crate::sys::handle::Handle; @@ -31,6 +31,8 @@ use crate::sys_common::IntoInner; use core::ffi::c_void; +use super::api::{self, WinError}; + //////////////////////////////////////////////////////////////////////////////// // Command //////////////////////////////////////////////////////////////////////////////// @@ -161,6 +163,7 @@ pub struct Command { env: CommandEnv, cwd: Option, flags: u32, + show_window: Option, detach: bool, // not currently exposed in std::process stdin: Option, stdout: Option, @@ -171,7 +174,7 @@ pub struct Command { pub enum Stdio { Inherit, - InheritSpecific { from_stdio_id: c::DWORD }, + InheritSpecific { from_stdio_id: u32 }, Null, MakePipe, Pipe(AnonPipe), @@ -192,6 +195,7 @@ impl Command { env: Default::default(), cwd: None, flags: 0, + show_window: None, detach: false, stdin: None, stdout: None, @@ -222,6 +226,9 @@ impl Command { pub fn creation_flags(&mut self, flags: u32) { self.flags = flags; } + pub fn show_window(&mut self, cmd_show: Option) { + self.show_window = cmd_show; + } pub fn force_quotes(&mut self, enabled: bool) { self.force_quotes_enabled = enabled; @@ -335,6 +342,11 @@ impl Command { si.hStdError = stderr.as_raw_handle(); } + if let Some(cmd_show) = self.show_window { + si.dwFlags |= c::STARTF_USESHOWWINDOW; + si.wShowWindow = cmd_show; + } + let si_ptr: *mut c::STARTUPINFOW; let mut proc_thread_attribute_list; @@ -352,7 +364,7 @@ impl Command { }; si_ptr = core::ptr::addr_of_mut!(si_ex) as _; } else { - si.cb = mem::size_of::() as c::DWORD; + si.cb = mem::size_of::() as u32; si_ptr = core::ptr::addr_of_mut!(si) as _; } @@ -554,7 +566,7 @@ fn program_exists(path: &Path) -> Option> { } impl Stdio { - fn to_handle(&self, stdio_id: c::DWORD, pipe: &mut Option) -> io::Result { + fn to_handle(&self, stdio_id: u32, pipe: &mut Option) -> io::Result { let use_stdio_id = |stdio_id| match stdio::get_handle(stdio_id) { Ok(io) => unsafe { let io = Handle::from_raw_handle(io); @@ -589,7 +601,7 @@ impl Stdio { Stdio::Null => { let size = mem::size_of::(); let mut sa = c::SECURITY_ATTRIBUTES { - nLength: size as c::DWORD, + nLength: size as u32, lpSecurityDescriptor: ptr::null_mut(), bInheritHandle: 1, }; @@ -645,12 +657,12 @@ impl Process { pub fn kill(&mut self) -> io::Result<()> { let result = unsafe { c::TerminateProcess(self.handle.as_raw_handle(), 1) }; if result == c::FALSE { - let error = unsafe { c::GetLastError() }; + let error = api::get_last_error(); // TerminateProcess returns ERROR_ACCESS_DENIED if the process has already been // terminated (by us, or for any other reason). So check if the process was actually // terminated, and if so, do not return an error. 
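The `AsyncPipe::schedule_read` rewrite above replaces the raw-pointer `slice_to_end` helper with `Vec::spare_capacity_mut`, matching the new `read_overlapped(&mut [MaybeUninit<u8>], ...)` signature in `handle.rs`. A self-contained sketch of the idiom, with a hypothetical `read_into` standing in for the OS read:

    use std::mem::MaybeUninit;

    // Hypothetical stand-in for an OS read: it initializes a prefix of `buf`
    // and reports how many bytes it wrote.
    fn read_into(buf: &mut [MaybeUninit<u8>]) -> usize {
        let n = buf.len().min(3);
        for slot in &mut buf[..n] {
            slot.write(b'x');
        }
        n
    }

    fn main() {
        let mut dst: Vec<u8> = Vec::with_capacity(16);
        // Hand out only the uninitialized tail; no zeroing pass is needed.
        let n = read_into(dst.spare_capacity_mut());
        // SAFETY: exactly the first `n` spare bytes were initialized above.
        unsafe { dst.set_len(dst.len() + n) };
        assert_eq!(dst, b"xxx");
    }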
- if error != c::ERROR_ACCESS_DENIED || self.try_wait().is_err() { - return Err(crate::io::Error::from_raw_os_error(error as i32)); + if error != WinError::ACCESS_DENIED || self.try_wait().is_err() { + return Err(crate::io::Error::from_raw_os_error(error.code as i32)); } } Ok(()) @@ -701,11 +713,11 @@ impl Process { } #[derive(PartialEq, Eq, Clone, Copy, Debug, Default)] -pub struct ExitStatus(c::DWORD); +pub struct ExitStatus(u32); impl ExitStatus { pub fn exit_ok(&self) -> Result<(), ExitStatusError> { - match NonZeroDWORD::try_from(self.0) { + match NonZero::::try_from(self.0) { /* was nonzero */ Ok(failure) => Err(ExitStatusError(failure)), /* was zero, couldn't convert */ Err(_) => Ok(()), } @@ -715,9 +727,9 @@ impl ExitStatus { } } -/// Converts a raw `c::DWORD` to a type-safe `ExitStatus` by wrapping it without copying. -impl From for ExitStatus { - fn from(u: c::DWORD) -> ExitStatus { +/// Converts a raw `u32` to a type-safe `ExitStatus` by wrapping it without copying. +impl From for ExitStatus { + fn from(u: u32) -> ExitStatus { ExitStatus(u) } } @@ -738,7 +750,7 @@ impl fmt::Display for ExitStatus { } #[derive(PartialEq, Eq, Clone, Copy, Debug)] -pub struct ExitStatusError(c::NonZeroDWORD); +pub struct ExitStatusError(NonZero); impl Into for ExitStatusError { fn into(self) -> ExitStatus { @@ -753,7 +765,7 @@ impl ExitStatusError { } #[derive(PartialEq, Eq, Clone, Copy, Debug)] -pub struct ExitCode(c::DWORD); +pub struct ExitCode(u32); impl ExitCode { pub const SUCCESS: ExitCode = ExitCode(EXIT_SUCCESS as _); @@ -767,13 +779,13 @@ impl ExitCode { impl From for ExitCode { fn from(code: u8) -> Self { - ExitCode(c::DWORD::from(code)) + ExitCode(u32::from(code)) } } impl From for ExitCode { fn from(code: u32) -> Self { - ExitCode(c::DWORD::from(code)) + ExitCode(u32::from(code)) } } diff --git a/std/src/sys/pal/windows/rand.rs b/std/src/sys/pal/windows/rand.rs index e427546222aea..e366bb995626a 100644 --- a/std/src/sys/pal/windows/rand.rs +++ b/std/src/sys/pal/windows/rand.rs @@ -1,6 +1,6 @@ +use core::{mem, ptr}; + use crate::sys::c; -use core::mem; -use core::ptr; #[cfg(not(target_vendor = "win7"))] #[inline] @@ -20,7 +20,7 @@ pub fn hashmap_random_keys() -> (u64, u64) { let mut v = (0, 0); let ret = unsafe { - c::RtlGenRandom(ptr::addr_of_mut!(v).cast::(), mem::size_of_val(&v) as c::ULONG) + c::RtlGenRandom(ptr::addr_of_mut!(v).cast::(), mem::size_of_val(&v) as u32) }; if ret != 0 { v } else { panic!("RNG broken: {}", io::Error::last_os_error()) } diff --git a/std/src/sys/pal/windows/stack_overflow.rs b/std/src/sys/pal/windows/stack_overflow.rs index f93f31026f818..467e21ab56a28 100644 --- a/std/src/sys/pal/windows/stack_overflow.rs +++ b/std/src/sys/pal/windows/stack_overflow.rs @@ -4,14 +4,15 @@ use crate::sys::c; use crate::thread; /// Reserve stack space for use in stack overflow exceptions. -pub unsafe fn reserve_stack() { - let result = c::SetThreadStackGuarantee(&mut 0x5000); +pub fn reserve_stack() { + let result = unsafe { c::SetThreadStackGuarantee(&mut 0x5000) }; // Reserving stack space is not critical so we allow it to fail in the released build of libstd. // We still use debug assert here so that CI will test that we haven't made a mistake calling the function. 
debug_assert_ne!(result, 0, "failed to reserve stack space for exception handling"); } -unsafe extern "system" fn vectored_handler(ExceptionInfo: *mut c::EXCEPTION_POINTERS) -> c::LONG { +unsafe extern "system" fn vectored_handler(ExceptionInfo: *mut c::EXCEPTION_POINTERS) -> i32 { + // SAFETY: It's up to the caller (which in this case is the OS) to ensure that `ExceptionInfo` is valid. unsafe { let rec = &(*(*ExceptionInfo).ExceptionRecord); let code = rec.ExceptionCode; @@ -26,11 +27,14 @@ unsafe extern "system" fn vectored_handler(ExceptionInfo: *mut c::EXCEPTION_POIN } } -pub unsafe fn init() { - let result = c::AddVectoredExceptionHandler(0, Some(vectored_handler)); - // Similar to the above, adding the stack overflow handler is allowed to fail - // but a debug assert is used so CI will still test that it normally works. - debug_assert!(!result.is_null(), "failed to install exception handler"); +pub fn init() { + // SAFETY: `vectored_handler` has the correct ABI and is safe to call during exception handling. + unsafe { + let result = c::AddVectoredExceptionHandler(0, Some(vectored_handler)); + // Similar to the above, adding the stack overflow handler is allowed to fail + // but a debug assert is used so CI will still test that it normally works. + debug_assert!(!result.is_null(), "failed to install exception handler"); + } // Set the thread stack guarantee for the main thread. reserve_stack(); } diff --git a/std/src/sys/pal/windows/stdio.rs b/std/src/sys/pal/windows/stdio.rs index 96c23f82aec2e..c6a21665157d7 100644 --- a/std/src/sys/pal/windows/stdio.rs +++ b/std/src/sys/pal/windows/stdio.rs @@ -1,6 +1,6 @@ #![unstable(issue = "none", feature = "windows_stdio")] -use super::api; +use super::api::{self, WinError}; use crate::cmp; use crate::io; use crate::mem::MaybeUninit; @@ -68,7 +68,7 @@ const MAX_BUFFER_SIZE: usize = 8192; // UTF-16 to UTF-8. 
pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3; -pub fn get_handle(handle_id: c::DWORD) -> io::Result { +pub fn get_handle(handle_id: u32) -> io::Result { let handle = unsafe { c::GetStdHandle(handle_id) }; if handle == c::INVALID_HANDLE_VALUE { Err(io::Error::last_os_error()) @@ -87,11 +87,7 @@ fn is_console(handle: c::HANDLE) -> bool { unsafe { c::GetConsoleMode(handle, &mut mode) != 0 } } -fn write( - handle_id: c::DWORD, - data: &[u8], - incomplete_utf8: &mut IncompleteUtf8, -) -> io::Result { +fn write(handle_id: u32, data: &[u8], incomplete_utf8: &mut IncompleteUtf8) -> io::Result { if data.is_empty() { return Ok(0); } @@ -182,12 +178,12 @@ fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result io::Result { debug_assert!(data.len() < u32::MAX as usize); let mut written = 0; cvt(unsafe { - c::WriteConsoleW( - handle, - data.as_ptr() as c::LPCVOID, - data.len() as u32, - &mut written, - ptr::null_mut(), - ) + c::WriteConsoleW(handle, data.as_ptr(), data.len() as u32, &mut written, ptr::null_mut()) })?; Ok(written as usize) } @@ -347,9 +337,9 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit]) -> io::Result() as c::ULONG, + nLength: crate::mem::size_of::() as u32, nInitialChars: 0, dwCtrlWakeupMask: CTRL_Z_MASK, dwControlKeyState: 0, @@ -361,7 +351,7 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit]) -> io::Result]) -> io::Result]) -> io::Result io::Result { - debug_assert!(utf16.len() <= c::c_int::MAX as usize); - debug_assert!(utf8.len() <= c::c_int::MAX as usize); + debug_assert!(utf16.len() <= i32::MAX as usize); + debug_assert!(utf8.len() <= i32::MAX as usize); if utf16.is_empty() { return Ok(0); @@ -396,9 +386,9 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result { c::CP_UTF8, // CodePage c::WC_ERR_INVALID_CHARS, // dwFlags utf16.as_ptr(), // lpWideCharStr - utf16.len() as c::c_int, // cchWideChar + utf16.len() as i32, // cchWideChar utf8.as_mut_ptr(), // lpMultiByteStr - utf8.len() as c::c_int, // cbMultiByte + utf8.len() as i32, // cbMultiByte ptr::null(), // lpDefaultChar ptr::null_mut(), // lpUsedDefaultChar ) diff --git a/std/src/sys/pal/windows/thread.rs b/std/src/sys/pal/windows/thread.rs index 70099e0a3b560..668a3c05e20be 100644 --- a/std/src/sys/pal/windows/thread.rs +++ b/std/src/sys/pal/windows/thread.rs @@ -27,29 +27,35 @@ impl Thread { // CreateThread rounds up values for the stack size to the nearest page size (at least 4kb). // If a value of zero is given then the default stack size is used instead. - let ret = c::CreateThread( - ptr::null_mut(), - stack, - Some(thread_start), - p as *mut _, - c::STACK_SIZE_PARAM_IS_A_RESERVATION, - ptr::null_mut(), - ); - let ret = HandleOrNull::from_raw_handle(ret); + // SAFETY: `thread_start` has the right ABI for a thread's entry point. + // `p` is simply passed through to the new thread without being touched. + let ret = unsafe { + let ret = c::CreateThread( + ptr::null_mut(), + stack, + Some(thread_start), + p as *mut _, + c::STACK_SIZE_PARAM_IS_A_RESERVATION, + ptr::null_mut(), + ); + HandleOrNull::from_raw_handle(ret) + }; return if let Ok(handle) = ret.try_into() { Ok(Thread { handle: Handle::from_inner(handle) }) } else { // The thread failed to start and as a result p was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. 
- drop(Box::from_raw(p)); + unsafe { drop(Box::from_raw(p)) }; Err(io::Error::last_os_error()) }; - unsafe extern "system" fn thread_start(main: *mut c_void) -> c::DWORD { + unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 { // Next, reserve some stack space for if we otherwise run out of stack. stack_overflow::reserve_stack(); // Finally, let's run some code. - Box::from_raw(main as *mut Box<dyn FnOnce()>)(); + // SAFETY: We are simply recreating the box that was leaked earlier. + // It's the responsibility of the one who calls `Thread::new` to ensure this is safe to call here. + unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)() }; 0 } } @@ -69,7 +75,7 @@ impl Thread { /// /// `name` must end with a zero value pub unsafe fn set_name_wide(name: &[u16]) { - c::SetThreadDescription(c::GetCurrentThread(), name.as_ptr()); + unsafe { c::SetThreadDescription(c::GetCurrentThread(), name.as_ptr()) }; } pub fn join(self) { diff --git a/std/src/sys/pal/windows/thread_local_dtor.rs b/std/src/sys/pal/windows/thread_local_dtor.rs deleted file mode 100644 index cf542d2bfb838..0000000000000 --- a/std/src/sys/pal/windows/thread_local_dtor.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Implements thread-local destructors that are not associated with any -//! particular data. - -#![unstable(feature = "thread_local_internals", issue = "none")] -#![cfg(target_thread_local)] - -pub use super::thread_local_key::register_keyless_dtor as register_dtor; diff --git a/std/src/sys/pal/windows/thread_local_key.rs b/std/src/sys/pal/windows/thread_local_key.rs deleted file mode 100644 index e5ba619fc6ba4..0000000000000 --- a/std/src/sys/pal/windows/thread_local_key.rs +++ /dev/null @@ -1,351 +0,0 @@ -use crate::cell::UnsafeCell; -use crate::ptr; -use crate::sync::atomic::{ - AtomicPtr, AtomicU32, - Ordering::{AcqRel, Acquire, Relaxed, Release}, -}; -use crate::sys::c; - -#[cfg(test)] -mod tests; - -// Using a per-thread list avoids the problems in synchronizing global state. -#[thread_local] -#[cfg(target_thread_local)] -static DESTRUCTORS: crate::cell::RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = - crate::cell::RefCell::new(Vec::new()); - -// Ensure this can never be inlined because otherwise this may break in dylibs. -// See #44391. -#[inline(never)] -#[cfg(target_thread_local)] -pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - dtors_used(); - match DESTRUCTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } -} - -#[inline(never)] // See comment above -#[cfg(target_thread_local)] -/// Runs destructors. This should not be called until thread exit. -unsafe fn run_keyless_dtors() { - // Drop all the destructors. - // - // Note: While this is potentially an infinite loop, it *should* be - // the case that this loop always terminates because we provide the - // guarantee that a TLS key cannot be set after it is flagged for - // destruction. - loop { - // Use a let-else binding to ensure the `RefCell` guard is dropped - // immediately. Otherwise, a panic would occur if a TLS destructor - // tries to access the list. - let Some((ptr, dtor)) = DESTRUCTORS.borrow_mut().pop() else { - break; - }; - (dtor)(ptr); - } - // We're done so free the memory. - DESTRUCTORS.replace(Vec::new()); -} - -type Key = c::DWORD; -type Dtor = unsafe extern "C" fn(*mut u8); - -// Turns out, like pretty much everything, Windows is pretty close to the -// functionality that Unix provides, but slightly different!
In the case of -// TLS, Windows does not provide an API to provide a destructor for a TLS -// variable. This ends up being pretty crucial to this implementation, so we -// need a way around this. -// -// The solution here ended up being a little obscure, but fear not, the -// internet has informed me [1][2] that this solution is not unique (no way -// I could have thought of it as well!). The key idea is to insert some hook -// somewhere to run arbitrary code on thread termination. With this in place -// we'll be able to run anything we like, including all TLS destructors! -// -// To accomplish this feat, we perform a number of threads, all contained -// within this module: -// -// * All TLS destructors are tracked by *us*, not the Windows runtime. This -// means that we have a global list of destructors for each TLS key that -// we know about. -// * When a thread exits, we run over the entire list and run dtors for all -// non-null keys. This attempts to match Unix semantics in this regard. -// -// For more details and nitty-gritty, see the code sections below! -// -// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way -// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 - -pub struct StaticKey { - /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX - /// is not a valid key value, this allows us to use zero as sentinel value - /// without risking overflow. - key: AtomicU32, - dtor: Option, - next: AtomicPtr, - /// Currently, destructors cannot be unregistered, so we cannot use racy - /// initialization for keys. Instead, we need synchronize initialization. - /// Use the Windows-provided `Once` since it does not require TLS. - once: UnsafeCell, -} - -impl StaticKey { - #[inline] - pub const fn new(dtor: Option) -> StaticKey { - StaticKey { - key: AtomicU32::new(0), - dtor, - next: AtomicPtr::new(ptr::null_mut()), - once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT), - } - } - - #[inline] - pub unsafe fn set(&'static self, val: *mut u8) { - let r = c::TlsSetValue(self.key(), val.cast()); - debug_assert_eq!(r, c::TRUE); - } - - #[inline] - pub unsafe fn get(&'static self) -> *mut u8 { - c::TlsGetValue(self.key()).cast() - } - - #[inline] - unsafe fn key(&'static self) -> Key { - match self.key.load(Acquire) { - 0 => self.init(), - key => key - 1, - } - } - - #[cold] - unsafe fn init(&'static self) -> Key { - if self.dtor.is_some() { - dtors_used(); - let mut pending = c::FALSE; - let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut()); - assert_eq!(r, c::TRUE); - - if pending == c::FALSE { - // Some other thread initialized the key, load it. - self.key.load(Relaxed) - 1 - } else { - let key = c::TlsAlloc(); - if key == c::TLS_OUT_OF_INDEXES { - // Wakeup the waiting threads before panicking to avoid deadlock. - c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut()); - panic!("out of TLS indexes"); - } - - register_dtor(self); - - // Release-storing the key needs to be the last thing we do. - // This is because in `fn key()`, other threads will do an acquire load of the key, - // and if that sees this write then it will entirely bypass the `InitOnce`. We thus - // need to establish synchronization through `key`. In particular that acquire load - // must happen-after the register_dtor above, to ensure the dtor actually runs! 
- self.key.store(key + 1, Release); - - let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut()); - debug_assert_eq!(r, c::TRUE); - - key - } - } else { - // If there is no destructor to clean up, we can use racy initialization. - - let key = c::TlsAlloc(); - assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes"); - - match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) { - Ok(_) => key, - Err(new) => { - // Some other thread completed initialization first, so destroy - // our key and use theirs. - let r = c::TlsFree(key); - debug_assert_eq!(r, c::TRUE); - new - 1 - } - } - } - } -} - -unsafe impl Send for StaticKey {} -unsafe impl Sync for StaticKey {} - -// ------------------------------------------------------------------------- -// Dtor registration -// -// Windows has no native support for running destructors so we manage our own -// list of destructors to keep track of how to destroy keys. We then install a -// callback later to get invoked whenever a thread exits, running all -// appropriate destructors. -// -// Currently unregistration from this list is not supported. A destructor can be -// registered but cannot be unregistered. There's various simplifying reasons -// for doing this, the big ones being: -// -// 1. Currently we don't even support deallocating TLS keys, so normal operation -// doesn't need to deallocate a destructor. -// 2. There is no point in time where we know we can unregister a destructor -// because it could always be getting run by some remote thread. -// -// Typically processes have a statically known set of TLS keys which is pretty -// small, and we'd want to keep this memory alive for the whole process anyway -// really. - -static DTORS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); - -/// Should only be called once per key, otherwise loops or breaks may occur in -/// the linked list. -unsafe fn register_dtor(key: &'static StaticKey) { - // Ensure this is never run when native thread locals are available. - assert_eq!(false, cfg!(target_thread_local)); - let this = <*const StaticKey>::cast_mut(key); - // Use acquire ordering to pass along the changes done by the previously - // registered keys when we store the new head with release ordering. - let mut head = DTORS.load(Acquire); - loop { - key.next.store(head, Relaxed); - match DTORS.compare_exchange_weak(head, this, Release, Acquire) { - Ok(_) => break, - Err(new) => head = new, - } - } -} - -// ------------------------------------------------------------------------- -// Where the Magic (TM) Happens -// -// If you're looking at this code, and wondering "what is this doing?", -// you're not alone! I'll try to break this down step by step: -// -// # What's up with CRT$XLB? -// -// For anything about TLS destructors to work on Windows, we have to be able -// to run *something* when a thread exits. To do so, we place a very special -// static in a very special location. If this is encoded in just the right -// way, the kernel's loader is apparently nice enough to run some function -// of ours whenever a thread exits! How nice of the kernel! -// -// Lots of detailed information can be found in source [1] above, but the -// gist of it is that this is leveraging a feature of Microsoft's PE format -// (executable format) which is not actually used by any compilers today. -// This apparently translates to any callbacks in the ".CRT$XLB" section -// being run on certain events. 
-// -// So after all that, we use the compiler's #[link_section] feature to place -// a callback pointer into the magic section so it ends up being called. -// -// # What's up with this callback? -// -// The callback specified receives a number of parameters from... someone! -// (the kernel? the runtime? I'm not quite sure!) There are a few events that -// this gets invoked for, but we're currently only interested on when a -// thread or a process "detaches" (exits). The process part happens for the -// last thread and the thread part happens for any normal thread. -// -// # Ok, what's up with running all these destructors? -// -// This will likely need to be improved over time, but this function -// attempts a "poor man's" destructor callback system. Once we've got a list -// of what to run, we iterate over all keys, check their values, and then run -// destructors if the values turn out to be non null (setting them to null just -// beforehand). We do this a few times in a loop to basically match Unix -// semantics. If we don't reach a fixed point after a short while then we just -// inevitably leak something most likely. -// -// # The article mentions weird stuff about "/INCLUDE"? -// -// It sure does! Specifically we're talking about this quote: -// -// The Microsoft run-time library facilitates this process by defining a -// memory image of the TLS Directory and giving it the special name -// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The -// linker looks for this memory image and uses the data there to create the -// TLS Directory. Other compilers that support TLS and work with the -// Microsoft linker must use this same technique. -// -// Basically what this means is that if we want support for our TLS -// destructors/our hook being called then we need to make sure the linker does -// not omit this symbol. Otherwise it will omit it and our callback won't be -// wired up. -// -// We don't actually use the `/INCLUDE` linker flag here like the article -// mentions because the Rust compiler doesn't propagate linker flags, but -// instead we use a shim function which performs a volatile 1-byte load from -// the address of the symbol to ensure it sticks around. - -#[link_section = ".CRT$XLB"] -#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section` -pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) = - on_tls_callback; - -fn dtors_used() { - // we don't want LLVM eliminating p_thread_callback when destructors are used. - // when the symbol makes it to the linker the linker will take over - unsafe { crate::intrinsics::volatile_load(&p_thread_callback) }; -} - -unsafe extern "system" fn on_tls_callback(_h: c::LPVOID, dwReason: c::DWORD, _pv: c::LPVOID) { - if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH { - #[cfg(not(target_thread_local))] - run_dtors(); - #[cfg(target_thread_local)] - run_keyless_dtors(); - } - - // See comments above for what this is doing. Note that we don't need this - // trickery on GNU windows, just on MSVC. - #[cfg(all(target_env = "msvc", not(target_thread_local)))] - { - extern "C" { - static _tls_used: u8; - } - crate::intrinsics::volatile_load(&_tls_used); - } -} - -#[cfg(not(target_thread_local))] -unsafe fn run_dtors() { - for _ in 0..5 { - let mut any_run = false; - - // Use acquire ordering to observe key initialization. 
- let mut cur = DTORS.load(Acquire); - while !cur.is_null() { - let pre_key = (*cur).key.load(Acquire); - let dtor = (*cur).dtor.unwrap(); - cur = (*cur).next.load(Relaxed); - - // In StaticKey::init, we register the dtor before setting `key`. - // So if one thread's `run_dtors` races with another thread executing `init` on the same - // `StaticKey`, we can encounter a key of 0 here. That means this key was never - // initialized in this thread so we can safely skip it. - if pre_key == 0 { - continue; - } - // If this is non-zero, then via the `Acquire` load above we synchronized with - // everything relevant for this key. (It's not clear that this is needed, since the - // release-acquire pair on DTORS also establishes synchronization, but better safe than - // sorry.) - let key = pre_key - 1; - - let ptr = c::TlsGetValue(key); - if !ptr.is_null() { - c::TlsSetValue(key, ptr::null_mut()); - dtor(ptr as *mut _); - any_run = true; - } - } - - if !any_run { - break; - } - } -} diff --git a/std/src/sys/pal/windows/thread_local_key/tests.rs b/std/src/sys/pal/windows/thread_local_key/tests.rs deleted file mode 100644 index 4119f99096842..0000000000000 --- a/std/src/sys/pal/windows/thread_local_key/tests.rs +++ /dev/null @@ -1,57 +0,0 @@ -// This file only tests the thread local key fallback. -// Windows targets with native thread local support do not use this. -#![cfg(not(target_thread_local))] - -use super::StaticKey; -use crate::ptr; - -#[test] -fn smoke() { - static K1: StaticKey = StaticKey::new(None); - static K2: StaticKey = StaticKey::new(None); - - unsafe { - assert!(K1.get().is_null()); - assert!(K2.get().is_null()); - K1.set(ptr::without_provenance_mut(1)); - K2.set(ptr::without_provenance_mut(2)); - assert_eq!(K1.get() as usize, 1); - assert_eq!(K2.get() as usize, 2); - } -} - -#[test] -fn destructors() { - use crate::mem::ManuallyDrop; - use crate::sync::Arc; - use crate::thread; - - unsafe extern "C" fn destruct(ptr: *mut u8) { - drop(Arc::from_raw(ptr as *const ())); - } - - static KEY: StaticKey = StaticKey::new(Some(destruct)); - - let shared1 = Arc::new(()); - let shared2 = Arc::clone(&shared1); - - unsafe { - assert!(KEY.get().is_null()); - KEY.set(Arc::into_raw(shared1) as *mut u8); - } - - thread::spawn(move || unsafe { - assert!(KEY.get().is_null()); - KEY.set(Arc::into_raw(shared2) as *mut u8); - }) - .join() - .unwrap(); - - // Leak the Arc, let the TLS destructor clean it up. - let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(KEY.get() as *const ())) }; - assert_eq!( - Arc::strong_count(&shared1), - 1, - "destructor should have dropped the other reference on thread exit" - ); -} diff --git a/std/src/sys/pal/windows/time.rs b/std/src/sys/pal/windows/time.rs index 09e78a29304f9..b853daeffebd7 100644 --- a/std/src/sys/pal/windows/time.rs +++ b/std/src/sys/pal/windows/time.rs @@ -76,8 +76,8 @@ impl SystemTime { fn from_intervals(intervals: i64) -> SystemTime { SystemTime { t: c::FILETIME { - dwLowDateTime: intervals as c::DWORD, - dwHighDateTime: (intervals >> 32) as c::DWORD, + dwLowDateTime: intervals as u32, + dwHighDateTime: (intervals >> 32) as u32, }, } } @@ -172,7 +172,7 @@ mod perf_counter { use crate::time::Duration; pub struct PerformanceCounterInstant { - ts: c::LARGE_INTEGER, + ts: i64, } impl PerformanceCounterInstant { pub fn now() -> Self { @@ -196,7 +196,7 @@ mod perf_counter { } } - fn frequency() -> c::LARGE_INTEGER { + fn frequency() -> i64 { // Either the cached result of `QueryPerformanceFrequency` or `0` for // uninitialized. 
Storing this as a single `AtomicU64` allows us to use // `Relaxed` operations, as we are only interested in the effects on a @@ -206,7 +206,7 @@ mod perf_counter { let cached = FREQUENCY.load(Ordering::Relaxed); // If a previous thread has filled in this global state, use that. if cached != 0 { - return cached as c::LARGE_INTEGER; + return cached as i64; } // ... otherwise learn for ourselves ... let mut frequency = 0; @@ -218,8 +218,8 @@ mod perf_counter { frequency } - fn query() -> c::LARGE_INTEGER { - let mut qpc_value: c::LARGE_INTEGER = 0; + fn query() -> i64 { + let mut qpc_value: i64 = 0; cvt(unsafe { c::QueryPerformanceCounter(&mut qpc_value) }).unwrap(); qpc_value } diff --git a/std/src/sys/pal/xous/mod.rs b/std/src/sys/pal/xous/mod.rs index 68189bcc2e377..961d45c5e834f 100644 --- a/std/src/sys/pal/xous/mod.rs +++ b/std/src/sys/pal/xous/mod.rs @@ -1,4 +1,4 @@ -#![deny(unsafe_op_in_unsafe_fn)] +#![forbid(unsafe_op_in_unsafe_fn)] pub mod alloc; #[path = "../unsupported/args.rs"] @@ -17,7 +17,6 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -pub mod thread_local_key; pub mod time; #[path = "../unsupported/common.rs"] diff --git a/std/src/sys/pal/xous/os.rs b/std/src/sys/pal/xous/os.rs index 8d2eaee8aa617..9be09eed62989 100644 --- a/std/src/sys/pal/xous/os.rs +++ b/std/src/sys/pal/xous/os.rs @@ -149,11 +149,11 @@ pub fn getenv(_: &OsStr) -> Option { None } -pub fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot set env vars on this platform")) } -pub fn unsetenv(_: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(_: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot unset env vars on this platform")) } diff --git a/std/src/sys/pal/xous/thread.rs b/std/src/sys/pal/xous/thread.rs index da7d722cc7082..279f24f9ee8e4 100644 --- a/std/src/sys/pal/xous/thread.rs +++ b/std/src/sys/pal/xous/thread.rs @@ -81,7 +81,7 @@ impl Thread { // Destroy TLS, which will free the TLS page and call the destructor for // any thread local storage (if any). unsafe { - crate::sys::thread_local_key::destroy_tls(); + crate::sys::thread_local::key::destroy_tls(); } // Deallocate the stack memory, along with the guard pages. Afterwards, diff --git a/std/src/sys/pal/zkvm/alloc.rs b/std/src/sys/pal/zkvm/alloc.rs index fd333f1215150..2fdca22352470 100644 --- a/std/src/sys/pal/zkvm/alloc.rs +++ b/std/src/sys/pal/zkvm/alloc.rs @@ -5,7 +5,7 @@ use crate::alloc::{GlobalAlloc, Layout, System}; unsafe impl GlobalAlloc for System { #[inline] unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - abi::sys_alloc_aligned(layout.size(), layout.align()) + unsafe { abi::sys_alloc_aligned(layout.size(), layout.align()) } } #[inline] diff --git a/std/src/sys/pal/zkvm/mod.rs b/std/src/sys/pal/zkvm/mod.rs index 0b22eabca6d82..651f25d66236b 100644 --- a/std/src/sys/pal/zkvm/mod.rs +++ b/std/src/sys/pal/zkvm/mod.rs @@ -6,6 +6,7 @@ //! This is all super highly experimental and not actually intended for //! wide/production use yet, it's still all in the experimental category. This //! will likely change over time. 
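The `frequency()` hunk above keeps the once-queried counter frequency in a global atomic so later calls skip the OS query entirely; since every racing writer stores the same value, `Relaxed` ordering suffices. A minimal sketch of that caching idiom, with a hypothetical `query_frequency_from_os` standing in for the real `QueryPerformanceFrequency` call:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Hypothetical stand-in for the expensive OS query.
fn query_frequency_from_os() -> u64 {
    10_000_000 // e.g. a 10 MHz counter
}

// Either the cached frequency or 0 for "not yet queried".
static FREQUENCY: AtomicU64 = AtomicU64::new(0);

fn frequency() -> u64 {
    // Relaxed is enough: the value itself is the only shared state, and
    // two racing threads storing the same value is harmless.
    let cached = FREQUENCY.load(Ordering::Relaxed);
    if cached != 0 {
        return cached;
    }
    let freq = query_frequency_from_os();
    FREQUENCY.store(freq, Ordering::Relaxed);
    freq
}

fn main() {
    assert_eq!(frequency(), frequency());
}
```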
+#![forbid(unsafe_op_in_unsafe_fn)] const WORD_SIZE: usize = core::mem::size_of::(); @@ -25,7 +26,6 @@ pub mod pipe; #[path = "../unsupported/process.rs"] pub mod process; pub mod stdio; -pub mod thread_local_key; #[path = "../unsupported/time.rs"] pub mod time; diff --git a/std/src/sys/pal/zkvm/os.rs b/std/src/sys/pal/zkvm/os.rs index 759beb2d306b9..e7d6cd52a258e 100644 --- a/std/src/sys/pal/zkvm/os.rs +++ b/std/src/sys/pal/zkvm/os.rs @@ -115,11 +115,11 @@ pub fn getenv(varname: &OsStr) -> Option { Some(OsString::from_inner(os_str::Buf { inner: u8s.to_vec() })) } -pub fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { +pub unsafe fn setenv(_: &OsStr, _: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot set env vars on this platform")) } -pub fn unsetenv(_: &OsStr) -> io::Result<()> { +pub unsafe fn unsetenv(_: &OsStr) -> io::Result<()> { Err(io::const_io_error!(io::ErrorKind::Unsupported, "cannot unset env vars on this platform")) } diff --git a/std/src/sys/pal/zkvm/thread_local_key.rs b/std/src/sys/pal/zkvm/thread_local_key.rs deleted file mode 100644 index 2f67924c61823..0000000000000 --- a/std/src/sys/pal/zkvm/thread_local_key.rs +++ /dev/null @@ -1,23 +0,0 @@ -use crate::alloc::{alloc, Layout}; - -pub type Key = usize; - -#[inline] -pub unsafe fn create(_dtor: Option) -> Key { - alloc(Layout::new::<*mut u8>()) as _ -} - -#[inline] -pub unsafe fn set(key: Key, value: *mut u8) { - let key: *mut *mut u8 = core::ptr::with_exposed_provenance_mut(key); - *key = value; -} - -#[inline] -pub unsafe fn get(key: Key) -> *mut u8 { - let key: *mut *mut u8 = core::ptr::with_exposed_provenance_mut(key); - *key -} - -#[inline] -pub unsafe fn destroy(_key: Key) {} diff --git a/std/src/sys/personality/dwarf/mod.rs b/std/src/sys/personality/dwarf/mod.rs index 652fbe95a14d1..89f7f133e21b4 100644 --- a/std/src/sys/personality/dwarf/mod.rs +++ b/std/src/sys/personality/dwarf/mod.rs @@ -17,32 +17,30 @@ pub struct DwarfReader { pub ptr: *const u8, } -#[repr(C, packed)] -struct Unaligned(T); - +#[forbid(unsafe_op_in_unsafe_fn)] impl DwarfReader { pub fn new(ptr: *const u8) -> DwarfReader { DwarfReader { ptr } } - // DWARF streams are packed, so e.g., a u32 would not necessarily be aligned - // on a 4-byte boundary. This may cause problems on platforms with strict - // alignment requirements. By wrapping data in a "packed" struct, we are - // telling the backend to generate "misalignment-safe" code. + /// Read a type T and then bump the pointer by that amount. + /// + /// DWARF streams are "packed", so all types must be read at align 1. pub unsafe fn read(&mut self) -> T { - let Unaligned(result) = *(self.ptr as *const Unaligned); - self.ptr = self.ptr.add(mem::size_of::()); - result + unsafe { + let result = self.ptr.cast::().read_unaligned(); + self.ptr = self.ptr.byte_add(mem::size_of::()); + result + } } - // ULEB128 and SLEB128 encodings are defined in Section 7.6 - "Variable - // Length Data". + /// ULEB128 and SLEB128 encodings are defined in Section 7.6 - "Variable Length Data". 
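Before moving on to the LEB128 readers below: the `DwarfReader::read` rewrite above replaces the old `#[repr(C, packed)]` wrapper trick with `read_unaligned`, which states the intent directly: the load must be valid at alignment 1. A standalone demonstration (not std code) of why a plain dereference would be wrong here:

```rust
fn main() {
    // A byte stream in which a u32 starts at offset 1, i.e. misaligned.
    let bytes: [u8; 5] = [0xFF, 0x78, 0x56, 0x34, 0x12];
    let ptr = bytes[1..].as_ptr().cast::<u32>();

    // `unsafe { *ptr }` would be UB: the pointer is not 4-byte aligned.
    // `read_unaligned` instead emits a load that is valid at any alignment.
    let value = unsafe { ptr.read_unaligned() };
    assert_eq!(value, u32::from_ne_bytes([0x78, 0x56, 0x34, 0x12]));
    println!("{value:#010x}");
}
```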
pub unsafe fn read_uleb128(&mut self) -> u64 { let mut shift: usize = 0; let mut result: u64 = 0; let mut byte: u8; loop { - byte = self.read::(); + byte = unsafe { self.read::() }; result |= ((byte & 0x7F) as u64) << shift; shift += 7; if byte & 0x80 == 0 { @@ -57,7 +55,7 @@ impl DwarfReader { let mut result: u64 = 0; let mut byte: u8; loop { - byte = self.read::(); + byte = unsafe { self.read::() }; result |= ((byte & 0x7F) as u64) << shift; shift += 7; if byte & 0x80 == 0 { diff --git a/std/src/sys/sync/condvar/itron.rs b/std/src/sys/sync/condvar/itron.rs index 9b64d241efd12..3a3039889e98b 100644 --- a/std/src/sys/sync/condvar/itron.rs +++ b/std/src/sys/sync/condvar/itron.rs @@ -1,4 +1,5 @@ //! POSIX conditional variable implementation based on user-space wait queues. + use crate::sys::pal::itron::{ abi, error::expect_success_aborting, spin::SpinMutex, task, time::with_tmos_strong, }; diff --git a/std/src/sys/sync/condvar/teeos.rs b/std/src/sys/sync/condvar/teeos.rs index 0a931f407d2fa..6457da91c2a5d 100644 --- a/std/src/sys/sync/condvar/teeos.rs +++ b/std/src/sys/sync/condvar/teeos.rs @@ -76,16 +76,16 @@ impl Condvar { #[inline] pub unsafe fn wait(&self, mutex: &Mutex) { - let mutex = mutex::raw(mutex); + let mutex = unsafe { mutex::raw(mutex) }; self.verify(mutex); - let r = libc::pthread_cond_wait(raw(self), mutex); + let r = unsafe { libc::pthread_cond_wait(raw(self), mutex) }; debug_assert_eq!(r, 0); } pub unsafe fn wait_timeout(&self, mutex: &Mutex, dur: Duration) -> bool { use crate::sys::time::Timespec; - let mutex = mutex::raw(mutex); + let mutex = unsafe { mutex::raw(mutex) }; self.verify(mutex); let timeout = Timespec::now(libc::CLOCK_MONOTONIC) @@ -93,7 +93,7 @@ impl Condvar { .and_then(|t| t.to_timespec()) .unwrap_or(TIMESPEC_MAX); - let r = pthread_cond_timedwait(raw(self), mutex, &timeout); + let r = unsafe { pthread_cond_timedwait(raw(self), mutex, &timeout) }; assert!(r == libc::ETIMEDOUT || r == 0); r == 0 } diff --git a/std/src/sys/sync/mutex/futex.rs b/std/src/sys/sync/mutex/futex.rs index 7427cae94d68a..81afa94b14787 100644 --- a/std/src/sys/sync/mutex/futex.rs +++ b/std/src/sys/sync/mutex/futex.rs @@ -1,19 +1,8 @@ -use crate::sync::atomic::{ - self, - Ordering::{Acquire, Relaxed, Release}, -}; -use crate::sys::futex::{futex_wait, futex_wake}; - -cfg_if::cfg_if! { -if #[cfg(windows)] { - // On Windows we can have a smol futex - type Atomic = atomic::AtomicU8; - type State = u8; -} else { - type Atomic = atomic::AtomicU32; - type State = u32; -} -} +use crate::sync::atomic::Ordering::{Acquire, Relaxed, Release}; +use crate::sys::futex::{self, futex_wait, futex_wake}; + +type Atomic = futex::SmallAtomic; +type State = futex::SmallPrimitive; pub struct Mutex { futex: Atomic, diff --git a/std/src/sys/sync/mutex/itron.rs b/std/src/sys/sync/mutex/itron.rs index a134eb2d1beca..4ba32a8fbcd69 100644 --- a/std/src/sys/sync/mutex/itron.rs +++ b/std/src/sys/sync/mutex/itron.rs @@ -1,5 +1,6 @@ //! Mutex implementation backed by μITRON mutexes. Assumes `acre_mtx` and //! `TA_INHERIT` are available. 
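Returning to the `read_uleb128`/`read_sleb128` hunks just above: the encoding they decode is the DWARF "Variable Length Data" scheme, seven payload bits per byte with the high bit as a continuation flag. The same logic is easier to exercise against a slice; a safe sketch, not the std implementation:

```rust
/// Decode one ULEB128 value from `bytes`, returning the value and the
/// number of bytes consumed, or `None` if the input ends mid-number.
fn read_uleb128(bytes: &[u8]) -> Option<(u64, usize)> {
    let mut result: u64 = 0;
    let mut shift = 0;
    for (i, &byte) in bytes.iter().enumerate() {
        // Low seven bits are payload; the high bit means "more bytes follow".
        result |= ((byte & 0x7F) as u64) << shift;
        if byte & 0x80 == 0 {
            return Some((result, i + 1));
        }
        shift += 7;
    }
    None
}

fn main() {
    // 624485 encodes as E5 8E 26, the worked example from the DWARF spec.
    assert_eq!(read_uleb128(&[0xE5, 0x8E, 0x26]), Some((624485, 3)));
    // A set continuation bit with no following byte is a truncated stream.
    assert_eq!(read_uleb128(&[0x80]), None);
}
```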
+ use crate::sys::pal::itron::{ abi, error::{expect_success, expect_success_aborting, fail, ItronError}, diff --git a/std/src/sys/sync/mutex/windows7.rs b/std/src/sys/sync/mutex/windows7.rs index ef2f84082cd5c..689dba10f01ed 100644 --- a/std/src/sys/sync/mutex/windows7.rs +++ b/std/src/sys/sync/mutex/windows7.rs @@ -25,7 +25,7 @@ unsafe impl Send for Mutex {} unsafe impl Sync for Mutex {} #[inline] -pub unsafe fn raw(m: &Mutex) -> c::PSRWLOCK { +pub unsafe fn raw(m: &Mutex) -> *mut c::SRWLOCK { m.srwlock.get() } diff --git a/std/src/sys/sync/once/futex.rs b/std/src/sys/sync/once/futex.rs index 609085dcd4712..8a231e65ad134 100644 --- a/std/src/sys/sync/once/futex.rs +++ b/std/src/sys/sync/once/futex.rs @@ -57,7 +57,7 @@ impl<'a> Drop for CompletionGuard<'a> { // up on the Once. `futex_wake_all` does its own synchronization, hence // we do not need `AcqRel`. if self.state.swap(self.set_state_on_drop_to, Release) == QUEUED { - futex_wake_all(&self.state); + futex_wake_all(self.state); } } } diff --git a/std/src/sys/sync/rwlock/solid.rs b/std/src/sys/sync/rwlock/solid.rs index 9bf6f5dbb731e..7558eee8edd33 100644 --- a/std/src/sys/sync/rwlock/solid.rs +++ b/std/src/sys/sync/rwlock/solid.rs @@ -1,4 +1,5 @@ //! A readers-writer lock implementation backed by the SOLID kernel extension. + use crate::sys::pal::{ abi, itron::{ diff --git a/std/src/sys/sync/thread_parking/futex.rs b/std/src/sys/sync/thread_parking/futex.rs index 588e7b27826f6..034eececb2a28 100644 --- a/std/src/sys/sync/thread_parking/futex.rs +++ b/std/src/sys/sync/thread_parking/futex.rs @@ -1,15 +1,18 @@ +#![forbid(unsafe_op_in_unsafe_fn)] use crate::pin::Pin; -use crate::sync::atomic::AtomicU32; use crate::sync::atomic::Ordering::{Acquire, Release}; -use crate::sys::futex::{futex_wait, futex_wake}; +use crate::sys::futex::{self, futex_wait, futex_wake}; use crate::time::Duration; -const PARKED: u32 = u32::MAX; -const EMPTY: u32 = 0; -const NOTIFIED: u32 = 1; +type Atomic = futex::SmallAtomic; +type State = futex::SmallPrimitive; + +const PARKED: State = State::MAX; +const EMPTY: State = 0; +const NOTIFIED: State = 1; pub struct Parker { - state: AtomicU32, + state: Atomic, } // Notes about memory ordering: @@ -36,7 +39,7 @@ impl Parker { /// Construct the futex parker. The UNIX parker implementation /// requires this to happen in-place. pub unsafe fn new_in_place(parker: *mut Parker) { - parker.write(Self { state: AtomicU32::new(EMPTY) }); + unsafe { parker.write(Self { state: Atomic::new(EMPTY) }) }; } // Assumes this is only called by the thread that owns the Parker, diff --git a/std/src/sys/sync/thread_parking/mod.rs b/std/src/sys/sync/thread_parking/mod.rs index ed1a6437faaaf..0ebc5e093ee2a 100644 --- a/std/src/sys/sync/thread_parking/mod.rs +++ b/std/src/sys/sync/thread_parking/mod.rs @@ -1,5 +1,6 @@ cfg_if::cfg_if! { if #[cfg(any( + all(target_os = "windows", not(target_vendor = "win7")), target_os = "linux", target_os = "android", all(target_arch = "wasm32", target_feature = "atomics"), @@ -18,9 +19,9 @@ cfg_if::cfg_if! 
{
     ))] {
         mod id;
         pub use id::Parker;
-    } else if #[cfg(target_os = "windows")] {
-        mod windows;
-        pub use windows::Parker;
+    } else if #[cfg(target_vendor = "win7")] {
+        mod windows7;
+        pub use windows7::Parker;
     } else if #[cfg(all(target_vendor = "apple", not(miri)))] {
         mod darwin;
         pub use darwin::Parker;
diff --git a/std/src/sys/sync/thread_parking/windows.rs b/std/src/sys/sync/thread_parking/windows7.rs
similarity index 97%
rename from std/src/sys/sync/thread_parking/windows.rs
rename to std/src/sys/sync/thread_parking/windows7.rs
index 4b8102d505a1f..3a8d40dc5cfac 100644
--- a/std/src/sys/sync/thread_parking/windows.rs
+++ b/std/src/sys/sync/thread_parking/windows7.rs
@@ -64,6 +64,7 @@ use crate::sync::atomic::{
 };
 use crate::sys::{c, dur2timeout};
 use crate::time::Duration;
+use core::ffi::c_void;

 pub struct Parker {
     state: AtomicI8,
@@ -117,7 +118,7 @@ impl Parker {

         loop {
             // Wait for something to happen, assuming it's still set to PARKED.
-            c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
+            c::WaitOnAddress(self.ptr(), &PARKED as *const _ as *const c_void, 1, c::INFINITE);
             // Change NOTIFIED=>EMPTY but leave PARKED alone.
             if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() {
                 // Actually woken up by unpark().
@@ -144,7 +145,7 @@ impl Parker {
         }

         // Wait for something to happen, assuming it's still set to PARKED.
-        c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
+        c::WaitOnAddress(self.ptr(), &PARKED as *const _ as *const c_void, 1, dur2timeout(timeout));
         // Set the state back to EMPTY (from either PARKED or NOTIFIED).
         // Note that we don't just write EMPTY, but use swap() to also
         // include an acquire-ordered read to synchronize with unpark()'s
@@ -177,8 +178,8 @@ impl Parker {
         }
     }

-    fn ptr(&self) -> c::LPVOID {
-        core::ptr::addr_of!(self.state) as c::LPVOID
+    fn ptr(&self) -> *const c_void {
+        core::ptr::addr_of!(self.state).cast::<c_void>()
     }
 }

diff --git a/std/src/sys/thread_local/destructors/linux_like.rs b/std/src/sys/thread_local/destructors/linux_like.rs
new file mode 100644
index 0000000000000..c381be0bf8c76
--- /dev/null
+++ b/std/src/sys/thread_local/destructors/linux_like.rs
@@ -0,0 +1,58 @@
+//! Destructor registration for Linux-like systems.
+//!
+//! Since what appears to be version 2.18, glibc has shipped the
+//! `__cxa_thread_atexit_impl` symbol which GCC and clang both use to invoke
+//! destructors in C++ thread_local globals. This function does exactly what
+//! we want: it schedules a callback which will be run at thread exit with the
+//! provided argument.
+//!
+//! Unfortunately, our minimum supported glibc version (at the time of writing)
+//! is 2.17, so we can only link this symbol weakly and need to use the
+//! [`list`](super::list) destructor implementation as fallback.
+
+use crate::mem::transmute;
+
+// FIXME: The Rust compiler currently omits weak function definitions (i.e.,
+// __cxa_thread_atexit_impl) and their metadata from LLVM IR.
+#[no_sanitize(cfi, kcfi)]
+pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
+    /// This is necessary because the `__cxa_thread_atexit_impl` implementation
+    /// that std links to by default may be a C or C++ implementation that was
+    /// not compiled using the Clang integer normalization option.
+    #[cfg(sanitizer_cfi_normalize_integers)]
+    use core::ffi::c_int;
+    #[cfg(not(sanitizer_cfi_normalize_integers))]
+    #[cfi_encoding = "i"]
+    #[repr(transparent)]
+    #[allow(non_camel_case_types)]
+    pub struct c_int(#[allow(dead_code)] pub core::ffi::c_int);
+
+    extern "C" {
+        #[linkage = "extern_weak"]
+        static __dso_handle: *mut u8;
+        #[linkage = "extern_weak"]
+        static __cxa_thread_atexit_impl: Option<
+            extern "C" fn(
+                unsafe extern "C" fn(*mut libc::c_void),
+                *mut libc::c_void,
+                *mut libc::c_void,
+            ) -> c_int,
+        >;
+    }
+
+    if let Some(f) = unsafe { __cxa_thread_atexit_impl } {
+        unsafe {
+            f(
+                transmute::<unsafe extern "C" fn(*mut u8), unsafe extern "C" fn(*mut libc::c_void)>(
+                    dtor,
+                ),
+                t.cast(),
+                core::ptr::addr_of!(__dso_handle) as *mut _,
+            );
+        }
+    } else {
+        unsafe {
+            super::list::register(t, dtor);
+        }
+    }
+}
diff --git a/std/src/sys/thread_local/destructors/list.rs b/std/src/sys/thread_local/destructors/list.rs
new file mode 100644
index 0000000000000..b9d5214c438d2
--- /dev/null
+++ b/std/src/sys/thread_local/destructors/list.rs
@@ -0,0 +1,44 @@
+use crate::cell::RefCell;
+use crate::sys::thread_local::guard;
+
+#[thread_local]
+static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
+
+pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
+    let Ok(mut dtors) = DTORS.try_borrow_mut() else {
+        // This point can only be reached if the global allocator calls this
+        // function again.
+        // FIXME: maybe use the system allocator instead?
+        rtabort!("the global allocator may not use TLS with destructors");
+    };
+
+    guard::enable();
+
+    dtors.push((t, dtor));
+}
+
+/// The [`guard`] module contains platform-specific functions which will run this
+/// function on thread exit if [`guard::enable`] has been called.
+///
+/// # Safety
+///
+/// May only be run on thread exit to guarantee that there are no live references
+/// to TLS variables while they are destroyed.
+pub unsafe fn run() {
+    loop {
+        let mut dtors = DTORS.borrow_mut();
+        match dtors.pop() {
+            Some((t, dtor)) => {
+                drop(dtors);
+                unsafe {
+                    dtor(t);
+                }
+            }
+            None => {
+                // Free the list memory.
+                *dtors = Vec::new();
+                break;
+            }
+        }
+    }
+}
diff --git a/std/src/sys/thread_local/guard/apple.rs b/std/src/sys/thread_local/guard/apple.rs
new file mode 100644
index 0000000000000..6c27f7ae35cba
--- /dev/null
+++ b/std/src/sys/thread_local/guard/apple.rs
@@ -0,0 +1,31 @@
+//! macOS allows registering destructors through _tlv_atexit. But since calling
+//! it while TLS destructors are running is UB, we still need to keep our own
+//! list of destructors.
+
+use crate::cell::Cell;
+use crate::ptr;
+use crate::sys::thread_local::destructors;
+
+pub fn enable() {
+    #[thread_local]
+    static REGISTERED: Cell<bool> = Cell::new(false);
+
+    extern "C" {
+        fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
+    }
+
+    if !REGISTERED.replace(true) {
+        // SAFETY: Calling _tlv_atexit while TLS destructors are running is UB.
+        // But as run_dtors is only called after being registered, this point
+        // cannot be reached from it.
+        unsafe {
+            _tlv_atexit(run_dtors, ptr::null_mut());
+        }
+    }
+
+    unsafe extern "C" fn run_dtors(_: *mut u8) {
+        unsafe {
+            destructors::run();
+        }
+    }
+}
diff --git a/std/src/sys/thread_local/guard/key.rs b/std/src/sys/thread_local/guard/key.rs
new file mode 100644
index 0000000000000..67c3ca8862767
--- /dev/null
+++ b/std/src/sys/thread_local/guard/key.rs
@@ -0,0 +1,23 @@
+//! A lot of UNIX platforms don't have a specialized way to register TLS
+//! destructors for native TLS. Instead, we use one TLS key with a destructor
+//!
that will run all native TLS destructors in the destructor list. + +use crate::ptr; +use crate::sys::thread_local::destructors; +use crate::sys::thread_local::key::{set, LazyKey}; + +pub fn enable() { + static DTORS: LazyKey = LazyKey::new(Some(run)); + + // Setting the key value to something other than NULL will result in the + // destructor being run at thread exit. + unsafe { + set(DTORS.force(), ptr::without_provenance_mut(1)); + } + + unsafe extern "C" fn run(_: *mut u8) { + unsafe { + destructors::run(); + } + } +} diff --git a/std/src/sys/thread_local/guard/solid.rs b/std/src/sys/thread_local/guard/solid.rs new file mode 100644 index 0000000000000..b65d00c5b5fb7 --- /dev/null +++ b/std/src/sys/thread_local/guard/solid.rs @@ -0,0 +1,23 @@ +//! SOLID, just like macOS, has an API to register TLS destructors. But since +//! it does not allow specifying an argument to that function, and will not run +//! destructors for terminated tasks, we still keep our own list. + +use crate::cell::Cell; +use crate::sys::pal::{abi, itron::task}; +use crate::sys::thread_local::destructors; + +pub fn enable() { + #[thread_local] + static REGISTERED: Cell = Cell::new(false); + + if !REGISTERED.replace(true) { + let tid = task::current_task_id_aborting(); + // Register `tls_dtor` to make sure the TLS destructors are called + // for tasks created by other means than `std::thread` + unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, tls_dtor) }; + } + + unsafe extern "C" fn tls_dtor(_unused: *mut u8) { + unsafe { destructors::run() }; + } +} diff --git a/std/src/sys/thread_local/guard/windows.rs b/std/src/sys/thread_local/guard/windows.rs new file mode 100644 index 0000000000000..f6cd457046ffc --- /dev/null +++ b/std/src/sys/thread_local/guard/windows.rs @@ -0,0 +1,104 @@ +//! Support for Windows TLS destructors. +//! +//! Unfortunately, Windows does not provide a nice API to provide a destructor +//! for a TLS variable. Thus, the solution here ended up being a little more +//! obscure, but fear not, the internet has informed me [1][2] that this solution +//! is not unique (no way I could have thought of it as well!). The key idea is +//! to insert some hook somewhere to run arbitrary code on thread termination. +//! With this in place we'll be able to run anything we like, including all +//! TLS destructors! +//! +//! In order to realize this, all TLS destructors are tracked by *us*, not the +//! Windows runtime. This means that we have a global list of destructors for +//! each TLS key or variable that we know about. +//! +//! # What's up with CRT$XLB? +//! +//! For anything about TLS destructors to work on Windows, we have to be able +//! to run *something* when a thread exits. To do so, we place a very special +//! static in a very special location. If this is encoded in just the right +//! way, the kernel's loader is apparently nice enough to run some function +//! of ours whenever a thread exits! How nice of the kernel! +//! +//! Lots of detailed information can be found in source [1] above, but the +//! gist of it is that this is leveraging a feature of Microsoft's PE format +//! (executable format) which is not actually used by any compilers today. +//! This apparently translates to any callbacks in the ".CRT$XLB" section +//! being run on certain events. +//! +//! So after all that, we use the compiler's #[link_section] feature to place +//! a callback pointer into the magic section so it ends up being called. +//! +//! # What's up with this callback? +//! +//! 
The callback specified receives a number of parameters from... someone!
+//! (the kernel? the runtime? I'm not quite sure!) There are a few events that
+//! this gets invoked for, but we're currently only interested in when a
+//! thread or a process "detaches" (exits). The process part happens for the
+//! last thread and the thread part happens for any normal thread.
+//!
+//! # The article mentions weird stuff about "/INCLUDE"?
+//!
+//! It sure does! Specifically we're talking about this quote:
+//!
+//! ```quote
+//! The Microsoft run-time library facilitates this process by defining a
+//! memory image of the TLS Directory and giving it the special name
+//! “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
+//! linker looks for this memory image and uses the data there to create the
+//! TLS Directory. Other compilers that support TLS and work with the
+//! Microsoft linker must use this same technique.
+//! ```
+//!
+//! Basically what this means is that if we want support for our TLS
+//! destructors/our hook being called then we need to make sure the linker does
+//! not omit this symbol. Otherwise it will omit it and our callback won't be
+//! wired up.
+//!
+//! We don't actually use the `/INCLUDE` linker flag here like the article
+//! mentions because the Rust compiler doesn't propagate linker flags, but
+//! instead we use a shim function which performs a volatile 1-byte load from
+//! the address of the symbol to ensure it sticks around.
+//!
+//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
+//! [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
+
+use crate::ptr;
+use crate::sys::c;
+use core::ffi::c_void;
+
+pub fn enable() {
+    // When destructors are used, we don't want LLVM eliminating CALLBACK for any
+    // reason. Once the symbol makes it to the linker, it will do the rest.
+    unsafe { ptr::from_ref(&CALLBACK).read_volatile() };
+}
+
+#[link_section = ".CRT$XLB"]
+#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section`
+pub static CALLBACK: unsafe extern "system" fn(*mut c_void, u32, *mut c_void) = tls_callback;
+
+unsafe extern "system" fn tls_callback(_h: *mut c_void, dw_reason: u32, _pv: *mut c_void) {
+    // See comments above for what this is doing. Note that we don't need this
+    // trickery on GNU windows, just on MSVC.
+    #[cfg(all(target_env = "msvc", not(target_thread_local)))]
+    {
+        extern "C" {
+            static _tls_used: u8;
+        }
+
+        unsafe {
+            ptr::from_ref(&_tls_used).read_volatile();
+        }
+    }
+
+    if dw_reason == c::DLL_THREAD_DETACH || dw_reason == c::DLL_PROCESS_DETACH {
+        #[cfg(target_thread_local)]
+        unsafe {
+            super::super::destructors::run();
+        }
+        #[cfg(not(target_thread_local))]
+        unsafe {
+            super::super::key::run_dtors();
+        }
+    }
+}
diff --git a/std/src/sys/thread_local/key/racy.rs b/std/src/sys/thread_local/key/racy.rs
new file mode 100644
index 0000000000000..69f11458c3289
--- /dev/null
+++ b/std/src/sys/thread_local/key/racy.rs
@@ -0,0 +1,82 @@
+//! A `LazyKey` implementation using racy initialization.
+//!
+//! Unfortunately, none of the platforms currently supported by `std` allows
+//! creating TLS keys at compile-time. Thus we need a way to lazily create keys.
+//! Instead of a blocking API like `OnceLock`, we use racy initialization, which
+//! should be more lightweight and avoids circular dependencies with the rest of
+//! `std`.
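The `racy.rs` implementation whose module comment ends here hinges on one pattern: every contender creates a key, exactly one wins the compare-exchange, and the losers destroy their own key and adopt the winner's. Sketched standalone, with hypothetical `create_resource`/`destroy_resource` functions standing in for `TlsAlloc`-style OS calls:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// 0 is the "unset" sentinel, like `KEY_SENTVAL` in the code below.
static SLOT: AtomicUsize = AtomicUsize::new(0);

// Hypothetical stand-ins for OS resource creation/destruction.
fn create_resource() -> usize {
    Box::into_raw(Box::new(42u32)) as usize // always non-zero
}
fn destroy_resource(handle: usize) {
    // SAFETY: handles are only ever produced by `create_resource`.
    drop(unsafe { Box::from_raw(handle as *mut u32) });
}

/// Return the shared handle, creating it on first use. This is only sound
/// when a surplus create/destroy pair is unobservable; it cannot be used
/// once an irrevocable side effect (like destructor registration) is tied
/// to creation.
fn force() -> usize {
    match SLOT.load(Ordering::Acquire) {
        0 => {
            let new = create_resource();
            match SLOT.compare_exchange(0, new, Ordering::AcqRel, Ordering::Acquire) {
                Ok(_) => new,
                // Someone else won the race; discard ours and use theirs.
                Err(winner) => {
                    destroy_resource(new);
                    winner
                }
            }
        }
        handle => handle,
    }
}

fn main() {
    let a = force();
    let b = force();
    assert_eq!(a, b);
}
```

That caveat is exactly why the Windows key implementation later in this patch falls back to `InitOnce` synchronization whenever a destructor has to be registered.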
+
+use crate::sync::atomic::{self, AtomicUsize, Ordering};
+
+/// A type for TLS keys that are statically allocated.
+///
+/// This is basically a `LazyLock`, but avoids blocking and circular
+/// dependencies with the rest of `std`.
+pub struct LazyKey {
+    /// Inner static TLS key (internals).
+    key: AtomicUsize,
+    /// Destructor for the TLS value.
+    dtor: Option<unsafe extern "C" fn(*mut u8)>,
+}
+
+// Define a sentinel value that is likely not to be returned
+// as a TLS key.
+#[cfg(not(target_os = "nto"))]
+const KEY_SENTVAL: usize = 0;
+// On QNX Neutrino, 0 is always returned when currently not in use.
+// Using 0 would mean always creating two keys and removing the first
+// one (with value 0) immediately afterwards.
+#[cfg(target_os = "nto")]
+const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1;
+
+impl LazyKey {
+    #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
+    pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> LazyKey {
+        LazyKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor }
+    }
+
+    #[inline]
+    pub fn force(&self) -> super::Key {
+        match self.key.load(Ordering::Acquire) {
+            KEY_SENTVAL => self.lazy_init() as super::Key,
+            n => n as super::Key,
+        }
+    }
+
+    fn lazy_init(&self) -> usize {
+        // POSIX allows the key created here to be KEY_SENTVAL, but the compare_exchange
+        // below relies on using KEY_SENTVAL as a sentinel value to check who won the
+        // race to set the shared TLS key. As far as I know, there is no
+        // guaranteed value that cannot be returned as a pthread_key_create key,
+        // so there is no value we can initialize the inner key with to
+        // prove that it has not yet been set. As such, we'll continue using a
+        // value of KEY_SENTVAL, but with some gyrations to make sure we have a non-KEY_SENTVAL
+        // value returned from the creation routine.
+        // FIXME: this is clearly a hack, and should be cleaned up.
+ let key1 = super::create(self.dtor); + let key = if key1 as usize != KEY_SENTVAL { + key1 + } else { + let key2 = super::create(self.dtor); + unsafe { + super::destroy(key1); + } + key2 + }; + rtassert!(key as usize != KEY_SENTVAL); + match self.key.compare_exchange( + KEY_SENTVAL, + key as usize, + Ordering::Release, + Ordering::Acquire, + ) { + // The CAS succeeded, so we've created the actual key + Ok(_) => key as usize, + // If someone beat us to the punch, use their key instead + Err(n) => unsafe { + super::destroy(key); + n + }, + } + } +} diff --git a/std/src/sys/pal/sgx/thread_local_key.rs b/std/src/sys/thread_local/key/sgx.rs similarity index 74% rename from std/src/sys/pal/sgx/thread_local_key.rs rename to std/src/sys/thread_local/key/sgx.rs index c7a57d3a3d47e..4aa2e5afa72ef 100644 --- a/std/src/sys/pal/sgx/thread_local_key.rs +++ b/std/src/sys/thread_local/key/sgx.rs @@ -1,9 +1,9 @@ -use super::abi::tls::{Key as AbiKey, Tls}; +use crate::sys::pal::abi::tls::{Key as AbiKey, Tls}; pub type Key = usize; #[inline] -pub unsafe fn create(dtor: Option) -> Key { +pub fn create(dtor: Option) -> Key { Tls::create(dtor).as_usize() } diff --git a/std/src/sys/thread_local/key/tests.rs b/std/src/sys/thread_local/key/tests.rs new file mode 100644 index 0000000000000..d82b34e71f0e4 --- /dev/null +++ b/std/src/sys/thread_local/key/tests.rs @@ -0,0 +1,62 @@ +use super::{get, set, LazyKey}; +use crate::ptr; + +#[test] +fn smoke() { + static K1: LazyKey = LazyKey::new(None); + static K2: LazyKey = LazyKey::new(None); + + let k1 = K1.force(); + let k2 = K2.force(); + assert_ne!(k1, k2); + + assert_eq!(K1.force(), k1); + assert_eq!(K2.force(), k2); + + unsafe { + assert!(get(k1).is_null()); + assert!(get(k2).is_null()); + set(k1, ptr::without_provenance_mut(1)); + set(k2, ptr::without_provenance_mut(2)); + assert_eq!(get(k1) as usize, 1); + assert_eq!(get(k2) as usize, 2); + } +} + +#[test] +fn destructors() { + use crate::mem::ManuallyDrop; + use crate::sync::Arc; + use crate::thread; + + unsafe extern "C" fn destruct(ptr: *mut u8) { + drop(unsafe { Arc::from_raw(ptr as *const ()) }); + } + + static KEY: LazyKey = LazyKey::new(Some(destruct)); + + let shared1 = Arc::new(()); + let shared2 = Arc::clone(&shared1); + + let key = KEY.force(); + unsafe { + assert!(get(key).is_null()); + set(key, Arc::into_raw(shared1) as *mut u8); + } + + thread::spawn(move || unsafe { + let key = KEY.force(); + assert!(get(key).is_null()); + set(key, Arc::into_raw(shared2) as *mut u8); + }) + .join() + .unwrap(); + + // Leak the Arc, let the TLS destructor clean it up. 
+ let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(get(key) as *const ())) }; + assert_eq!( + Arc::strong_count(&shared1), + 1, + "destructor should have dropped the other reference on thread exit" + ); +} diff --git a/std/src/sys/thread_local/key/unix.rs b/std/src/sys/thread_local/key/unix.rs new file mode 100644 index 0000000000000..28e48a750b9bf --- /dev/null +++ b/std/src/sys/thread_local/key/unix.rs @@ -0,0 +1,28 @@ +use crate::mem; + +pub type Key = libc::pthread_key_t; + +#[inline] +pub fn create(dtor: Option) -> Key { + let mut key = 0; + assert_eq!(unsafe { libc::pthread_key_create(&mut key, mem::transmute(dtor)) }, 0); + key +} + +#[inline] +pub unsafe fn set(key: Key, value: *mut u8) { + let r = unsafe { libc::pthread_setspecific(key, value as *mut _) }; + debug_assert_eq!(r, 0); +} + +#[inline] +#[cfg(any(not(target_thread_local), test))] +pub unsafe fn get(key: Key) -> *mut u8 { + unsafe { libc::pthread_getspecific(key) as *mut u8 } +} + +#[inline] +pub unsafe fn destroy(key: Key) { + let r = unsafe { libc::pthread_key_delete(key) }; + debug_assert_eq!(r, 0); +} diff --git a/std/src/sys/thread_local/key/windows.rs b/std/src/sys/thread_local/key/windows.rs new file mode 100644 index 0000000000000..8b43e558d5d98 --- /dev/null +++ b/std/src/sys/thread_local/key/windows.rs @@ -0,0 +1,206 @@ +//! Implementation of `LazyKey` for Windows. +//! +//! Windows has no native support for running destructors so we manage our own +//! list of destructors to keep track of how to destroy keys. We then install a +//! callback later to get invoked whenever a thread exits, running all +//! appropriate destructors (see the [`guard`](guard) module documentation). +//! +//! This will likely need to be improved over time, but this module attempts a +//! "poor man's" destructor callback system. Once we've got a list of what to +//! run, we iterate over all keys, check their values, and then run destructors +//! if the values turn out to be non null (setting them to null just beforehand). +//! We do this a few times in a loop to basically match Unix semantics. If we +//! don't reach a fixed point after a short while then we just inevitably leak +//! something. +//! +//! The list is implemented as an atomic single-linked list of `LazyKey`s and +//! does not support unregistration. Unfortunately, this means that we cannot +//! use racy initialization for creating the keys in `LazyKey`, as that could +//! result in destructors being missed. Hence, we synchronize the creation of +//! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s +//! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself). +//! For keys without destructors, racy initialization suffices. + +// FIXME: investigate using a fixed-size array instead, as the maximum number +// of keys is [limited to 1088](https://learn.microsoft.com/en-us/windows/win32/ProcThread/thread-local-storage). + +use crate::cell::UnsafeCell; +use crate::ptr; +use crate::sync::atomic::{ + AtomicPtr, AtomicU32, + Ordering::{AcqRel, Acquire, Relaxed, Release}, +}; +use crate::sys::c; +use crate::sys::thread_local::guard; + +pub type Key = u32; +type Dtor = unsafe extern "C" fn(*mut u8); + +pub struct LazyKey { + /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == u32::MAX + /// is not a valid key value, this allows us to use zero as sentinel value + /// without risking overflow. 
+    key: AtomicU32,
+    dtor: Option<Dtor>,
+    next: AtomicPtr<LazyKey>,
+    /// Currently, destructors cannot be unregistered, so we cannot use racy
+    /// initialization for keys. Instead, we need to synchronize initialization.
+    /// Use the Windows-provided `Once` since it does not require TLS.
+    once: UnsafeCell<c::INIT_ONCE>,
+}
+
+impl LazyKey {
+    #[inline]
+    pub const fn new(dtor: Option<Dtor>) -> LazyKey {
+        LazyKey {
+            key: AtomicU32::new(0),
+            dtor,
+            next: AtomicPtr::new(ptr::null_mut()),
+            once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
+        }
+    }
+
+    #[inline]
+    pub fn force(&'static self) -> Key {
+        match self.key.load(Acquire) {
+            0 => unsafe { self.init() },
+            key => key - 1,
+        }
+    }
+
+    #[cold]
+    unsafe fn init(&'static self) -> Key {
+        if self.dtor.is_some() {
+            let mut pending = c::FALSE;
+            let r = unsafe {
+                c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut())
+            };
+            assert_eq!(r, c::TRUE);
+
+            if pending == c::FALSE {
+                // Some other thread initialized the key, load it.
+                self.key.load(Relaxed) - 1
+            } else {
+                let key = unsafe { c::TlsAlloc() };
+                if key == c::TLS_OUT_OF_INDEXES {
+                    // Wakeup the waiting threads before panicking to avoid deadlock.
+                    unsafe {
+                        c::InitOnceComplete(
+                            self.once.get(),
+                            c::INIT_ONCE_INIT_FAILED,
+                            ptr::null_mut(),
+                        );
+                    }
+                    panic!("out of TLS indexes");
+                }
+
+                unsafe {
+                    register_dtor(self);
+                }
+
+                // Release-storing the key needs to be the last thing we do.
+                // This is because in `fn key()`, other threads will do an acquire load of the key,
+                // and if that sees this write then it will entirely bypass the `InitOnce`. We thus
+                // need to establish synchronization through `key`. In particular that acquire load
+                // must happen-after the register_dtor above, to ensure the dtor actually runs!
+                self.key.store(key + 1, Release);
+
+                let r = unsafe { c::InitOnceComplete(self.once.get(), 0, ptr::null_mut()) };
+                debug_assert_eq!(r, c::TRUE);
+
+                key
+            }
+        } else {
+            // If there is no destructor to clean up, we can use racy initialization.
+
+            let key = unsafe { c::TlsAlloc() };
+            assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");
+
+            match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
+                Ok(_) => key,
+                Err(new) => unsafe {
+                    // Some other thread completed initialization first, so destroy
+                    // our key and use theirs.
+                    let r = c::TlsFree(key);
+                    debug_assert_eq!(r, c::TRUE);
+                    new - 1
+                },
+            }
+        }
+    }
+}
+
+unsafe impl Send for LazyKey {}
+unsafe impl Sync for LazyKey {}
+
+#[inline]
+pub unsafe fn set(key: Key, val: *mut u8) {
+    let r = unsafe { c::TlsSetValue(key, val.cast()) };
+    debug_assert_eq!(r, c::TRUE);
+}
+
+#[inline]
+pub unsafe fn get(key: Key) -> *mut u8 {
+    unsafe { c::TlsGetValue(key).cast() }
+}
+
+static DTORS: AtomicPtr<LazyKey> = AtomicPtr::new(ptr::null_mut());
+
+/// Should only be called once per key, otherwise loops or breaks may occur in
+/// the linked list.
+unsafe fn register_dtor(key: &'static LazyKey) {
+    guard::enable();
+
+    let this = <*const LazyKey>::cast_mut(key);
+    // Use acquire ordering to pass along the changes done by the previously
+    // registered keys when we store the new head with release ordering.
+    let mut head = DTORS.load(Acquire);
+    loop {
+        key.next.store(head, Relaxed);
+        match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
+            Ok(_) => break,
+            Err(new) => head = new,
+        }
+    }
+}
+
+/// This will and must only be run by the destructor callback in [`guard`].
+pub unsafe fn run_dtors() {
+    for _ in 0..5 {
+        let mut any_run = false;
+
+        // Use acquire ordering to observe key initialization.
+ let mut cur = DTORS.load(Acquire); + while !cur.is_null() { + let pre_key = unsafe { (*cur).key.load(Acquire) }; + let dtor = unsafe { (*cur).dtor.unwrap() }; + cur = unsafe { (*cur).next.load(Relaxed) }; + + // In LazyKey::init, we register the dtor before setting `key`. + // So if one thread's `run_dtors` races with another thread executing `init` on the same + // `LazyKey`, we can encounter a key of 0 here. That means this key was never + // initialized in this thread so we can safely skip it. + if pre_key == 0 { + continue; + } + // If this is non-zero, then via the `Acquire` load above we synchronized with + // everything relevant for this key. (It's not clear that this is needed, since the + // release-acquire pair on DTORS also establishes synchronization, but better safe than + // sorry.) + let key = pre_key - 1; + + let ptr = unsafe { c::TlsGetValue(key) }; + if !ptr.is_null() { + unsafe { + c::TlsSetValue(key, ptr::null_mut()); + dtor(ptr as *mut _); + any_run = true; + } + } + } + + if !any_run { + break; + } + } +} diff --git a/std/src/sys/pal/xous/thread_local_key.rs b/std/src/sys/thread_local/key/xous.rs similarity index 73% rename from std/src/sys/pal/xous/thread_local_key.rs rename to std/src/sys/thread_local/key/xous.rs index 6c29813c79dfd..5a837a33e190e 100644 --- a/std/src/sys/pal/xous/thread_local_key.rs +++ b/std/src/sys/thread_local/key/xous.rs @@ -1,3 +1,41 @@ +//! Thread Local Storage +//! +//! Currently, we are limited to 1023 TLS entries. The entries +//! live in a page of memory that's unique per-process, and is +//! stored in the `$tp` register. If this register is 0, then +//! TLS has not been initialized and thread cleanup can be skipped. +//! +//! The index into this register is the `key`. This key is identical +//! between all threads, but indexes a different offset within this +//! pointer. +//! +//! # Dtor registration (stolen from Windows) +//! +//! Xous has no native support for running destructors so we manage our own +//! list of destructors to keep track of how to destroy keys. When a thread +//! or the process exits, `run_dtors` is called, which will iterate through +//! the list and run the destructors. +//! +//! Currently unregistration from this list is not supported. A destructor can be +//! registered but cannot be unregistered. There's various simplifying reasons +//! for doing this, the big ones being: +//! +//! 1. Currently we don't even support deallocating TLS keys, so normal operation +//! doesn't need to deallocate a destructor. +//! 2. There is no point in time where we know we can unregister a destructor +//! because it could always be getting run by some remote thread. +//! +//! Typically processes have a statically known set of TLS keys which is pretty +//! small, and we'd want to keep this memory alive for the whole process anyway +//! really. +//! +//! Perhaps one day we can fold the `Box` here into a static allocation, +//! expanding the `LazyKey` structure to contain not only a slot for the TLS +//! key but also a slot for the destructor queue on windows. An optimization for +//! another day! + +// FIXME(joboet): implement support for native TLS instead. + use crate::mem::ManuallyDrop; use crate::ptr; use crate::sync::atomic::AtomicPtr; @@ -7,18 +45,7 @@ use core::arch::asm; use crate::os::xous::ffi::{map_memory, unmap_memory, MemoryFlags}; -/// Thread Local Storage -/// -/// Currently, we are limited to 1023 TLS entries. 
The entries -/// live in a page of memory that's unique per-process, and is -/// stored in the `$tp` register. If this register is 0, then -/// TLS has not been initialized and thread cleanup can be skipped. -/// -/// The index into this register is the `key`. This key is identical -/// between all threads, but indexes a different offset within this -/// pointer. pub type Key = usize; - pub type Dtor = unsafe extern "C" fn(*mut u8); const TLS_MEMORY_SIZE: usize = 4096; @@ -89,7 +116,7 @@ fn tls_table() -> &'static mut [*mut u8] { } #[inline] -pub unsafe fn create(dtor: Option) -> Key { +pub fn create(dtor: Option) -> Key { // Allocate a new TLS key. These keys are shared among all threads. #[allow(unused_unsafe)] let key = unsafe { TLS_KEY_INDEX.fetch_add(1, Relaxed) }; @@ -118,32 +145,6 @@ pub unsafe fn destroy(_key: Key) { // lots of TLS variables, but in practice that's not an issue. } -// ------------------------------------------------------------------------- -// Dtor registration (stolen from Windows) -// -// Xous has no native support for running destructors so we manage our own -// list of destructors to keep track of how to destroy keys. We then install a -// callback later to get invoked whenever a thread exits, running all -// appropriate destructors. -// -// Currently unregistration from this list is not supported. A destructor can be -// registered but cannot be unregistered. There's various simplifying reasons -// for doing this, the big ones being: -// -// 1. Currently we don't even support deallocating TLS keys, so normal operation -// doesn't need to deallocate a destructor. -// 2. There is no point in time where we know we can unregister a destructor -// because it could always be getting run by some remote thread. -// -// Typically processes have a statically known set of TLS keys which is pretty -// small, and we'd want to keep this memory alive for the whole process anyway -// really. -// -// Perhaps one day we can fold the `Box` here into a static allocation, -// expanding the `StaticKey` structure to contain not only a slot for the TLS -// key but also a slot for the destructor queue on windows. An optimization for -// another day! - struct Node { dtor: Dtor, key: Key, diff --git a/std/src/sys/thread_local/mod.rs b/std/src/sys/thread_local/mod.rs index 0a78a1a1cf02d..3d1b91a7ea095 100644 --- a/std/src/sys/thread_local/mod.rs +++ b/std/src/sys/thread_local/mod.rs @@ -1,27 +1,159 @@ -#![unstable(feature = "thread_local_internals", reason = "should not be necessary", issue = "none")] -#![cfg_attr(test, allow(unused))] +//! Implementation of the `thread_local` macro. +//! +//! There are three different thread-local implementations: +//! * Some targets lack threading support, and hence have only one thread, so +//! the TLS data is stored in a normal `static`. +//! * Some targets support TLS natively via the dynamic linker and C runtime. +//! * On some targets, the OS provides a library-based TLS implementation. The +//! TLS data is heap-allocated and referenced using a TLS key. +//! +//! Each implementation provides a macro which generates the `LocalKey` `const` +//! used to reference the TLS variable, along with the necessary helper structs +//! to track the initialization/destruction state of the variable. +//! +//! Additionally, this module contains abstractions for the OS interfaces used +//! for these implementations. -// There are three thread-local implementations: "static", "fast", "OS". 
-// The "OS" thread local key type is accessed via platform-specific API calls and is slow, while the -// "fast" key type is accessed via code generated via LLVM, where TLS keys are set up by the linker. -// "static" is for single-threaded platforms where a global static is sufficient. +#![cfg_attr(test, allow(unused))] +#![doc(hidden)] +#![forbid(unsafe_op_in_unsafe_fn)] +#![unstable( + feature = "thread_local_internals", + reason = "internal details of the thread_local macro", + issue = "none" +)] cfg_if::cfg_if! { - if #[cfg(any(all(target_family = "wasm", not(target_feature = "atomics")), target_os = "uefi"))] { - #[doc(hidden)] - mod static_local; - #[doc(hidden)] - pub use static_local::{EagerStorage, LazyStorage, thread_local_inner}; + if #[cfg(any( + all(target_family = "wasm", not(target_feature = "atomics")), + target_os = "uefi", + target_os = "zkvm", + ))] { + mod statik; + pub use statik::{EagerStorage, LazyStorage, thread_local_inner}; } else if #[cfg(target_thread_local)] { - #[doc(hidden)] - mod fast_local; - #[doc(hidden)] - pub use fast_local::{EagerStorage, LazyStorage, thread_local_inner}; + mod native; + pub use native::{EagerStorage, LazyStorage, thread_local_inner}; } else { - #[doc(hidden)] - mod os_local; - #[doc(hidden)] - pub use os_local::{Key, thread_local_inner}; + mod os; + pub use os::{Storage, thread_local_inner}; + } +} + +/// The native TLS implementation needs a way to register destructors for its data. +/// This module contains platform-specific implementations of that register. +/// +/// It turns out however that most platforms don't have a way to register a +/// destructor for each variable. On these platforms, we keep track of the +/// destructors ourselves and register (through the [`guard`] module) only a +/// single callback that runs all of the destructors in the list. +#[cfg(all(target_thread_local, not(all(target_family = "wasm", not(target_feature = "atomics")))))] +pub(crate) mod destructors { + cfg_if::cfg_if! { + if #[cfg(any( + target_os = "linux", + target_os = "android", + target_os = "fuchsia", + target_os = "redox", + target_os = "hurd", + target_os = "netbsd", + target_os = "dragonfly" + ))] { + mod linux_like; + mod list; + pub(super) use linux_like::register; + pub(super) use list::run; + } else { + mod list; + pub(super) use list::register; + pub(crate) use list::run; + } + } +} + +/// This module provides a way to schedule the execution of the destructor list +/// on systems without a per-variable destructor system. +mod guard { + cfg_if::cfg_if! { + if #[cfg(all(target_thread_local, target_vendor = "apple"))] { + mod apple; + pub(super) use apple::enable; + } else if #[cfg(target_os = "windows")] { + mod windows; + pub(super) use windows::enable; + } else if #[cfg(any( + all(target_family = "wasm", target_feature = "atomics"), + ))] { + pub(super) fn enable() { + // FIXME: Right now there is no concept of "thread exit", but + // this is likely going to show up at some point in the form of + // an exported symbol that the wasm runtime is going to be + // expected to call. 
For now we just leak everything, but if + // such a function starts to exist it will probably need to + // iterate the destructor list with this function: + #[allow(unused)] + use super::destructors::run; + } + } else if #[cfg(target_os = "hermit")] { + pub(super) fn enable() {} + } else if #[cfg(target_os = "solid_asp3")] { + mod solid; + pub(super) use solid::enable; + } else if #[cfg(all(target_thread_local, not(target_family = "wasm")))] { + mod key; + pub(super) use key::enable; + } + } +} + +/// `const`-creatable TLS keys. +/// +/// Most OSs without native TLS will provide a library-based way to create TLS +/// storage. For each TLS variable, we create a key, which can then be used to +/// reference an entry in a thread-local table. This then associates each key +/// with a pointer which we can get and set to store our data. +pub(crate) mod key { + cfg_if::cfg_if! { + if #[cfg(any( + all( + not(target_vendor = "apple"), + not(target_family = "wasm"), + target_family = "unix", + ), + target_os = "teeos", + ))] { + mod racy; + mod unix; + #[cfg(test)] + mod tests; + pub(super) use racy::LazyKey; + pub(super) use unix::{Key, set}; + #[cfg(any(not(target_thread_local), test))] + pub(super) use unix::get; + use unix::{create, destroy}; + } else if #[cfg(all(not(target_thread_local), target_os = "windows"))] { + #[cfg(test)] + mod tests; + mod windows; + pub(super) use windows::{Key, LazyKey, get, run_dtors, set}; + } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { + mod racy; + mod sgx; + #[cfg(test)] + mod tests; + pub(super) use racy::LazyKey; + pub(super) use sgx::{Key, get, set}; + use sgx::{create, destroy}; + } else if #[cfg(target_os = "xous")] { + mod racy; + #[cfg(test)] + mod tests; + mod xous; + pub(super) use racy::LazyKey; + pub(crate) use xous::destroy_tls; + pub(super) use xous::{Key, get, set}; + use xous::{create, destroy}; + } } } diff --git a/std/src/sys/thread_local/fast_local/eager.rs b/std/src/sys/thread_local/native/eager.rs similarity index 53% rename from std/src/sys/thread_local/fast_local/eager.rs rename to std/src/sys/thread_local/native/eager.rs index c2bc580530ba4..99e5ae7fb9687 100644 --- a/std/src/sys/thread_local/fast_local/eager.rs +++ b/std/src/sys/thread_local/native/eager.rs @@ -1,7 +1,7 @@ use crate::cell::{Cell, UnsafeCell}; use crate::ptr::{self, drop_in_place}; use crate::sys::thread_local::abort_on_dtor_unwind; -use crate::sys::thread_local_dtor::register_dtor; +use crate::sys::thread_local::destructors; #[derive(Clone, Copy)] enum State { @@ -21,43 +21,35 @@ impl Storage { Storage { state: Cell::new(State::Initial), val: UnsafeCell::new(val) } } - /// Get a reference to the TLS value. If the TLS variable has been destroyed, - /// `None` is returned. + /// Get a pointer to the TLS value. If the TLS variable has been destroyed, + /// a null pointer is returned. /// - /// # Safety - /// * The `self` reference must remain valid until the TLS destructor has been - /// run. - /// * The returned reference may only be used until thread destruction occurs - /// and may not be used after reentrant initialization has occurred. + /// The resulting pointer may not be used after thread destruction has + /// occurred. /// - // FIXME(#110897): return NonNull instead of lying about the lifetime. + /// # Safety + /// The `self` reference must remain valid until the TLS destructor is run. 
     #[inline]
-    pub unsafe fn get(&self) -> Option<&'static T> {
+    pub unsafe fn get(&self) -> *const T {
         match self.state.get() {
-            // SAFETY: as the state is not `Destroyed`, the value cannot have
-            // been destroyed yet. The reference fulfills the terms outlined
-            // above.
-            State::Alive => unsafe { Some(&*self.val.get()) },
-            State::Destroyed => None,
+            State::Alive => self.val.get(),
+            State::Destroyed => ptr::null(),
             State::Initial => unsafe { self.initialize() },
         }
     }

     #[cold]
-    unsafe fn initialize(&self) -> Option<&'static T> {
+    unsafe fn initialize(&self) -> *const T {
         // Register the destructor
         // SAFETY:
-        // * the destructor will be called at thread destruction.
-        // * the caller guarantees that `self` will be valid until that time.
+        // The caller guarantees that `self` will be valid until thread destruction.
         unsafe {
-            register_dtor(ptr::from_ref(self).cast_mut().cast(), destroy::<T>);
+            destructors::register(ptr::from_ref(self).cast_mut().cast(), destroy::<T>);
         }
+
         self.state.set(State::Alive);
-        // SAFETY: as the state is not `Destroyed`, the value cannot have
-        // been destroyed yet. The reference fulfills the terms outlined
-        // above.
-        unsafe { Some(&*self.val.get()) }
+        self.val.get()
     }
 }
diff --git a/std/src/sys/thread_local/fast_local/lazy.rs b/std/src/sys/thread_local/native/lazy.rs
similarity index 60%
rename from std/src/sys/thread_local/fast_local/lazy.rs
rename to std/src/sys/thread_local/native/lazy.rs
index c2e9a17145468..9d47e8ef68975 100644
--- a/std/src/sys/thread_local/fast_local/lazy.rs
+++ b/std/src/sys/thread_local/native/lazy.rs
@@ -2,7 +2,7 @@ use crate::cell::UnsafeCell;
 use crate::hint::unreachable_unchecked;
 use crate::ptr;
 use crate::sys::thread_local::abort_on_dtor_unwind;
-use crate::sys::thread_local_dtor::register_dtor;
+use crate::sys::thread_local::destructors;

 pub unsafe trait DestroyedState<T>: Sized {
     fn register_dtor(s: &Storage<T, Self>);
@@ -15,7 +15,7 @@ unsafe impl<T> DestroyedState<T> for ! {
 unsafe impl<T> DestroyedState<T> for () {
     fn register_dtor(s: &Storage<T, ()>) {
         unsafe {
-            register_dtor(ptr::from_ref(s).cast_mut().cast(), destroy::<T>);
+            destructors::register(ptr::from_ref(s).cast_mut().cast(), destroy::<T>);
         }
     }
 }
@@ -39,49 +39,31 @@ where
         Storage { state: UnsafeCell::new(State::Initial) }
     }

-    /// Get a reference to the TLS value, potentially initializing it with the
-    /// provided parameters. If the TLS variable has been destroyed, `None` is
-    /// returned.
+    /// Get a pointer to the TLS value, potentially initializing it with the
+    /// provided parameters. If the TLS variable has been destroyed, a null
+    /// pointer is returned.
     ///
-    /// # Safety
-    /// * The `self` reference must remain valid until the TLS destructor is run,
-    ///   at which point the returned reference is invalidated.
-    /// * The returned reference may only be used until thread destruction occurs
-    ///   and may not be used after reentrant initialization has occurred.
+    /// The resulting pointer may not be used after reentrant initialization
+    /// or thread destruction has occurred.
     ///
-    // FIXME(#110897): return NonNull instead of lying about the lifetime.
+    /// # Safety
+    /// The `self` reference must remain valid until the TLS destructor is run.
     #[inline]
-    pub unsafe fn get_or_init(
-        &self,
-        i: Option<&mut Option<T>>,
-        f: impl FnOnce() -> T,
-    ) -> Option<&'static T> {
-        // SAFETY:
-        // No mutable reference to the inner value exists outside the calls to
-        // `replace`. The lifetime of the returned reference fulfills the terms
-        // outlined above.
+ pub unsafe fn get_or_init(&self, i: Option<&mut Option>, f: impl FnOnce() -> T) -> *const T { let state = unsafe { &*self.state.get() }; match state { - State::Alive(v) => Some(v), - State::Destroyed(_) => None, + State::Alive(v) => v, + State::Destroyed(_) => ptr::null(), State::Initial => unsafe { self.initialize(i, f) }, } } #[cold] - unsafe fn initialize( - &self, - i: Option<&mut Option>, - f: impl FnOnce() -> T, - ) -> Option<&'static T> { + unsafe fn initialize(&self, i: Option<&mut Option>, f: impl FnOnce() -> T) -> *const T { // Perform initialization let v = i.and_then(Option::take).unwrap_or_else(f); - // SAFETY: - // If references to the inner value exist, they were created in `f` - // and are invalidated here. The caller promises to never use them - // after this. let old = unsafe { self.state.get().replace(State::Alive(v)) }; match old { // If the variable is not being recursively initialized, register @@ -92,12 +74,10 @@ where val => drop(val), } - // SAFETY: - // Initialization was completed and the state was set to `Alive`, so the - // reference fulfills the terms outlined above. + // SAFETY: the state was just set to `Alive` unsafe { let State::Alive(v) = &*self.state.get() else { unreachable_unchecked() }; - Some(v) + v } } } diff --git a/std/src/sys/thread_local/fast_local/mod.rs b/std/src/sys/thread_local/native/mod.rs similarity index 64% rename from std/src/sys/thread_local/fast_local/mod.rs rename to std/src/sys/thread_local/native/mod.rs index 25379071cb7a6..1cc45fe892dee 100644 --- a/std/src/sys/thread_local/fast_local/mod.rs +++ b/std/src/sys/thread_local/native/mod.rs @@ -1,7 +1,7 @@ //! Thread local support for platforms with native TLS. //! //! To achieve the best performance, we choose from four different types for -//! the TLS variable, depending from the method of initialization used (`const` +//! the TLS variable, depending on the method of initialization used (`const` //! or lazy) and the drop requirements of the stored type: //! //! | | `Drop` | `!Drop` | @@ -29,8 +29,6 @@ //! eliminates the `Destroyed` state for these values, which can allow more niche //! optimizations to occur for the `State` enum. For `Drop` types, `()` is used. 
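This null-pointer protocol is what ultimately surfaces to users as `LocalKey::try_with` returning an error. A small demonstration; destruction order between two TLS variables is unspecified, so this prints the outcome rather than asserting it:

```rust
use std::cell::Cell;
use std::thread;

struct Sentinel;

impl Drop for Sentinel {
    fn drop(&mut self) {
        // We are inside TLS destruction; OTHER may already be destroyed, in
        // which case try_with returns Err instead of a dangling reference.
        let result = OTHER.try_with(|c| c.get());
        println!("access during TLS destruction: {result:?}");
    }
}

thread_local! {
    static SENTINEL: Sentinel = Sentinel;
    static OTHER: Cell<u32> = Cell::new(0);
}

fn main() {
    thread::spawn(|| {
        SENTINEL.with(|_| ());
        OTHER.with(|c| c.set(1));
    })
    .join()
    .unwrap();
}
```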
-#![deny(unsafe_op_in_unsafe_fn)] - mod eager; mod lazy; @@ -52,32 +50,26 @@ pub macro thread_local_inner { (@key $t:ty, const $init:expr) => {{ const __INIT: $t = $init; - #[inline] - #[deny(unsafe_op_in_unsafe_fn)] - unsafe fn __getit( - _init: $crate::option::Option<&mut $crate::option::Option<$t>>, - ) -> $crate::option::Option<&'static $t> { - use $crate::thread::local_impl::EagerStorage; + unsafe { use $crate::mem::needs_drop; - use $crate::ptr::addr_of; + use $crate::thread::LocalKey; + use $crate::thread::local_impl::EagerStorage; - if needs_drop::<$t>() { - #[thread_local] - static VAL: EagerStorage<$t> = EagerStorage::new(__INIT); - unsafe { - VAL.get() + LocalKey::new(const { + if needs_drop::<$t>() { + |_| { + #[thread_local] + static VAL: EagerStorage<$t> = EagerStorage::new(__INIT); + VAL.get() + } + } else { + |_| { + #[thread_local] + static VAL: $t = __INIT; + &VAL + } } - } else { - #[thread_local] - static VAL: $t = __INIT; - unsafe { - $crate::option::Option::Some(&*addr_of!(VAL)) - } - } - } - - unsafe { - $crate::thread::LocalKey::new(__getit) + }) } }}, @@ -88,31 +80,26 @@ pub macro thread_local_inner { $init } - #[inline] - #[deny(unsafe_op_in_unsafe_fn)] - unsafe fn __getit( - init: $crate::option::Option<&mut $crate::option::Option<$t>>, - ) -> $crate::option::Option<&'static $t> { - use $crate::thread::local_impl::LazyStorage; + unsafe { use $crate::mem::needs_drop; + use $crate::thread::LocalKey; + use $crate::thread::local_impl::LazyStorage; - if needs_drop::<$t>() { - #[thread_local] - static VAL: LazyStorage<$t, ()> = LazyStorage::new(); - unsafe { - VAL.get_or_init(init, __init) + LocalKey::new(const { + if needs_drop::<$t>() { + |init| { + #[thread_local] + static VAL: LazyStorage<$t, ()> = LazyStorage::new(); + VAL.get_or_init(init, __init) + } + } else { + |init| { + #[thread_local] + static VAL: LazyStorage<$t, !> = LazyStorage::new(); + VAL.get_or_init(init, __init) + } } - } else { - #[thread_local] - static VAL: LazyStorage<$t, !> = LazyStorage::new(); - unsafe { - VAL.get_or_init(init, __init) - } - } - } - - unsafe { - $crate::thread::LocalKey::new(__getit) + }) } }}, ($(#[$attr:meta])* $vis:vis $name:ident, $t:ty, $($init:tt)*) => { diff --git a/std/src/sys/thread_local/os_local.rs b/std/src/sys/thread_local/os.rs similarity index 52% rename from std/src/sys/thread_local/os_local.rs rename to std/src/sys/thread_local/os.rs index d6ddbb78a9c86..e06185f00690b 100644 --- a/std/src/sys/thread_local/os_local.rs +++ b/std/src/sys/thread_local/os.rs @@ -2,7 +2,7 @@ use super::abort_on_dtor_unwind; use crate::cell::Cell; use crate::marker::PhantomData; use crate::ptr; -use crate::sys_common::thread_local_key::StaticKey as OsKey; +use crate::sys::thread_local::key::{get, set, Key, LazyKey}; #[doc(hidden)] #[allow_internal_unstable(thread_local_internals)] @@ -16,30 +16,22 @@ pub macro thread_local_inner { }, // used to generate the `LocalKey` value for `thread_local!` - (@key $t:ty, $init:expr) => { - { - #[inline] - fn __init() -> $t { $init } + (@key $t:ty, $init:expr) => {{ + #[inline] + fn __init() -> $t { $init } - // `#[inline] does not work on windows-gnu due to linking errors around dllimports. - // See https://github.com/rust-lang/rust/issues/109797. 
-            #[cfg_attr(not(windows), inline)]
-            unsafe fn __getit(
-                init: $crate::option::Option<&mut $crate::option::Option<$t>>,
-            ) -> $crate::option::Option<&'static $t> {
-                use $crate::thread::local_impl::Key;
-
-                static __KEY: Key<$t> = Key::new();
-                unsafe {
-                    __KEY.get(init, __init)
-                }
-            }
+        unsafe {
+            use $crate::thread::LocalKey;
+            use $crate::thread::local_impl::Storage;
 
-            unsafe {
-                $crate::thread::LocalKey::new(__getit)
-            }
+            // Inlining does not work on windows-gnu due to linking errors around
+            // dllimports. See https://github.com/rust-lang/rust/issues/109797.
+            LocalKey::new(#[cfg_attr(windows, inline(never))] |init| {
+                static VAL: Storage<$t> = Storage::new();
+                VAL.get(init, __init)
+            })
         }
-    },
+    }},
     ($(#[$attr:meta])* $vis:vis $name:ident, $t:ty, $($init:tt)*) => {
         $(#[$attr])* $vis const $name: $crate::thread::LocalKey<$t> =
             $crate::thread::local_impl::thread_local_inner!(@key $t, $($init)*);
@@ -49,66 +41,71 @@
 
 /// Use a regular global static to store this key; the state provided will then be
 /// thread-local.
 #[allow(missing_debug_implementations)]
-pub struct Key<T> {
-    os: OsKey,
+pub struct Storage<T> {
+    key: LazyKey,
     marker: PhantomData<Cell<T>>,
 }
 
-unsafe impl<T> Sync for Key<T> {}
+unsafe impl<T> Sync for Storage<T> {}
 
 struct Value<T: 'static> {
     value: T,
-    key: &'static Key<T>,
+    // INVARIANT: if this value is stored under a TLS key, `key` must be that `key`.
+    key: Key,
 }
 
-impl<T: 'static> Key<T> {
+impl<T: 'static> Storage<T> {
     #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
-    pub const fn new() -> Key<T> {
-        Key { os: OsKey::new(Some(destroy_value::<T>)), marker: PhantomData }
+    pub const fn new() -> Storage<T> {
+        Storage { key: LazyKey::new(Some(destroy_value::<T>)), marker: PhantomData }
     }
 
-    /// Get the value associated with this key, initializating it if necessary.
+    /// Get a pointer to the TLS value, potentially initializing it with the
+    /// provided parameters. If the TLS variable has been destroyed, a null
+    /// pointer is returned.
     ///
-    /// # Safety
-    /// * the returned reference must not be used after recursive initialization
-    ///   or thread destruction occurs.
-    pub unsafe fn get(
-        &'static self,
-        i: Option<&mut Option<T>>,
-        f: impl FnOnce() -> T,
-    ) -> Option<&'static T> {
-        // SAFETY: (FIXME: get should actually be safe)
-        let ptr = unsafe { self.os.get() as *mut Value<T> };
+    /// The resulting pointer may not be used after reentrant initialization
+    /// or thread destruction has occurred.
+    pub fn get(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T {
+        let key = self.key.force();
+        let ptr = unsafe { get(key) as *mut Value<T> };
         if ptr.addr() > 1 {
             // SAFETY: the check ensured the pointer is safe (its destructor
             // is not running) + it is coming from a trusted source (self).
-            unsafe { Some(&(*ptr).value) }
+            unsafe { &(*ptr).value }
         } else {
-            // SAFETY: At this point we are sure we have no value and so
-            // initializing (or trying to) is safe.
-            unsafe { self.try_initialize(ptr, i, f) }
+            // SAFETY: trivially correct.
+            unsafe { Self::try_initialize(key, ptr, i, f) }
         }
     }
 
+    /// # Safety
+    /// * `key` must be the result of calling `self.key.force()`
+    /// * `ptr` must be the current value associated with `key`.
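The `ptr.addr() > 1` test above relies on a three-way encoding of the per-key slot: null means not yet initialized, address 1 is the sentinel stored while `destroy_value` runs, and any other address is a live `Box<Value<T>>`. The decoding in isolation, as a sketch using the strict-provenance pointer APIs:

```rust
use std::ptr;

#[derive(Debug)]
enum Slot {
    Uninitialized,
    DestructorRunning,
    Live,
}

fn classify<T>(raw: *mut T) -> Slot {
    match raw.addr() {
        0 => Slot::Uninitialized,
        1 => Slot::DestructorRunning, // sentinel, never dereferenced
        _ => Slot::Live,
    }
}

fn main() {
    assert!(matches!(classify(ptr::null_mut::<u32>()), Slot::Uninitialized));
    let sentinel: *mut u32 = ptr::without_provenance_mut(1);
    assert!(matches!(classify(sentinel), Slot::DestructorRunning));
    let value = 5u32;
    assert!(matches!(classify(&value as *const u32 as *mut u32), Slot::Live));
}
```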
unsafe fn try_initialize( - &'static self, + key: Key, ptr: *mut Value, i: Option<&mut Option>, f: impl FnOnce() -> T, - ) -> Option<&'static T> { + ) -> *const T { if ptr.addr() == 1 { // destructor is running - return None; + return ptr::null(); } - let value = i.and_then(Option::take).unwrap_or_else(f); - let ptr = Box::into_raw(Box::new(Value { value, key: self })); - // SAFETY: (FIXME: get should actually be safe) - let old = unsafe { self.os.get() as *mut Value }; - // SAFETY: `ptr` is a correct pointer that can be destroyed by the key destructor. - unsafe { - self.os.set(ptr as *mut u8); - } + let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }); + let ptr = Box::into_raw(value); + + // SAFETY: + // * key came from a `LazyKey` and is thus correct. + // * `ptr` is a correct pointer that can be destroyed by the key destructor. + // * the value is stored under the key that it contains. + let old = unsafe { + let old = get(key) as *mut Value; + set(key, ptr as *mut u8); + old + }; + if !old.is_null() { // If the variable was recursively initialized, drop the old value. // SAFETY: We cannot be inside a `LocalKey::with` scope, as the @@ -119,7 +116,7 @@ impl Key { } // SAFETY: We just created this value above. - unsafe { Some(&(*ptr).value) } + unsafe { &(*ptr).value } } } @@ -136,8 +133,10 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) { abort_on_dtor_unwind(|| { let ptr = unsafe { Box::from_raw(ptr as *mut Value) }; let key = ptr.key; - unsafe { key.os.set(ptr::without_provenance_mut(1)) }; + // SAFETY: `key` is the TLS key `ptr` was stored under. + unsafe { set(key, ptr::without_provenance_mut(1)) }; drop(ptr); - unsafe { key.os.set(ptr::null_mut()) }; + // SAFETY: `key` is the TLS key `ptr` was stored under. 
+        unsafe { set(key, ptr::null_mut()) };
     });
 }
diff --git a/std/src/sys/thread_local/static_local.rs b/std/src/sys/thread_local/statik.rs
similarity index 64%
rename from std/src/sys/thread_local/static_local.rs
rename to std/src/sys/thread_local/statik.rs
index 6beda2e718802..0f08cab1ae4ff 100644
--- a/std/src/sys/thread_local/static_local.rs
+++ b/std/src/sys/thread_local/statik.rs
@@ -13,19 +13,14 @@ pub macro thread_local_inner {
     (@key $t:ty, const $init:expr) => {{
         const __INIT: $t = $init;
 
-        #[inline]
-        #[deny(unsafe_op_in_unsafe_fn)]
-        unsafe fn __getit(
-            _init: $crate::option::Option<&mut $crate::option::Option<$t>>,
-        ) -> $crate::option::Option<&'static $t> {
+        unsafe {
+            use $crate::thread::LocalKey;
             use $crate::thread::local_impl::EagerStorage;
 
-            static VAL: EagerStorage<$t> = EagerStorage { value: __INIT };
-            $crate::option::Option::Some(&VAL.value)
-        }
-
-        unsafe {
-            $crate::thread::LocalKey::new(__getit)
+            LocalKey::new(|_| {
+                static VAL: EagerStorage<$t> = EagerStorage { value: __INIT };
+                &VAL.value
+            })
         }
     }},
 
@@ -34,19 +29,14 @@ pub macro thread_local_inner {
         #[inline]
         fn __init() -> $t { $init }
 
-        #[inline]
-        #[deny(unsafe_op_in_unsafe_fn)]
-        unsafe fn __getit(
-            init: $crate::option::Option<&mut $crate::option::Option<$t>>,
-        ) -> $crate::option::Option<&'static $t> {
+        unsafe {
+            use $crate::thread::LocalKey;
             use $crate::thread::local_impl::LazyStorage;
 
-            static VAL: LazyStorage<$t> = LazyStorage::new();
-            unsafe { $crate::option::Option::Some(VAL.get(init, __init)) }
-        }
-
-        unsafe {
-            $crate::thread::LocalKey::new(__getit)
+            LocalKey::new(|init| {
+                static VAL: LazyStorage<$t> = LazyStorage::new();
+                VAL.get(init, __init)
+            })
         }
     }},
     ($(#[$attr:meta])* $vis:vis $name:ident, $t:ty, $($init:tt)*) => {
@@ -73,16 +63,13 @@ impl<T> LazyStorage<T> {
         LazyStorage { value: UnsafeCell::new(None) }
     }
 
-    /// Gets a reference to the contained value, initializing it if necessary.
+    /// Get a pointer to the TLS value, potentially initializing it with the
+    /// provided parameters.
     ///
-    /// # Safety
-    /// The returned reference may not be used after reentrant initialization has occurred.
+    /// The resulting pointer may not be used after reentrant initialization
+    /// has occurred.
     #[inline]
-    pub unsafe fn get(
-        &'static self,
-        i: Option<&mut Option<T>>,
-        f: impl FnOnce() -> T,
-    ) -> &'static T {
+    pub fn get(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T {
         let value = unsafe { &*self.value.get() };
         match value {
             Some(v) => v,
@@ -91,11 +78,7 @@
     }
 
     #[cold]
-    unsafe fn initialize(
-        &'static self,
-        i: Option<&mut Option<T>>,
-        f: impl FnOnce() -> T,
-    ) -> &'static T {
+    fn initialize(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T {
         let value = i.and_then(Option::take).unwrap_or_else(f);
         // Destroy the old value, after updating the TLS variable as the
         // destructor might reference it.
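`statik.rs` backs targets without threads, so `get` needs no synchronization or destructor registration; it just fills an `Option` in place. A minimal sketch of the same shape, assuming strictly single-threaded use as that backend does:

```rust
use std::cell::UnsafeCell;

struct LazyCell<T> {
    value: UnsafeCell<Option<T>>,
}

impl<T> LazyCell<T> {
    const fn new() -> LazyCell<T> {
        LazyCell { value: UnsafeCell::new(None) }
    }

    fn get(&self, f: impl FnOnce() -> T) -> *const T {
        // SAFETY: single-threaded by assumption; no &mut overlaps this read.
        match unsafe { &*self.value.get() } {
            Some(v) => v,
            None => self.initialize(f),
        }
    }

    #[cold]
    fn initialize(&self, f: impl FnOnce() -> T) -> *const T {
        let value = f();
        // Write the value, then re-borrow it. If `f` accessed the cell
        // reentrantly, whatever it observed has just been replaced, which
        // is why pointers must not be kept across reentrant initialization.
        unsafe {
            *self.value.get() = Some(value);
            (*self.value.get()).as_ref().unwrap_unchecked()
        }
    }
}

fn main() {
    let cell = LazyCell::new();
    let p = cell.get(|| 5u32);
    assert_eq!(unsafe { *p }, 5);
}
```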
diff --git a/std/src/sys_common/fs.rs b/std/src/sys_common/fs.rs index 617ac52e51ca8..acb6713cf1b14 100644 --- a/std/src/sys_common/fs.rs +++ b/std/src/sys_common/fs.rs @@ -42,7 +42,7 @@ fn remove_dir_all_recursive(path: &Path) -> io::Result<()> { fs::remove_dir(path) } -pub fn try_exists(path: &Path) -> io::Result { +pub fn exists(path: &Path) -> io::Result { match fs::metadata(path) { Ok(_) => Ok(true), Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(false), diff --git a/std/src/sys_common/mod.rs b/std/src/sys_common/mod.rs index 3a38ba1100f01..60ee405ecaaa2 100644 --- a/std/src/sys_common/mod.rs +++ b/std/src/sys_common/mod.rs @@ -20,23 +20,13 @@ #[cfg(test)] mod tests; -pub mod backtrace; pub mod fs; pub mod io; pub mod lazy_box; pub mod process; -pub mod thread_local_dtor; pub mod wstr; pub mod wtf8; -cfg_if::cfg_if! { - if #[cfg(target_os = "windows")] { - pub use crate::sys::thread_local_key; - } else { - pub mod thread_local_key; - } -} - cfg_if::cfg_if! { if #[cfg(any( all(unix, not(target_os = "l4re")), diff --git a/std/src/sys_common/thread_local_dtor.rs b/std/src/sys_common/thread_local_dtor.rs deleted file mode 100644 index 98382fc6acc23..0000000000000 --- a/std/src/sys_common/thread_local_dtor.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! Thread-local destructor -//! -//! Besides thread-local "keys" (pointer-sized non-addressable thread-local store -//! with an associated destructor), many platforms also provide thread-local -//! destructors that are not associated with any particular data. These are -//! often more efficient. -//! -//! This module provides a fallback implementation for that interface, based -//! on the less efficient thread-local "keys". Each platform provides -//! a `thread_local_dtor` module which will either re-export the fallback, -//! or implement something more efficient. - -#![unstable(feature = "thread_local_internals", issue = "none")] -#![allow(dead_code)] - -use crate::cell::RefCell; -use crate::ptr; -use crate::sys_common::thread_local_key::StaticKey; - -pub unsafe fn register_dtor_fallback(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - // The fallback implementation uses a vanilla OS-based TLS key to track - // the list of destructors that need to be run for this thread. The key - // then has its own destructor which runs all the other destructors. - // - // The destructor for DTORS is a little special in that it has a `while` - // loop to continuously drain the list of registered destructors. It - // *should* be the case that this loop always terminates because we - // provide the guarantee that a TLS key cannot be set after it is - // flagged for destruction. - - static DTORS: StaticKey = StaticKey::new(Some(run_dtors)); - // FIXME(joboet): integrate RefCell into pointer to avoid infinite recursion - // when the global allocator tries to register a destructor and just panic - // instead. 
- type List = RefCell>; - if DTORS.get().is_null() { - let v: Box = Box::new(RefCell::new(Vec::new())); - DTORS.set(Box::into_raw(v) as *mut u8); - } - let list = &*(DTORS.get() as *const List); - match list.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } - - unsafe extern "C" fn run_dtors(mut ptr: *mut u8) { - while !ptr.is_null() { - let list = Box::from_raw(ptr as *mut List).into_inner(); - for (ptr, dtor) in list.into_iter() { - dtor(ptr); - } - ptr = DTORS.get(); - DTORS.set(ptr::null_mut()); - } - } -} diff --git a/std/src/sys_common/thread_local_key.rs b/std/src/sys_common/thread_local_key.rs deleted file mode 100644 index a9cd26389cd41..0000000000000 --- a/std/src/sys_common/thread_local_key.rs +++ /dev/null @@ -1,174 +0,0 @@ -//! OS-based thread local storage for non-Windows systems -//! -//! This module provides an implementation of OS-based thread local storage, -//! using the native OS-provided facilities (think `TlsAlloc` or -//! `pthread_setspecific`). The interface of this differs from the other types -//! of thread-local-storage provided in this crate in that OS-based TLS can only -//! get/set pointer-sized data, possibly with an associated destructor. -//! -//! This module also provides two flavors of TLS. One is intended for static -//! initialization, and does not contain a `Drop` implementation to deallocate -//! the OS-TLS key. The other is a type which does implement `Drop` and hence -//! has a safe interface. -//! -//! Windows doesn't use this module at all; `sys::pal::windows::thread_local_key` -//! gets imported in its stead. -//! -//! # Usage -//! -//! This module should likely not be used directly unless other primitives are -//! being built on. Types such as `thread_local::spawn::Key` are likely much -//! more useful in practice than this OS-based version which likely requires -//! unsafe code to interoperate with. -//! -//! # Examples -//! -//! Using a dynamically allocated TLS key. Note that this key can be shared -//! among many threads via an `Arc`. -//! -//! ```ignore (cannot-doctest-private-modules) -//! let key = Key::new(None); -//! assert!(key.get().is_null()); -//! key.set(1 as *mut u8); -//! assert!(!key.get().is_null()); -//! -//! drop(key); // deallocate this TLS slot. -//! ``` -//! -//! Sometimes a statically allocated key is either required or easier to work -//! with, however. -//! -//! ```ignore (cannot-doctest-private-modules) -//! static KEY: StaticKey = INIT; -//! -//! unsafe { -//! assert!(KEY.get().is_null()); -//! KEY.set(1 as *mut u8); -//! } -//! ``` - -#![allow(non_camel_case_types)] -#![unstable(feature = "thread_local_internals", issue = "none")] -#![allow(dead_code)] - -#[cfg(test)] -mod tests; - -use crate::sync::atomic::{self, AtomicUsize, Ordering}; -use crate::sys::thread_local_key as imp; - -/// A type for TLS keys that are statically allocated. -/// -/// This type is entirely `unsafe` to use as it does not protect against -/// use-after-deallocation or use-during-deallocation. -/// -/// The actual OS-TLS key is lazily allocated when this is used for the first -/// time. The key is also deallocated when the Rust runtime exits or `destroy` -/// is called, whichever comes first. -/// -/// # Examples -/// -/// ```ignore (cannot-doctest-private-modules) -/// use tls::os::{StaticKey, INIT}; -/// -/// // Use a regular global static to store the key. -/// static KEY: StaticKey = INIT; -/// -/// // The state provided via `get` and `set` is thread-local. 
-/// unsafe { -/// assert!(KEY.get().is_null()); -/// KEY.set(1 as *mut u8); -/// } -/// ``` -pub struct StaticKey { - /// Inner static TLS key (internals). - key: AtomicUsize, - /// Destructor for the TLS value. - /// - /// See `Key::new` for information about when the destructor runs and how - /// it runs. - dtor: Option, -} - -/// Constant initialization value for static TLS keys. -/// -/// This value specifies no destructor by default. -pub const INIT: StaticKey = StaticKey::new(None); - -// Define a sentinel value that is likely not to be returned -// as a TLS key. -#[cfg(not(target_os = "nto"))] -const KEY_SENTVAL: usize = 0; -// On QNX Neutrino, 0 is always returned when currently not in use. -// Using 0 would mean to always create two keys and remote the first -// one (with value of 0) immediately afterwards. -#[cfg(target_os = "nto")] -const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1; - -impl StaticKey { - #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] - pub const fn new(dtor: Option) -> StaticKey { - StaticKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor } - } - - /// Gets the value associated with this TLS key - /// - /// This will lazily allocate a TLS key from the OS if one has not already - /// been allocated. - #[inline] - pub unsafe fn get(&self) -> *mut u8 { - imp::get(self.key()) - } - - /// Sets this TLS key to a new value. - /// - /// This will lazily allocate a TLS key from the OS if one has not already - /// been allocated. - #[inline] - pub unsafe fn set(&self, val: *mut u8) { - imp::set(self.key(), val) - } - - #[inline] - unsafe fn key(&self) -> imp::Key { - match self.key.load(Ordering::Acquire) { - KEY_SENTVAL => self.lazy_init() as imp::Key, - n => n as imp::Key, - } - } - - unsafe fn lazy_init(&self) -> usize { - // POSIX allows the key created here to be KEY_SENTVAL, but the compare_exchange - // below relies on using KEY_SENTVAL as a sentinel value to check who won the - // race to set the shared TLS key. As far as I know, there is no - // guaranteed value that cannot be returned as a posix_key_create key, - // so there is no value we can initialize the inner key with to - // prove that it has not yet been set. As such, we'll continue using a - // value of KEY_SENTVAL, but with some gyrations to make sure we have a non-KEY_SENTVAL - // value returned from the creation routine. - // FIXME: this is clearly a hack, and should be cleaned up. 
- let key1 = imp::create(self.dtor); - let key = if key1 as usize != KEY_SENTVAL { - key1 - } else { - let key2 = imp::create(self.dtor); - imp::destroy(key1); - key2 - }; - rtassert!(key as usize != KEY_SENTVAL); - match self.key.compare_exchange( - KEY_SENTVAL, - key as usize, - Ordering::Release, - Ordering::Acquire, - ) { - // The CAS succeeded, so we've created the actual key - Ok(_) => key as usize, - // If someone beat us to the punch, use their key instead - Err(n) => { - imp::destroy(key); - n - } - } - } -} diff --git a/std/src/sys_common/thread_local_key/tests.rs b/std/src/sys_common/thread_local_key/tests.rs deleted file mode 100644 index 48bed31af517c..0000000000000 --- a/std/src/sys_common/thread_local_key/tests.rs +++ /dev/null @@ -1,17 +0,0 @@ -use super::StaticKey; -use core::ptr; - -#[test] -fn statik() { - static K1: StaticKey = StaticKey::new(None); - static K2: StaticKey = StaticKey::new(None); - - unsafe { - assert!(K1.get().is_null()); - assert!(K2.get().is_null()); - K1.set(ptr::without_provenance_mut(1)); - K2.set(ptr::without_provenance_mut(2)); - assert_eq!(K1.get() as usize, 1); - assert_eq!(K2.get() as usize, 2); - } -} diff --git a/std/src/sys_common/wtf8.rs b/std/src/sys_common/wtf8.rs index 38e15f9f54960..6aeeb6259285d 100644 --- a/std/src/sys_common/wtf8.rs +++ b/std/src/sys_common/wtf8.rs @@ -325,6 +325,11 @@ impl Wtf8Buf { self.bytes.shrink_to(min_capacity) } + #[inline] + pub fn leak<'a>(self) -> &'a mut Wtf8 { + unsafe { Wtf8::from_mut_bytes_unchecked(self.bytes.leak()) } + } + /// Returns the number of bytes that this string buffer can hold without reallocating. #[inline] pub fn capacity(&self) -> usize { @@ -469,10 +474,13 @@ impl Wtf8Buf { Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } } - /// Part of a hack to make PathBuf::push/pop more efficient. + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. #[inline] - pub(crate) fn as_mut_vec_for_path_buf(&mut self) -> &mut Vec { - &mut self.bytes + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.bytes.extend_from_slice(other); + self.is_known_utf8 = false; } } @@ -594,7 +602,8 @@ impl Wtf8 { /// marked unsafe. #[inline] pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { - mem::transmute(value) + // SAFETY: start with &[u8], end with fancy &[u8] + unsafe { &*(value as *const [u8] as *const Wtf8) } } /// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice. @@ -603,7 +612,8 @@ impl Wtf8 { /// marked unsafe. #[inline] unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 { - mem::transmute(value) + // SAFETY: start with &mut [u8], end with fancy &mut [u8] + unsafe { &mut *(value as *mut [u8] as *mut Wtf8) } } /// Returns the length, in WTF-8 bytes. 
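The `Wtf8` hunks above replace `mem::transmute` with an explicit raw-pointer cast, which states the layout assumption directly and also works for unsized types. The same pattern on a hypothetical transparent wrapper:

```rust
/// A toy stand-in for `Wtf8`: a transparent wrapper around a byte slice.
#[repr(transparent)]
struct Bytes([u8]);

impl Bytes {
    fn from_bytes(value: &[u8]) -> &Bytes {
        // SAFETY: `Bytes` is `#[repr(transparent)]` over `[u8]`, so the
        // layout and the slice pointer metadata are identical.
        unsafe { &*(value as *const [u8] as *const Bytes) }
    }

    fn from_bytes_mut(value: &mut [u8]) -> &mut Bytes {
        // SAFETY: same layout argument as above.
        unsafe { &mut *(value as *mut [u8] as *mut Bytes) }
    }
}

fn main() {
    let slice = Bytes::from_bytes(b"abc");
    assert_eq!(slice.0.len(), 3);

    let mut buf = *b"abc";
    Bytes::from_bytes_mut(&mut buf).0[0] = b'x';
    assert_eq!(&buf, b"xbc");
}
```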
@@ -934,8 +944,12 @@ pub fn check_utf8_boundary(slice: &Wtf8, index: usize) { /// Copied from core::str::raw::slice_unchecked #[inline] pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { - // memory layout of a &[u8] and &Wtf8 are the same - Wtf8::from_bytes_unchecked(slice::from_raw_parts(s.bytes.as_ptr().add(begin), end - begin)) + // SAFETY: memory layout of a &[u8] and &Wtf8 are the same + unsafe { + let len = end - begin; + let start = s.as_bytes().as_ptr().add(begin); + Wtf8::from_bytes_unchecked(slice::from_raw_parts(start, len)) + } } /// Copied from core::str::raw::slice_error_fail diff --git a/std/src/sys_common/wtf8/tests.rs b/std/src/sys_common/wtf8/tests.rs index 6a1cc41a8fb04..b57c99a8452a1 100644 --- a/std/src/sys_common/wtf8/tests.rs +++ b/std/src/sys_common/wtf8/tests.rs @@ -725,3 +725,27 @@ fn wtf8_utf8_boundary_between_surrogates() { string.push(CodePoint::from_u32(0xD800).unwrap()); check_utf8_boundary(&string, 3); } + +#[test] +fn wobbled_wtf8_plus_bytes_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wobbled_wtf8_plus_str_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.push_str("some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_utf8_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.push_str("some utf-8"); + assert!(string.is_known_utf8); +} diff --git a/std/src/thread/local.rs b/std/src/thread/local.rs index c1b4440e56088..f147c5fdcd146 100644 --- a/std/src/thread/local.rs +++ b/std/src/thread/local.rs @@ -62,7 +62,7 @@ use crate::fmt; /// FOO.set(2); /// /// // each thread starts out with the initial value of 1 -/// let t = thread::spawn(move|| { +/// let t = thread::spawn(move || { /// assert_eq!(FOO.get(), 1); /// FOO.set(3); /// }); @@ -123,7 +123,7 @@ pub struct LocalKey { // trivially devirtualizable by LLVM because the value of `inner` never // changes and the constant should be readonly within a crate. This mainly // only runs into problems when TLS statics are exported across crates. - inner: unsafe fn(Option<&mut Option>) -> Option<&'static T>, + inner: fn(Option<&mut Option>) -> *const T, } #[stable(feature = "std_debug", since = "1.16.0")] @@ -238,9 +238,7 @@ impl LocalKey { issue = "none" )] #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] - pub const unsafe fn new( - inner: unsafe fn(Option<&mut Option>) -> Option<&'static T>, - ) -> LocalKey { + pub const unsafe fn new(inner: fn(Option<&mut Option>) -> *const T) -> LocalKey { LocalKey { inner } } @@ -281,8 +279,7 @@ impl LocalKey { where F: FnOnce(&T) -> R, { - // SAFETY: `inner` is safe to call within the lifetime of the thread - let thread_local = unsafe { (self.inner)(None).ok_or(AccessError)? }; + let thread_local = unsafe { (self.inner)(None).as_ref().ok_or(AccessError)? 
}; Ok(f(thread_local)) } @@ -304,9 +301,8 @@ impl LocalKey { { let mut init = Some(init); - // SAFETY: `inner` is safe to call within the lifetime of the thread let reference = unsafe { - (self.inner)(Some(&mut init)).expect( + (self.inner)(Some(&mut init)).as_ref().expect( "cannot access a Thread Local Storage value \ during or after destruction", ) diff --git a/std/src/thread/mod.rs b/std/src/thread/mod.rs index 22215873933d6..c8ee365392f85 100644 --- a/std/src/thread/mod.rs +++ b/std/src/thread/mod.rs @@ -539,7 +539,7 @@ impl Builder { let f = f.into_inner(); set_current(their_thread); let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { - crate::sys_common::backtrace::__rust_begin_short_backtrace(f) + crate::sys::backtrace::__rust_begin_short_backtrace(f) })); // SAFETY: `their_packet` as been built just above and moved by the // closure (it is an Arc<...>) and `my_packet` will be stored in the @@ -561,7 +561,8 @@ impl Builder { let main = Box::new(main); // SAFETY: dynamic size and alignment of the Box remain the same. See below for why the // lifetime change is justified. - let main = unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + 'static)) }; + let main = + unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + Send + 'static)) }; Ok(JoinInner { // SAFETY: @@ -1544,7 +1545,7 @@ struct Packet<'scope, T> { // The type `T` should already always be Send (otherwise the thread could not // have been created) and the Packet is Sync because all access to the // `UnsafeCell` synchronized (by the `join()` boundary), and `ScopeData` is Sync. -unsafe impl<'scope, T: Sync> Sync for Packet<'scope, T> {} +unsafe impl<'scope, T: Send> Sync for Packet<'scope, T> {} impl<'scope, T> Drop for Packet<'scope, T> { fn drop(&mut self) { diff --git a/std/tests/create_dir_all_bare.rs b/std/tests/create_dir_all_bare.rs index fd2a7f906f839..79c3c8f528efa 100644 --- a/std/tests/create_dir_all_bare.rs +++ b/std/tests/create_dir_all_bare.rs @@ -2,6 +2,7 @@ //! Note that this test changes the current directory so //! should not be in the same process as other tests. + use std::env; use std::fs; use std::path::{Path, PathBuf}; diff --git a/std/tests/run-time-detect.rs b/std/tests/run-time-detect.rs index c9b9c54e3d49c..6948670565662 100644 --- a/std/tests/run-time-detect.rs +++ b/std/tests/run-time-detect.rs @@ -121,10 +121,8 @@ fn x86_all() { println!("avx512bw: {:?}", is_x86_feature_detected!("avx512bw")); println!("avx512cd: {:?}", is_x86_feature_detected!("avx512cd")); println!("avx512dq: {:?}", is_x86_feature_detected!("avx512dq")); - println!("avx512er: {:?}", is_x86_feature_detected!("avx512er")); println!("avx512f: {:?}", is_x86_feature_detected!("avx512f")); println!("avx512ifma: {:?}", is_x86_feature_detected!("avx512ifma")); - println!("avx512pf: {:?}", is_x86_feature_detected!("avx512pf")); println!("avx512vbmi2: {:?}", is_x86_feature_detected!("avx512vbmi2")); println!("avx512vbmi: {:?}", is_x86_feature_detected!("avx512vbmi")); println!("avx512vl: {:?}", is_x86_feature_detected!("avx512vl")); diff --git a/std/tests/windows.rs b/std/tests/windows.rs new file mode 100644 index 0000000000000..9f7596f1bc2c0 --- /dev/null +++ b/std/tests/windows.rs @@ -0,0 +1,14 @@ +#![cfg(windows)] +//! 
An external test.
+
+use std::{ffi::OsString, os::windows::ffi::OsStringExt, path::PathBuf};
+
+#[test]
+#[should_panic]
+fn os_string_must_know_it_isnt_utf8_issue_126291() {
+    let mut utf8 = PathBuf::from(OsString::from("utf8".to_owned()));
+    let non_utf8: OsString =
+        OsStringExt::from_wide(&[0x6e, 0x6f, 0x6e, 0xd800, 0x75, 0x74, 0x66, 0x38]);
+    utf8.set_extension(&non_utf8);
+    utf8.into_os_string().into_string().unwrap();
+}
diff --git a/sysroot/Cargo.toml b/sysroot/Cargo.toml
index 1ddacd92e6b94..169eeeca8c2e8 100644
--- a/sysroot/Cargo.toml
+++ b/sysroot/Cargo.toml
@@ -27,3 +27,4 @@ profiler = ["std/profiler"]
 std_detect_file_io = ["std/std_detect_file_io"]
 std_detect_dlsym_getauxval = ["std/std_detect_dlsym_getauxval"]
 std_detect_env_override = ["std/std_detect_env_override"]
+windows_raw_dylib = ["std/windows_raw_dylib"]
diff --git a/test/Cargo.toml b/test/Cargo.toml
index 0e2409f63ab1a..75cc7c00e389c 100644
--- a/test/Cargo.toml
+++ b/test/Cargo.toml
@@ -7,8 +7,6 @@ edition = "2021"
 getopts = { version = "0.2.21", features = ['rustc-dep-of-std'] }
 std = { path = "../std" }
 core = { path = "../core" }
-panic_unwind = { path = "../panic_unwind" }
-panic_abort = { path = "../panic_abort" }
 
 [target.'cfg(not(all(windows, target_env = "msvc")))'.dependencies]
 libc = { version = "0.2.150", default-features = false }
diff --git a/test/src/bench.rs b/test/src/bench.rs
index 64ca13c0d4ed3..9f34f54c3d60a 100644
--- a/test/src/bench.rs
+++ b/test/src/bench.rs
@@ -1,4 +1,5 @@
 //! Benchmarking module.
+
 use super::{
     event::CompletedTest,
     options::BenchMode,
diff --git a/test/src/cli.rs b/test/src/cli.rs
index 6ac3b3eaa797b..b7d24405b775e 100644
--- a/test/src/cli.rs
+++ b/test/src/cli.rs
@@ -200,7 +200,7 @@ Test Attributes:
 pub fn parse_opts(args: &[String]) -> Option<OptRes> {
     // Parse matches.
     let opts = optgroups();
-    let binary = args.get(0).map(|c| &**c).unwrap_or("...");
+    let binary = args.first().map(|c| &**c).unwrap_or("...");
     let args = args.get(1..).unwrap_or(args);
     let matches = match opts.parse(args) {
         Ok(m) => m,
diff --git a/test/src/helpers/concurrency.rs b/test/src/helpers/concurrency.rs
index b395adcf885ce..1854c6a76524d 100644
--- a/test/src/helpers/concurrency.rs
+++ b/test/src/helpers/concurrency.rs
@@ -1,5 +1,6 @@
 //! Helper module which helps to determine amount of threads to be used
 //! during tests execution.
+
 use std::{env, num::NonZero, thread};
 
 pub fn get_concurrency() -> usize {
diff --git a/test/src/helpers/metrics.rs b/test/src/helpers/metrics.rs
index f77a23e6875b2..bc38969cefb8d 100644
--- a/test/src/helpers/metrics.rs
+++ b/test/src/helpers/metrics.rs
@@ -1,4 +1,5 @@
 //! Benchmark metrics.
+ use std::collections::BTreeMap; #[derive(Clone, PartialEq, Debug, Copy)] diff --git a/test/src/lib.rs b/test/src/lib.rs index 7bd08a0605f83..71cb796b93705 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -25,7 +25,6 @@ #![feature(test)] #![allow(internal_features)] -// Public reexports pub use self::bench::{black_box, Bencher}; pub use self::console::run_tests_console; pub use self::options::{ColorConfig, Options, OutputFormat, RunIgnored, ShouldPanic}; @@ -58,7 +57,7 @@ use std::{ env, io, io::prelude::Write, mem::ManuallyDrop, - panic::{self, catch_unwind, AssertUnwindSafe, PanicInfo}, + panic::{self, catch_unwind, AssertUnwindSafe, PanicHookInfo}, process::{self, Command, Termination}, sync::mpsc::{channel, Sender}, sync::{Arc, Mutex}, @@ -123,7 +122,7 @@ pub fn test_main(args: &[String], tests: Vec, options: Option| { + move |info: &'_ PanicHookInfo<'_>| { if !info.can_unwind() { std::mem::forget(std::io::stderr().lock()); let mut stdout = ManuallyDrop::new(std::io::stdout().lock()); @@ -726,7 +725,7 @@ fn spawn_test_subprocess( fn run_test_in_spawned_subprocess(desc: TestDesc, runnable_test: RunnableTest) -> ! { let builtin_panic_hook = panic::take_hook(); - let record_result = Arc::new(move |panic_info: Option<&'_ PanicInfo<'_>>| { + let record_result = Arc::new(move |panic_info: Option<&'_ PanicHookInfo<'_>>| { let test_result = match panic_info { Some(info) => calc_result(&desc, Err(info.payload()), &None, &None), None => calc_result(&desc, Ok(()), &None, &None), diff --git a/test/src/term/terminfo/parm.rs b/test/src/term/terminfo/parm.rs index 2815f6cfc77fe..c5b4ef01893c2 100644 --- a/test/src/term/terminfo/parm.rs +++ b/test/src/term/terminfo/parm.rs @@ -524,7 +524,7 @@ fn format(val: Param, op: FormatOp, flags: Flags) -> Result, String> { } else { let mut s_ = Vec::with_capacity(flags.width); s_.extend(repeat(b' ').take(n)); - s_.extend(s.into_iter()); + s_.extend(s); s = s_; } } diff --git a/test/src/term/win.rs b/test/src/term/win.rs index 55020141a827d..65764c0ffc1b9 100644 --- a/test/src/term/win.rs +++ b/test/src/term/win.rs @@ -22,6 +22,8 @@ type WORD = u16; type DWORD = u32; type BOOL = i32; type HANDLE = *mut u8; +// https://docs.microsoft.com/en-us/windows/console/getstdhandle +const STD_OUTPUT_HANDLE: DWORD = -11 as _; #[allow(non_snake_case)] #[repr(C)] @@ -99,16 +101,13 @@ impl WinConsole { accum |= color_to_bits(self.background) << 4; unsafe { - // Magic -11 means stdout, from - // https://docs.microsoft.com/en-us/windows/console/getstdhandle - // // You may be wondering, "but what about stderr?", and the answer // to that is that setting terminal attributes on the stdout // handle also sets them for stderr, since they go to the same // terminal! Admittedly, this is fragile, since stderr could be // redirected to a different console. This is good enough for // rustc though. See #13400. 
- let out = GetStdHandle(-11i32 as DWORD); + let out = GetStdHandle(STD_OUTPUT_HANDLE); SetConsoleTextAttribute(out, accum); } } @@ -120,9 +119,8 @@ impl WinConsole { let bg; unsafe { let mut buffer_info = MaybeUninit::::uninit(); - if GetConsoleScreenBufferInfo(GetStdHandle(-11i32 as DWORD), buffer_info.as_mut_ptr()) - != 0 - { + let handle = GetStdHandle(STD_OUTPUT_HANDLE); + if GetConsoleScreenBufferInfo(handle, buffer_info.as_mut_ptr()) != 0 { let buffer_info = buffer_info.assume_init(); fg = bits_to_color(buffer_info.wAttributes); bg = bits_to_color(buffer_info.wAttributes >> 4); diff --git a/unwind/src/lib.rs b/unwind/src/lib.rs index 544d9fbf1ae0f..45a1c334a44dd 100644 --- a/unwind/src/lib.rs +++ b/unwind/src/lib.rs @@ -2,7 +2,7 @@ #![unstable(feature = "panic_unwind", issue = "32837")] #![feature(link_cfg)] #![feature(staged_api)] -#![feature(c_unwind)] +#![cfg_attr(bootstrap, feature(c_unwind))] #![feature(strict_provenance)] #![cfg_attr(target_arch = "wasm64", feature(simd_wasm64))] #![cfg_attr(not(target_env = "msvc"), feature(libc))]
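The new named constant replaces the magic `-11i32 as DWORD` that previously appeared at both `GetStdHandle` call sites in `win.rs`; the cast is an ordinary two's-complement reinterpretation:

```rust
fn main() {
    type DWORD = u32;
    // Same value the diff writes as `-11 as _`.
    const STD_OUTPUT_HANDLE: DWORD = -11i32 as DWORD;
    assert_eq!(STD_OUTPUT_HANDLE, 0xFFFF_FFF5);
    assert_eq!(STD_OUTPUT_HANDLE as i32, -11);
}
```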

::Metadata +); define!("mir_copy_for_deref", fn CopyForDeref(place: T) -> T); define!("mir_retag", fn Retag(place: T)); define!("mir_move", fn Move(place: T) -> T); @@ -403,18 +409,22 @@ define!( /// /// #[custom_mir(dialect = "built")] /// fn unwrap_deref(opt: Option<&i32>) -> i32 { - /// mir!({ - /// RET = *Field::<&i32>(Variant(opt, 1), 0); - /// Return() - /// }) + /// mir! { + /// { + /// RET = *Field::<&i32>(Variant(opt, 1), 0); + /// Return() + /// } + /// } /// } /// /// #[custom_mir(dialect = "built")] /// fn set(opt: &mut Option) { - /// mir!({ - /// place!(Field(Variant(*opt, 1), 0)) = 5; - /// Return() - /// }) + /// mir! { + /// { + /// place!(Field(Variant(*opt, 1), 0)) = 5; + /// Return() + /// } + /// } /// } /// ``` fn Field(place: (), field: u32) -> F @@ -434,6 +444,13 @@ define!( /// generated via the normal `mem::transmute`. fn CastTransmute(operand: T) -> U ); +define!( + "mir_cast_ptr_to_ptr", + /// Emits a `CastKind::PtrToPtr` cast. + /// + /// This allows bypassing normal validation to generate strange casts. + fn CastPtrToPtr(operand: T) -> U +); define!( "mir_make_place", #[doc(hidden)] @@ -451,7 +468,7 @@ define!( /// your MIR into something that is easier to parse in the compiler. #[rustc_macro_transparency = "transparent"] pub macro mir { - ( + { $(type RET = $ret_ty:ty ;)? $(let $local_decl:ident $(: $local_decl_ty:ty)? ;)* $(debug $dbg_name:ident => $dbg_data:expr ;)* @@ -465,7 +482,7 @@ pub macro mir { $($block:tt)* } )* - ) => {{ + } => {{ // First, we declare all basic blocks. __internal_declare_basic_blocks!($( $block_name $(($block_cleanup))? diff --git a/core/src/intrinsics/simd.rs b/core/src/intrinsics/simd.rs index d1be534eaf083..30734c020b39b 100644 --- a/core/src/intrinsics/simd.rs +++ b/core/src/intrinsics/simd.rs @@ -152,7 +152,7 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn simd_fabs(x: T) -> T; - /// Elementwise minimum of a vector. + /// Elementwise minimum of two vectors. /// /// `T` must be a vector of floating-point primitive types. /// @@ -160,7 +160,7 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn simd_fmin(x: T, y: T) -> T; - /// Elementwise maximum of a vector. + /// Elementwise maximum of two vectors. /// /// `T` must be a vector of floating-point primitive types. /// @@ -263,9 +263,6 @@ extern "rust-intrinsic" { /// /// `V` must be a vector of integers with the same length as `T` (but any element size). /// - /// `idx` must be a constant: either naming a constant item, or an inline - /// `const {}` expression. - /// /// For each pointer in `ptr`, if the corresponding value in `mask` is `!0`, read the pointer. /// Otherwise if the corresponding value in `mask` is `0`, return the corresponding value from /// `val`. @@ -387,7 +384,7 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn simd_reduce_mul_ordered(x: T, y: U) -> U; - /// Add elements within a vector in arbitrary order. May also be re-associated with + /// Multiply elements within a vector in arbitrary order. May also be re-associated with /// unordered additions on the inputs/outputs. /// /// `T` must be a vector of integer or floating-point primitive types. @@ -405,7 +402,7 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn simd_reduce_all(x: T) -> bool; - /// Check if all mask values are true. + /// Check if any mask value is true. /// /// `T` must be a vector of integer primitive types. /// @@ -463,7 +460,7 @@ extern "rust-intrinsic" { /// `T` must be an integer vector. 
/// /// `U` must be either the smallest unsigned integer with at least as many bits as the length - /// of `T`, or the smallest array of `u8` with as many bits as the length of `T`. + /// of `T`, or the smallest array of `u8` with at least as many bits as the length of `T`. /// /// Each element is truncated to a single bit and packed into the result. /// @@ -475,12 +472,19 @@ extern "rust-intrinsic" { /// * On little endian, the least significant bit corresponds to the first vector element. /// * On big endian, the least significant bit corresponds to the last vector element. /// - /// For example, `[!0, 0, !0, !0]` packs to `0b1101` on little endian and `0b1011` on big - /// endian. + /// For example, `[!0, 0, !0, !0]` packs to + /// - `0b1101u8` or `[0b1101]` on little endian, and + /// - `0b1011u8` or `[0b1011]` on big endian. + /// + /// To consider a larger example, + /// `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs to + /// - `0b0100001100000001u16` or `[0b00000001, 0b01000011]` on little endian, and + /// - `0b1000000011000010u16` or `[0b10000000, 0b11000010]` on big endian. /// - /// To consider a larger example, `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs - /// to `[0b00000001, 0b01000011]` or `0b0100001100000001` on little endian, and `[0b10000000, - /// 0b11000010]` or `0b1000000011000010` on big endian. + /// And finally, a non-power-of-2 example with multiple bytes: + /// `[!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]` packs to + /// - `0b0101001011u16` or `[0b01001011, 0b01]` on little endian, and + /// - `0b1101001010u16` or `[0b11, 0b01001010]` on big endian. /// /// # Safety /// `x` must contain only `0` and `!0`. @@ -573,7 +577,6 @@ extern "rust-intrinsic" { /// /// `T` must be a vector of integers. #[rustc_nounwind] - #[cfg(not(bootstrap))] pub fn simd_ctpop(x: T) -> T; /// Count the trailing zeros of each element. diff --git a/core/src/iter/adapters/chain.rs b/core/src/iter/adapters/chain.rs index bcaac2f42cf04..dad3d79acb183 100644 --- a/core/src/iter/adapters/chain.rs +++ b/core/src/iter/adapters/chain.rs @@ -4,8 +4,8 @@ use crate::ops::Try; /// An iterator that links two iterators together, in a chain. /// -/// This `struct` is created by [`Iterator::chain`]. See its documentation -/// for more. +/// This `struct` is created by [`chain`] or [`Iterator::chain`]. See their +/// documentation for more. /// /// # Examples /// @@ -38,6 +38,39 @@ impl Chain { } } +/// Converts the arguments to iterators and links them together, in a chain. +/// +/// See the documentation of [`Iterator::chain`] for more. 
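To make the `simd_bitmask` packing rules above easy to verify by hand, here is a scalar model of the little-endian rule (lane `i` maps to bit `i`); it models only the documented behaviour, not the intrinsic itself:

```rust
fn bitmask(lanes: &[bool]) -> u16 {
    lanes
        .iter()
        .enumerate()
        .fold(0, |acc, (i, &lane)| acc | ((lane as u16) << i))
}

fn main() {
    // The four-lane example from the docs: [!0, 0, !0, !0] packs to 0b1101.
    assert_eq!(bitmask(&[true, false, true, true]), 0b1101);

    // The ten-lane example: [!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]
    // packs to 0b0101001011 on little endian.
    let lanes = [true, true, false, true, false, false, true, false, true, false];
    assert_eq!(bitmask(&lanes), 0b01_0100_1011);
}
```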
+/// +/// # Examples +/// +/// ``` +/// #![feature(iter_chain)] +/// +/// use std::iter::chain; +/// +/// let a = [1, 2, 3]; +/// let b = [4, 5, 6]; +/// +/// let mut iter = chain(a, b); +/// +/// assert_eq!(iter.next(), Some(1)); +/// assert_eq!(iter.next(), Some(2)); +/// assert_eq!(iter.next(), Some(3)); +/// assert_eq!(iter.next(), Some(4)); +/// assert_eq!(iter.next(), Some(5)); +/// assert_eq!(iter.next(), Some(6)); +/// assert_eq!(iter.next(), None); +/// ``` +#[unstable(feature = "iter_chain", reason = "recently added", issue = "125964")] +pub fn chain(a: A, b: B) -> Chain +where + A: IntoIterator, + B: IntoIterator, +{ + Chain::new(a.into_iter(), b.into_iter()) +} + #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for Chain where diff --git a/core/src/iter/adapters/copied.rs b/core/src/iter/adapters/copied.rs index 6d82d1581f79d..d772e7b36e09e 100644 --- a/core/src/iter/adapters/copied.rs +++ b/core/src/iter/adapters/copied.rs @@ -202,7 +202,7 @@ where T: Copy, { fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter> { - let mut raw_array = MaybeUninit::uninit_array(); + let mut raw_array = [const { MaybeUninit::uninit() }; N]; let len = self.len(); diff --git a/core/src/iter/adapters/filter.rs b/core/src/iter/adapters/filter.rs index a7f1fde6975c0..ba49070329c22 100644 --- a/core/src/iter/adapters/filter.rs +++ b/core/src/iter/adapters/filter.rs @@ -3,7 +3,7 @@ use crate::iter::{adapters::SourceIter, FusedIterator, InPlaceIterable, TrustedF use crate::num::NonZero; use crate::ops::Try; use core::array; -use core::mem::{ManuallyDrop, MaybeUninit}; +use core::mem::MaybeUninit; use core::ops::ControlFlow; /// An iterator that filters the elements of `iter` with `predicate`. @@ -27,6 +27,42 @@ impl Filter { } } +impl Filter +where + I: Iterator, + P: FnMut(&I::Item) -> bool, +{ + #[inline] + fn next_chunk_dropless( + &mut self, + ) -> Result<[I::Item; N], array::IntoIter> { + let mut array: [MaybeUninit; N] = [const { MaybeUninit::uninit() }; N]; + let mut initialized = 0; + + let result = self.iter.try_for_each(|element| { + let idx = initialized; + // branchless index update combined with unconditionally copying the value even when + // it is filtered reduces branching and dependencies in the loop. + initialized = idx + (self.predicate)(&element) as usize; + // SAFETY: Loop conditions ensure the index is in bounds. + unsafe { array.get_unchecked_mut(idx) }.write(element); + + if initialized < N { ControlFlow::Continue(()) } else { ControlFlow::Break(()) } + }); + + match result { + ControlFlow::Break(()) => { + // SAFETY: The loop above is only explicitly broken when the array has been fully initialized + Ok(unsafe { MaybeUninit::array_assume_init(array) }) + } + ControlFlow::Continue(()) => { + // SAFETY: The range is in bounds since the loop breaks when reaching N elements. + Err(unsafe { array::IntoIter::new_unchecked(array, 0..initialized) }) + } + } + } +} + #[stable(feature = "core_impl_debug", since = "1.9.0")] impl fmt::Debug for Filter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -64,52 +100,16 @@ where fn next_chunk( &mut self, ) -> Result<[Self::Item; N], array::IntoIter> { - let mut array: [MaybeUninit; N] = MaybeUninit::uninit_array(); - - struct Guard<'a, T> { - array: &'a mut [MaybeUninit], - initialized: usize, - } - - impl Drop for Guard<'_, T> { - #[inline] - fn drop(&mut self) { - if const { crate::mem::needs_drop::() } { - // SAFETY: self.initialized is always <= N, which also is the length of the array. 
- unsafe { - core::ptr::drop_in_place(MaybeUninit::slice_assume_init_mut( - self.array.get_unchecked_mut(..self.initialized), - )); - } - } + // avoid codegen for the dead branch + let fun = const { + if crate::mem::needs_drop::() { + array::iter_next_chunk:: + } else { + Self::next_chunk_dropless:: } - } - - let mut guard = Guard { array: &mut array, initialized: 0 }; - - let result = self.iter.try_for_each(|element| { - let idx = guard.initialized; - guard.initialized = idx + (self.predicate)(&element) as usize; - - // SAFETY: Loop conditions ensure the index is in bounds. - unsafe { guard.array.get_unchecked_mut(idx) }.write(element); - - if guard.initialized < N { ControlFlow::Continue(()) } else { ControlFlow::Break(()) } - }); + }; - let guard = ManuallyDrop::new(guard); - - match result { - ControlFlow::Break(()) => { - // SAFETY: The loop above is only explicitly broken when the array has been fully initialized - Ok(unsafe { MaybeUninit::array_assume_init(array) }) - } - ControlFlow::Continue(()) => { - let initialized = guard.initialized; - // SAFETY: The range is in bounds since the loop breaks when reaching N elements. - Err(unsafe { array::IntoIter::new_unchecked(array, 0..initialized) }) - } - } + fun(self) } #[inline] diff --git a/core/src/iter/adapters/filter_map.rs b/core/src/iter/adapters/filter_map.rs index 1a5f9e6265454..2126619a58a87 100644 --- a/core/src/iter/adapters/filter_map.rs +++ b/core/src/iter/adapters/filter_map.rs @@ -68,7 +68,7 @@ where fn next_chunk( &mut self, ) -> Result<[Self::Item; N], array::IntoIter> { - let mut array: [MaybeUninit; N] = MaybeUninit::uninit_array(); + let mut array: [MaybeUninit; N] = [const { MaybeUninit::uninit() }; N]; struct Guard<'a, T> { array: &'a mut [MaybeUninit], diff --git a/core/src/iter/adapters/map_windows.rs b/core/src/iter/adapters/map_windows.rs index 5f39b24583427..182775121369e 100644 --- a/core/src/iter/adapters/map_windows.rs +++ b/core/src/iter/adapters/map_windows.rs @@ -110,7 +110,8 @@ impl MapWindowsInner { impl Buffer { fn try_from_iter(iter: &mut impl Iterator) -> Option { let first_half = crate::array::iter_next_chunk(iter).ok()?; - let buffer = [MaybeUninit::new(first_half).transpose(), MaybeUninit::uninit_array()]; + let buffer = + [MaybeUninit::new(first_half).transpose(), [const { MaybeUninit::uninit() }; N]]; Some(Self { buffer, start: 0 }) } @@ -204,7 +205,7 @@ impl Buffer { impl Clone for Buffer { fn clone(&self) -> Self { let mut buffer = Buffer { - buffer: [MaybeUninit::uninit_array(), MaybeUninit::uninit_array()], + buffer: [[const { MaybeUninit::uninit() }; N], [const { MaybeUninit::uninit() }; N]], start: self.start, }; buffer.as_uninit_array_mut().write(self.as_array_ref().clone()); diff --git a/core/src/iter/adapters/mod.rs b/core/src/iter/adapters/mod.rs index cc514bd914f14..1bde4488cc9de 100644 --- a/core/src/iter/adapters/mod.rs +++ b/core/src/iter/adapters/mod.rs @@ -41,6 +41,9 @@ pub use self::array_chunks::ArrayChunks; #[unstable(feature = "std_internals", issue = "none")] pub use self::by_ref_sized::ByRefSized; +#[unstable(feature = "iter_chain", reason = "recently added", issue = "125964")] +pub use self::chain::chain; + #[stable(feature = "iter_cloned", since = "1.1.0")] pub use self::cloned::Cloned; @@ -156,7 +159,7 @@ pub(crate) struct GenericShunt<'a, I, R> { residual: &'a mut Option, } -/// Process the given iterator as if it yielded a the item's `Try::Output` +/// Process the given iterator as if it yielded the item's `Try::Output` /// type instead. 
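The rewritten `Filter::next_chunk` above picks its implementation with an inline `const` block, so for any given `T` the non-matching branch is never codegenned. The same technique in miniature, with hypothetical helper functions and assuming a toolchain where inline `const` blocks are stable:

```rust
fn on_drop_types<T>() -> &'static str {
    "uses the drop-guard path"
}

fn on_plain_types<T>() -> &'static str {
    "uses the branchless path"
}

fn describe<T>() -> &'static str {
    // Evaluated at compile time per monomorphization; both arms coerce to
    // a plain `fn` pointer, and the unused function is never referenced.
    let fun = const {
        if std::mem::needs_drop::<T>() {
            on_drop_types::<T>
        } else {
            on_plain_types::<T>
        }
    };
    fun()
}

fn main() {
    assert_eq!(describe::<String>(), "uses the drop-guard path");
    assert_eq!(describe::<u32>(), "uses the branchless path");
}
```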
Any `Try::Residual`s encountered will stop the inner iterator /// and be propagated back to the overall result. pub(crate) fn try_process(iter: I, mut f: F) -> ChangeOutputType diff --git a/core/src/iter/mod.rs b/core/src/iter/mod.rs index 44fef3e145b78..921c75c85f161 100644 --- a/core/src/iter/mod.rs +++ b/core/src/iter/mod.rs @@ -428,6 +428,8 @@ pub use self::traits::{ DoubleEndedIterator, ExactSizeIterator, Extend, FromIterator, IntoIterator, Product, Sum, }; +#[unstable(feature = "iter_chain", reason = "recently added", issue = "125964")] +pub use self::adapters::chain; #[stable(feature = "iter_zip", since = "1.59.0")] pub use self::adapters::zip; #[unstable(feature = "iter_array_chunks", reason = "recently added", issue = "100450")] diff --git a/core/src/iter/sources/repeat.rs b/core/src/iter/sources/repeat.rs index 0168b11c7394a..243f938bce2af 100644 --- a/core/src/iter/sources/repeat.rs +++ b/core/src/iter/sources/repeat.rs @@ -8,11 +8,15 @@ use crate::num::NonZero; /// Infinite iterators like `repeat()` are often used with adapters like /// [`Iterator::take()`], in order to make them finite. /// +/// Use [`str::repeat()`] instead of this function if you just want to repeat +/// a char/string `n`th times. +/// /// If the element type of the iterator you need does not implement `Clone`, /// or if you do not want to keep the repeated element in memory, you can /// instead use the [`repeat_with()`] function. /// /// [`repeat_with()`]: crate::iter::repeat_with +/// [`str::repeat()`]: ../../std/primitive.str.html#method.repeat /// /// # Examples /// diff --git a/core/src/iter/sources/repeat_n.rs b/core/src/iter/sources/repeat_n.rs index 8224e4b12a0eb..8390dab8e543e 100644 --- a/core/src/iter/sources/repeat_n.rs +++ b/core/src/iter/sources/repeat_n.rs @@ -1,4 +1,4 @@ -use crate::iter::{FusedIterator, TrustedLen}; +use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator}; use crate::mem::ManuallyDrop; use crate::num::NonZero; @@ -193,3 +193,5 @@ impl FusedIterator for RepeatN {} #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for RepeatN {} +#[unstable(feature = "trusted_len_next_unchecked", issue = "37572")] +impl UncheckedIterator for RepeatN {} diff --git a/core/src/iter/traits/collect.rs b/core/src/iter/traits/collect.rs index d9d860c7b6cba..86660f2e375c3 100644 --- a/core/src/iter/traits/collect.rs +++ b/core/src/iter/traits/collect.rs @@ -1,3 +1,5 @@ +use super::TrustedLen; + /// Conversion from an [`Iterator`]. /// /// By implementing `FromIterator` for a type, you define how it will be @@ -311,8 +313,7 @@ where label = "`{Self}` is not an iterator", message = "`{Self}` is not an iterator" )] -#[cfg_attr(bootstrap, rustc_skip_array_during_method_dispatch)] -#[cfg_attr(not(bootstrap), rustc_skip_during_method_dispatch(array, boxed_slice))] +#[rustc_skip_during_method_dispatch(array, boxed_slice)] #[stable(feature = "rust1", since = "1.0.0")] pub trait IntoIterator { /// The type of the elements being iterated over. @@ -461,6 +462,27 @@ pub trait Extend { fn extend_reserve(&mut self, additional: usize) { let _ = additional; } + + /// Extends a collection with one element, without checking there is enough capacity for it. + /// + /// # Safety + /// + /// **For callers:** This must only be called when we know the collection has enough capacity + /// to contain the new item, for example because we previously called `extend_reserve`. 
+ /// + /// **For implementors:** For a collection to unsafely rely on this method's safety precondition (that is, + /// invoke UB if they are violated), it must implement `extend_reserve` correctly. In other words, + /// callers may assume that if they `extend_reserve`ed enough space they can call this method. + + // This method is for internal usage only. It is only on the trait because of specialization's limitations. + #[unstable(feature = "extend_one_unchecked", issue = "none")] + #[doc(hidden)] + unsafe fn extend_one_unchecked(&mut self, item: A) + where + Self: Sized, + { + self.extend_one(item); + } } #[stable(feature = "extend_for_unit", since = "1.28.0")] @@ -500,33 +522,102 @@ where fn extend>(&mut self, into_iter: T) { let (a, b) = self; let iter = into_iter.into_iter(); + SpecTupleExtend::extend(iter, a, b); + } + + fn extend_one(&mut self, item: (A, B)) { + self.0.extend_one(item.0); + self.1.extend_one(item.1); + } + + fn extend_reserve(&mut self, additional: usize) { + self.0.extend_reserve(additional); + self.1.extend_reserve(additional); + } + + unsafe fn extend_one_unchecked(&mut self, item: (A, B)) { + // SAFETY: Those are our safety preconditions, and we correctly forward `extend_reserve`. + unsafe { + self.0.extend_one_unchecked(item.0); + self.1.extend_one_unchecked(item.1); + } + } +} + +fn default_extend_tuple( + iter: impl Iterator, + a: &mut ExtendA, + b: &mut ExtendB, +) where + ExtendA: Extend, + ExtendB: Extend, +{ + fn extend<'a, A, B>( + a: &'a mut impl Extend, + b: &'a mut impl Extend, + ) -> impl FnMut((), (A, B)) + 'a { + move |(), (t, u)| { + a.extend_one(t); + b.extend_one(u); + } + } + + let (lower_bound, _) = iter.size_hint(); + if lower_bound > 0 { + a.extend_reserve(lower_bound); + b.extend_reserve(lower_bound); + } + + iter.fold((), extend(a, b)); +} + +trait SpecTupleExtend { + fn extend(self, a: &mut A, b: &mut B); +} +impl SpecTupleExtend for Iter +where + ExtendA: Extend, + ExtendB: Extend, + Iter: Iterator, +{ + default fn extend(self, a: &mut ExtendA, b: &mut ExtendB) { + default_extend_tuple(self, a, b); + } +} + +impl SpecTupleExtend for Iter +where + ExtendA: Extend, + ExtendB: Extend, + Iter: TrustedLen, +{ + fn extend(self, a: &mut ExtendA, b: &mut ExtendB) { fn extend<'a, A, B>( a: &'a mut impl Extend, b: &'a mut impl Extend, ) -> impl FnMut((), (A, B)) + 'a { - move |(), (t, u)| { - a.extend_one(t); - b.extend_one(u); + // SAFETY: We reserve enough space for the `size_hint`, and the iterator is `TrustedLen` + // so its `size_hint` is exact. + move |(), (t, u)| unsafe { + a.extend_one_unchecked(t); + b.extend_one_unchecked(u); } } - let (lower_bound, _) = iter.size_hint(); + let (lower_bound, upper_bound) = self.size_hint(); + + if upper_bound.is_none() { + // We cannot reserve more than `usize::MAX` items, and this is likely to go out of memory anyway. 
+ default_extend_tuple(self, a, b); + return; + } + if lower_bound > 0 { a.extend_reserve(lower_bound); b.extend_reserve(lower_bound); } - iter.fold((), extend(a, b)); - } - - fn extend_one(&mut self, item: (A, B)) { - self.0.extend_one(item.0); - self.1.extend_one(item.1); - } - - fn extend_reserve(&mut self, additional: usize) { - self.0.extend_reserve(additional); - self.1.extend_reserve(additional); + self.fold((), extend(a, b)); } } diff --git a/core/src/iter/traits/iterator.rs b/core/src/iter/traits/iterator.rs index cee99e28b5a97..733d414d44465 100644 --- a/core/src/iter/traits/iterator.rs +++ b/core/src/iter/traits/iterator.rs @@ -2080,8 +2080,7 @@ pub trait Iterator { fn try_collect(&mut self) -> ChangeOutputType where Self: Sized, - ::Item: Try, - <::Item as Try>::Residual: Residual, + Self::Item: Try>, B: FromIterator<::Output>, { try_process(ByRefSized(self), |i| i.collect()) @@ -2689,12 +2688,13 @@ pub trait Iterator { #[inline] #[unstable(feature = "iterator_try_reduce", reason = "new API", issue = "87053")] #[rustc_do_not_const_check] - fn try_reduce(&mut self, f: F) -> ChangeOutputType> + fn try_reduce( + &mut self, + f: impl FnMut(Self::Item, Self::Item) -> R, + ) -> ChangeOutputType> where Self: Sized, - F: FnMut(Self::Item, Self::Item) -> R, - R: Try, - R::Residual: Residual>, + R: Try>>, { let first = match self.next() { Some(i) => i, @@ -2956,12 +2956,13 @@ pub trait Iterator { #[inline] #[unstable(feature = "try_find", reason = "new API", issue = "63178")] #[rustc_do_not_const_check] - fn try_find(&mut self, f: F) -> ChangeOutputType> + fn try_find( + &mut self, + f: impl FnMut(&Self::Item) -> R, + ) -> ChangeOutputType> where Self: Sized, - F: FnMut(&Self::Item) -> R, - R: Try, - R::Residual: Residual>, + R: Try>>, { #[inline] fn check( diff --git a/core/src/lib.rs b/core/src/lib.rs index 206d1ab885291..49f89e702558f 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -34,12 +34,9 @@ //! Rust user code is to call the functions provided by this library instead (such as //! `ptr::copy`). //! -//! * `rust_begin_panic` - This function takes four arguments, a -//! `fmt::Arguments`, a `&'static str`, and two `u32`'s. These four arguments -//! dictate the panic message, the file at which panic was invoked, and the -//! line and column inside the file. It is up to consumers of this core +//! * Panic handler - This function takes one argument, a `&panic::PanicInfo`. It is up to consumers of this core //! library to define this panic function; it is only required to never -//! return. This requires a `lang` attribute named `panic_impl`. +//! return. You should mark your implementation using `#[panic_handler]`. //! //! * `rust_eh_personality` - is used by the failure mechanisms of the //! compiler. 
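For reference, the panic-handler contract this doc change describes looks like the following in a `#![no_std]` binary; a minimal sketch, assuming the crate is built with `panic = "abort"` so no unwinding personality routine is required:

```rust
// The one function a no_std consumer of core must supply: it receives a
// `&PanicInfo` and must never return.
#![no_std]
#![no_main]

use core::panic::PanicInfo;

#[panic_handler]
fn panic(_info: &PanicInfo) -> ! {
    // A real handler might log `_info` over serial or reset the device;
    // the only hard requirement is the never-returning `!` type.
    loop {}
}
```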
This is often mapped to GCC's personality function, but crates @@ -122,7 +119,6 @@ #![feature(const_bigint_helper_methods)] #![feature(const_black_box)] #![feature(const_cell_into_inner)] -#![feature(const_char_from_u32_unchecked)] #![feature(const_eval_select)] #![feature(const_exact_div)] #![feature(const_float_bits_conv)] @@ -130,7 +126,6 @@ #![feature(const_fmt_arguments_new)] #![feature(const_hash)] #![feature(const_heap)] -#![feature(const_hint_assert_unchecked)] #![feature(const_index_range_slice_index)] #![feature(const_int_from_str)] #![feature(const_intrinsic_copy)] @@ -140,7 +135,6 @@ #![feature(const_likely)] #![feature(const_maybe_uninit_as_mut_ptr)] #![feature(const_maybe_uninit_assume_init)] -#![feature(const_maybe_uninit_uninit_array)] #![feature(const_nonnull_new)] #![feature(const_num_midpoint)] #![feature(const_option)] @@ -174,11 +168,9 @@ #![feature(duration_consts_float)] #![feature(internal_impls_macro)] #![feature(ip)] -#![feature(ip_bits)] #![feature(is_ascii_octdigit)] #![feature(isqrt)] #![feature(link_cfg)] -#![feature(maybe_uninit_uninit_array)] #![feature(offset_of_enum)] #![feature(offset_of_nested)] #![feature(panic_internals)] @@ -191,6 +183,7 @@ #![feature(str_split_remainder)] #![feature(strict_provenance)] #![feature(ub_checks)] +#![feature(unchecked_neg)] #![feature(unchecked_shifts)] #![feature(utf16_extra)] #![feature(utf16_extra_const)] @@ -199,29 +192,27 @@ // // Language features: // tidy-alphabetical-start +#![cfg_attr(bootstrap, feature(c_unwind))] +#![cfg_attr(bootstrap, feature(effects))] #![feature(abi_unadjusted)] #![feature(adt_const_params)] #![feature(allow_internal_unsafe)] #![feature(allow_internal_unstable)] #![feature(asm_const)] #![feature(auto_traits)] -#![feature(c_unwind)] #![feature(cfg_sanitize)] #![feature(cfg_target_has_atomic)] #![feature(cfg_target_has_atomic_equal_alignment)] -#![feature(const_closures)] #![feature(const_fn_floating_point_arithmetic)] #![feature(const_for)] #![feature(const_mut_refs)] #![feature(const_precise_live_drops)] #![feature(const_refs_to_cell)] -#![feature(const_trait_impl)] #![feature(decl_macro)] #![feature(deprecated_suggestion)] #![feature(doc_cfg)] #![feature(doc_cfg_hide)] #![feature(doc_notable_trait)] -#![feature(effects)] #![feature(extern_types)] #![feature(f128)] #![feature(f16)] @@ -235,6 +226,7 @@ #![feature(let_chains)] #![feature(link_llvm_intrinsics)] #![feature(macro_metavar_expr)] +#![feature(marker_trait_attr)] #![feature(min_exhaustive_patterns)] #![feature(min_specialization)] #![feature(multiple_supertrait_upcastable)] @@ -255,7 +247,6 @@ #![feature(trait_alias)] #![feature(transparent_unions)] #![feature(try_blocks)] -#![feature(type_alias_impl_trait)] #![feature(unboxed_closures)] #![feature(unsized_fn_params)] #![feature(with_negative_coherence)] @@ -403,6 +394,8 @@ pub mod panicking; #[unstable(feature = "core_pattern_types", issue = "none")] pub mod pat; pub mod pin; +#[unstable(feature = "new_range_api", issue = "125687")] +pub mod range; pub mod result; pub mod sync; diff --git a/core/src/macros/mod.rs b/core/src/macros/mod.rs index 2ddedfa37fe27..0d4ca4d5f01e4 100644 --- a/core/src/macros/mod.rs +++ b/core/src/macros/mod.rs @@ -1569,7 +1569,12 @@ pub(crate) mod builtin { #[rustc_builtin_macro] #[macro_export] #[rustc_diagnostic_item = "assert_macro"] - #[allow_internal_unstable(panic_internals, edition_panic, generic_assert_internals)] + #[allow_internal_unstable( + core_intrinsics, + panic_internals, + edition_panic, + generic_assert_internals + )] macro_rules! 
assert { ($cond:expr $(,)?) => {{ /* compiler built-in */ }}; ($cond:expr, $($arg:tt)+) => {{ /* compiler built-in */ }}; diff --git a/core/src/marker.rs b/core/src/marker.rs index 1d073a6d649b8..21abd7c036ba7 100644 --- a/core/src/marker.rs +++ b/core/src/marker.rs @@ -944,7 +944,6 @@ marker_impls! { #[lang = "destruct"] #[rustc_on_unimplemented(message = "can't drop `{Self}`", append_const_msg)] #[rustc_deny_explicit_impl(implement_via_object = false)] -#[const_trait] pub trait Destruct {} /// A marker for tuple types. @@ -997,15 +996,12 @@ marker_impls! { bool, char, str /* Technically requires `[u8]: ConstParamTy` */, + (), {T: ConstParamTy, const N: usize} [T; N], {T: ConstParamTy} [T], {T: ?Sized + ConstParamTy} &T, } -// FIXME(adt_const_params): Add to marker_impls call above once not in bootstrap -#[unstable(feature = "adt_const_params", issue = "95174")] -impl ConstParamTy for () {} - /// A common trait implemented by all function pointers. #[unstable( feature = "fn_ptr_trait", @@ -1019,3 +1015,58 @@ pub trait FnPtr: Copy + Clone { #[lang = "fn_ptr_addr"] fn addr(self) -> *const (); } + +/// Derive macro generating impls of traits related to smart pointers. +#[cfg(not(bootstrap))] +#[rustc_builtin_macro] +#[allow_internal_unstable(dispatch_from_dyn, coerce_unsized, unsize)] +#[unstable(feature = "derive_smart_pointer", issue = "123430")] +pub macro SmartPointer($item:item) { + /* compiler built-in */ +} + +// Support traits and types for the desugaring of const traits and +// `~const` bounds. Not supposed to be used by anything other than +// the compiler. +#[doc(hidden)] +#[unstable( + feature = "effect_types", + issue = "none", + reason = "internal module for implementing effects" +)] +#[allow(missing_debug_implementations)] // these unit structs don't need `Debug` impls. +#[cfg(not(bootstrap))] +pub mod effects { + #[lang = "EffectsNoRuntime"] + pub struct NoRuntime; + #[lang = "EffectsMaybe"] + pub struct Maybe; + #[lang = "EffectsRuntime"] + pub struct Runtime; + + #[lang = "EffectsCompat"] + pub trait Compat<#[rustc_runtime] const RUNTIME: bool> {} + + impl Compat for NoRuntime {} + impl Compat for Runtime {} + impl<#[rustc_runtime] const RUNTIME: bool> Compat for Maybe {} + + #[lang = "EffectsTyCompat"] + #[marker] + pub trait TyCompat {} + + impl TyCompat for T {} + impl TyCompat for Maybe {} + impl TyCompat for T {} + + #[lang = "EffectsIntersection"] + pub trait Intersection { + #[lang = "EffectsIntersectionOutput"] + type Output: ?Sized; + } + + // FIXME(effects): remove this after next trait solver lands + impl Intersection for () { + type Output = Maybe; + } +} diff --git a/core/src/mem/manually_drop.rs b/core/src/mem/manually_drop.rs index e0c3b9f3b51da..997f088c6d687 100644 --- a/core/src/mem/manually_drop.rs +++ b/core/src/mem/manually_drop.rs @@ -62,6 +62,9 @@ impl ManuallyDrop { /// x.truncate(5); // You can still safely operate on the value /// assert_eq!(*x, "Hello"); /// // But `Drop` will not be run here + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. 
+ /// # let _ = ManuallyDrop::into_inner(x); /// ``` #[must_use = "if you don't need the wrapper, you can use `mem::forget` instead"] #[stable(feature = "manually_drop", since = "1.20.0")] diff --git a/core/src/mem/maybe_uninit.rs b/core/src/mem/maybe_uninit.rs index 026e21586d403..dd40f57dc8707 100644 --- a/core/src/mem/maybe_uninit.rs +++ b/core/src/mem/maybe_uninit.rs @@ -120,12 +120,8 @@ use crate::slice; /// use std::mem::{self, MaybeUninit}; /// /// let data = { -/// // Create an uninitialized array of `MaybeUninit`. The `assume_init` is -/// // safe because the type we are claiming to have initialized here is a -/// // bunch of `MaybeUninit`s, which do not require initialization. -/// let mut data: [MaybeUninit>; 1000] = unsafe { -/// MaybeUninit::uninit().assume_init() -/// }; +/// // Create an uninitialized array of `MaybeUninit`. +/// let mut data: [MaybeUninit>; 1000] = [const { MaybeUninit::uninit() }; 1000]; /// /// // Dropping a `MaybeUninit` does nothing, so if there is a panic during this loop, /// // we have a memory leak, but there is no memory safety issue. @@ -147,10 +143,8 @@ use crate::slice; /// ``` /// use std::mem::MaybeUninit; /// -/// // Create an uninitialized array of `MaybeUninit`. The `assume_init` is -/// // safe because the type we are claiming to have initialized here is a -/// // bunch of `MaybeUninit`s, which do not require initialization. -/// let mut data: [MaybeUninit; 1000] = unsafe { MaybeUninit::uninit().assume_init() }; +/// // Create an uninitialized array of `MaybeUninit`. +/// let mut data: [MaybeUninit; 1000] = [const { MaybeUninit::uninit() }; 1000]; /// // Count the number of elements we have assigned. /// let mut data_len: usize = 0; /// @@ -280,6 +274,8 @@ impl MaybeUninit { /// use std::mem::MaybeUninit; /// /// let v: MaybeUninit> = MaybeUninit::new(vec![42]); + /// # // Prevent leaks for Miri + /// # unsafe { let _ = MaybeUninit::assume_init(v); } /// ``` /// /// [`assume_init`]: MaybeUninit::assume_init @@ -348,8 +344,7 @@ impl MaybeUninit { #[must_use] #[inline(always)] pub const fn uninit_array() -> [Self; N] { - // SAFETY: An uninitialized `[MaybeUninit<_>; LEN]` is valid. - unsafe { MaybeUninit::<[MaybeUninit; N]>::uninit().assume_init() } + [const { MaybeUninit::uninit() }; N] } /// Creates a new `MaybeUninit` in an uninitialized state, with the memory being @@ -453,6 +448,9 @@ impl MaybeUninit { /// let mut x = MaybeUninit::::uninit(); /// /// x.write("Hello".to_string()); + /// # // FIXME(https://github.com/rust-lang/miri/issues/3670): + /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak. + /// # unsafe { MaybeUninit::assume_init_drop(&mut x); } /// // This leaks the contained string: /// x.write("hello".to_string()); /// // x is initialized now: @@ -513,6 +511,8 @@ impl MaybeUninit { /// // Create a reference into the `MaybeUninit`. This is okay because we initialized it. 
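The inline-const initialization pattern these `MaybeUninit` examples switch to also works on stable Rust (1.79+, where inline `const` blocks are stable); a small standalone sketch, with `squares` as an illustrative helper that is not part of the diff:

```rust
use std::mem::MaybeUninit;

fn squares() -> [u32; 8] {
    // `[const { MaybeUninit::uninit() }; N]` needs no `unsafe`, unlike the
    // old `uninit().assume_init()` idiom it replaces above.
    let mut data: [MaybeUninit<u32>; 8] = [const { MaybeUninit::uninit() }; 8];
    for (i, slot) in data.iter_mut().enumerate() {
        slot.write((i * i) as u32);
    }
    // SAFETY: every element was initialized by the loop above.
    data.map(|slot| unsafe { slot.assume_init() })
}

fn main() {
    assert_eq!(squares(), [0, 1, 4, 9, 16, 25, 36, 49]);
}
```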
/// let x_vec = unsafe { &*x.as_ptr() }; /// assert_eq!(x_vec.len(), 3); + /// # // Prevent leaks for Miri + /// # unsafe { MaybeUninit::assume_init_drop(&mut x); } /// ``` /// /// *Incorrect* usage of this method: @@ -552,6 +552,8 @@ impl MaybeUninit { /// let x_vec = unsafe { &mut *x.as_mut_ptr() }; /// x_vec.push(3); /// assert_eq!(x_vec.len(), 4); + /// # // Prevent leaks for Miri + /// # unsafe { MaybeUninit::assume_init_drop(&mut x); } /// ``` /// /// *Incorrect* usage of this method: @@ -753,6 +755,8 @@ impl MaybeUninit { /// use std::mem::MaybeUninit; /// /// let mut x = MaybeUninit::>::uninit(); + /// # let mut x_mu = x; + /// # let mut x = &mut x_mu; /// // Initialize `x`: /// x.write(vec![1, 2, 3]); /// // Now that our `MaybeUninit<_>` is known to be initialized, it is okay to @@ -762,6 +766,8 @@ impl MaybeUninit { /// x.assume_init_ref() /// }; /// assert_eq!(x, &vec![1, 2, 3]); + /// # // Prevent leaks for Miri + /// # unsafe { MaybeUninit::assume_init_drop(&mut x_mu); } /// ``` /// /// ### *Incorrect* usages of this method: @@ -924,11 +930,10 @@ impl MaybeUninit { /// # Examples /// /// ``` - /// #![feature(maybe_uninit_uninit_array)] /// #![feature(maybe_uninit_array_assume_init)] /// use std::mem::MaybeUninit; /// - /// let mut array: [MaybeUninit; 3] = MaybeUninit::uninit_array(); + /// let mut array: [MaybeUninit; 3] = [MaybeUninit::uninit(); 3]; /// array[0].write(0); /// array[1].write(1); /// array[2].write(2); @@ -1096,6 +1101,8 @@ impl MaybeUninit { /// let init = MaybeUninit::clone_from_slice(&mut dst, &src); /// /// assert_eq!(init, src); + /// # // Prevent leaks for Miri + /// # unsafe { std::ptr::drop_in_place(init); } /// ``` /// /// ``` diff --git a/core/src/mem/mod.rs b/core/src/mem/mod.rs index 9054ade2d7968..dd4b6e823434e 100644 --- a/core/src/mem/mod.rs +++ b/core/src/mem/mod.rs @@ -1266,6 +1266,20 @@ impl SizedTypeProperties for T {} /// // ^^^ error[E0616]: field `private` of struct `Struct` is private /// ``` /// +/// Only [`Sized`] fields are supported, but the container may be unsized: +/// ``` +/// # use core::mem; +/// #[repr(C)] +/// pub struct Struct { +/// a: u8, +/// b: [u8], +/// } +/// +/// assert_eq!(mem::offset_of!(Struct, a), 0); // OK +/// // assert_eq!(mem::offset_of!(Struct, b), 1); +/// // ^^^ error[E0277]: doesn't have a size known at compile-time +/// ``` +/// /// Note that type layout is, in general, [subject to change and /// platform-specific](https://doc.rust-lang.org/reference/type-layout.html). If /// layout stability is required, consider using an [explicit `repr` attribute]. diff --git a/core/src/net/display_buffer.rs b/core/src/net/display_buffer.rs index b7e778605fc0a..6619c85f483ef 100644 --- a/core/src/net/display_buffer.rs +++ b/core/src/net/display_buffer.rs @@ -11,7 +11,7 @@ pub struct DisplayBuffer { impl DisplayBuffer { #[inline] pub const fn new() -> Self { - Self { buf: MaybeUninit::uninit_array(), len: 0 } + Self { buf: [MaybeUninit::uninit(); SIZE], len: 0 } } #[inline] diff --git a/core/src/net/ip_addr.rs b/core/src/net/ip_addr.rs index 959c3289affbf..c11a508a135b3 100644 --- a/core/src/net/ip_addr.rs +++ b/core/src/net/ip_addr.rs @@ -406,7 +406,7 @@ impl IpAddr { matches!(self, IpAddr::V6(_)) } - /// Converts this address to an `IpAddr::V4` if it is an IPv4-mapped IPv6 addresses, otherwise it + /// Converts this address to an `IpAddr::V4` if it is an IPv4-mapped IPv6 address, otherwise it /// returns `self` as-is. 
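As a quick illustration of the IPv4-mapped wording fixed above, `IpAddr::to_canonical` behaves as follows (standalone example, not part of the diff):

```rust
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};

fn main() {
    // 192.0.2.128 embedded as the IPv4-mapped address ::ffff:192.0.2.128.
    let mapped = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0xffff, 0xc000, 0x0280));
    assert_eq!(mapped.to_canonical(), IpAddr::V4(Ipv4Addr::new(192, 0, 2, 128)));

    // Any other address comes back unchanged.
    let plain = IpAddr::V6(Ipv6Addr::LOCALHOST);
    assert_eq!(plain.to_canonical(), plain);
}
```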
/// /// # Examples @@ -460,12 +460,11 @@ impl Ipv4Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv4Addr; /// /// assert_eq!(Ipv4Addr::BITS, 32); /// ``` - #[unstable(feature = "ip_bits", issue = "113744")] + #[stable(feature = "ip_bits", since = "1.80.0")] pub const BITS: u32 = 32; /// Converts an IPv4 address into a `u32` representation using native byte order. @@ -479,7 +478,6 @@ impl Ipv4Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv4Addr; /// /// let addr = Ipv4Addr::new(0x12, 0x34, 0x56, 0x78); @@ -487,7 +485,6 @@ impl Ipv4Addr { /// ``` /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv4Addr; /// /// let addr = Ipv4Addr::new(0x12, 0x34, 0x56, 0x78); @@ -495,8 +492,8 @@ impl Ipv4Addr { /// assert_eq!(Ipv4Addr::new(0x12, 0x34, 0x56, 0x00), Ipv4Addr::from_bits(addr_bits)); /// /// ``` - #[rustc_const_unstable(feature = "ip_bits", issue = "113744")] - #[unstable(feature = "ip_bits", issue = "113744")] + #[rustc_const_stable(feature = "ip_bits", since = "1.80.0")] + #[stable(feature = "ip_bits", since = "1.80.0")] #[must_use] #[inline] pub const fn to_bits(self) -> u32 { @@ -510,14 +507,13 @@ impl Ipv4Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv4Addr; /// /// let addr = Ipv4Addr::from(0x12345678); /// assert_eq!(Ipv4Addr::new(0x12, 0x34, 0x56, 0x78), addr); /// ``` - #[rustc_const_unstable(feature = "ip_bits", issue = "113744")] - #[unstable(feature = "ip_bits", issue = "113744")] + #[rustc_const_stable(feature = "ip_bits", since = "1.80.0")] + #[stable(feature = "ip_bits", since = "1.80.0")] #[must_use] #[inline] pub const fn from_bits(bits: u32) -> Ipv4Addr { @@ -1238,12 +1234,11 @@ impl Ipv6Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv6Addr; /// /// assert_eq!(Ipv6Addr::BITS, 128); /// ``` - #[unstable(feature = "ip_bits", issue = "113744")] + #[stable(feature = "ip_bits", since = "1.80.0")] pub const BITS: u32 = 128; /// Converts an IPv6 address into a `u128` representation using native byte order. 
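The `to_bits`/`from_bits` conversions stabilized here make prefix arithmetic on addresses straightforward; a short sketch, assuming a toolchain (1.80+) where the `ip_bits` stabilization above has shipped:

```rust
use std::net::Ipv4Addr;

fn main() {
    let addr = Ipv4Addr::new(192, 0, 2, 128);
    let bits = addr.to_bits();

    // Mask down to the /24 network address with plain integer ops.
    let network = Ipv4Addr::from_bits(bits & 0xffff_ff00);
    assert_eq!(network, Ipv4Addr::new(192, 0, 2, 0));

    // The conversion round-trips exactly.
    assert_eq!(Ipv4Addr::from_bits(bits), addr);
}
```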
@@ -1257,7 +1252,6 @@ impl Ipv6Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv6Addr; /// /// let addr = Ipv6Addr::new( @@ -1268,7 +1262,6 @@ impl Ipv6Addr { /// ``` /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv6Addr; /// /// let addr = Ipv6Addr::new( @@ -1284,8 +1277,8 @@ impl Ipv6Addr { /// Ipv6Addr::from_bits(addr_bits)); /// /// ``` - #[rustc_const_unstable(feature = "ip_bits", issue = "113744")] - #[unstable(feature = "ip_bits", issue = "113744")] + #[rustc_const_stable(feature = "ip_bits", since = "1.80.0")] + #[stable(feature = "ip_bits", since = "1.80.0")] #[must_use] #[inline] pub const fn to_bits(self) -> u128 { @@ -1299,7 +1292,6 @@ impl Ipv6Addr { /// # Examples /// /// ``` - /// #![feature(ip_bits)] /// use std::net::Ipv6Addr; /// /// let addr = Ipv6Addr::from(0x102030405060708090A0B0C0D0E0F00D_u128); @@ -1310,8 +1302,8 @@ impl Ipv6Addr { /// ), /// addr); /// ``` - #[rustc_const_unstable(feature = "ip_bits", issue = "113744")] - #[unstable(feature = "ip_bits", issue = "113744")] + #[rustc_const_stable(feature = "ip_bits", since = "1.80.0")] + #[stable(feature = "ip_bits", since = "1.80.0")] #[must_use] #[inline] pub const fn from_bits(bits: u128) -> Ipv6Addr { @@ -1887,7 +1879,7 @@ impl Ipv6Addr { } } - /// Converts this address to an `IpAddr::V4` if it is an IPv4-mapped addresses, otherwise it + /// Converts this address to an `IpAddr::V4` if it is an IPv4-mapped address, otherwise it /// returns self wrapped in an `IpAddr::V6`. /// /// # Examples diff --git a/core/src/num/dec2flt/common.rs b/core/src/num/dec2flt/common.rs index 11a626485191c..c85727b493816 100644 --- a/core/src/num/dec2flt/common.rs +++ b/core/src/num/dec2flt/common.rs @@ -39,9 +39,7 @@ impl ByteSlice for [u8] { fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { let mut s = self; - // FIXME: Can't use s.split_first() here yet, - // see https://github.com/rust-lang/rust/issues/109328 - while let [c, s_next @ ..] = s { + while let Some((c, s_next)) = s.split_first() { let c = c.wrapping_sub(b'0'); if c < 10 { func(c); diff --git a/core/src/num/dec2flt/lemire.rs b/core/src/num/dec2flt/lemire.rs index 3bc052df7a6c1..01642e1b1112a 100644 --- a/core/src/num/dec2flt/lemire.rs +++ b/core/src/num/dec2flt/lemire.rs @@ -157,7 +157,7 @@ fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) { // Need to do a second multiplication to get better precision // for the lower product. This will always be exact // where q is < 55, since 5^55 < 2^128. If this wraps, - // then we need to need to round up the hi product. + // then we need to round up the hi product. 
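The `split_first` loop shape these dec2flt hunks switch to can be exercised standalone; the `parse_digits` below is a simplified stand-in for the library's, not a copy of it:

```rust
// Consume ASCII digits from the front of a byte slice, returning the rest.
fn parse_digits(mut s: &[u8], mut func: impl FnMut(u8)) -> &[u8] {
    while let Some((c, rest)) = s.split_first() {
        let d = c.wrapping_sub(b'0');
        if d < 10 {
            func(d);
            s = rest;
        } else {
            break;
        }
    }
    s
}

fn main() {
    let mut digits = Vec::new();
    let rest = parse_digits(b"1234x", |d| digits.push(d));
    assert_eq!(digits, [1, 2, 3, 4]);
    assert_eq!(rest, b"x".as_slice());
}
```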
let (_, second_hi) = full_multiplication(w, hi5); first_lo = first_lo.wrapping_add(second_hi); if second_hi > first_lo { diff --git a/core/src/num/dec2flt/mod.rs b/core/src/num/dec2flt/mod.rs index a4bc8b1c9b0c3..9aac2332dce0d 100644 --- a/core/src/num/dec2flt/mod.rs +++ b/core/src/num/dec2flt/mod.rs @@ -250,8 +250,10 @@ pub fn dec2flt(s: &str) -> Result { None => return Err(pfe_invalid()), }; num.negative = negative; - if let Some(value) = num.try_fast_path::() { - return Ok(value); + if !cfg!(feature = "optimize_for_size") { + if let Some(value) = num.try_fast_path::() { + return Ok(value); + } } // If significant digits were truncated, then we can have rounding error diff --git a/core/src/num/dec2flt/parse.rs b/core/src/num/dec2flt/parse.rs index b0a23835c5bd4..975bb8ad6bc1f 100644 --- a/core/src/num/dec2flt/parse.rs +++ b/core/src/num/dec2flt/parse.rs @@ -51,9 +51,7 @@ fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) { let mut s = *s_ref; while *x < MIN_19DIGIT_INT { - // FIXME: Can't use s.split_first() here yet, - // see https://github.com/rust-lang/rust/issues/109328 - if let [c, s_next @ ..] = s { + if let Some((c, s_next)) = s.split_first() { let digit = c.wrapping_sub(b'0'); if digit < 10 { diff --git a/core/src/num/f128.rs b/core/src/num/f128.rs index 9362dc8765492..05dc1e97852e0 100644 --- a/core/src/num/f128.rs +++ b/core/src/num/f128.rs @@ -11,7 +11,11 @@ #![unstable(feature = "f128", issue = "116909")] +use crate::convert::FloatToInt; +#[cfg(not(test))] +use crate::intrinsics; use crate::mem; +use crate::num::FpCategory; /// Basic mathematical constants. #[unstable(feature = "f128", issue = "116909")] @@ -68,6 +72,13 @@ pub mod consts { pub const FRAC_1_SQRT_PI: f128 = 0.564189583547756286948079451560772585844050629328998856844086_f128; + /// 1/sqrt(2π) + #[doc(alias = "FRAC_1_SQRT_TAU")] + #[unstable(feature = "f128", issue = "116909")] + // Also, #[unstable(feature = "more_float_constants", issue = "103883")] + pub const FRAC_1_SQRT_2PI: f128 = + 0.398942280401432677939946059934381868475858631164934657665926_f128; + /// 2/π #[unstable(feature = "f128", issue = "116909")] pub const FRAC_2_PI: f128 = 0.636619772367581343075535053490057448137838582961825794990669_f128; @@ -159,7 +170,7 @@ impl f128 { /// [Machine epsilon]: https://en.wikipedia.org/wiki/Machine_epsilon /// [`MANTISSA_DIGITS`]: f128::MANTISSA_DIGITS #[unstable(feature = "f128", issue = "116909")] - pub const EPSILON: f128 = 1.92592994438723585305597794258492731e-34_f128; + pub const EPSILON: f128 = 1.92592994438723585305597794258492732e-34_f128; /// Smallest finite `f128` value. /// @@ -167,7 +178,7 @@ impl f128 { /// /// [`MAX`]: f128::MAX #[unstable(feature = "f128", issue = "116909")] - pub const MIN: f128 = -1.18973149535723176508575932662800701e+4932_f128; + pub const MIN: f128 = -1.18973149535723176508575932662800702e+4932_f128; /// Smallest positive normal `f128` value. /// /// Equal to 2[`MIN_EXP`] − 1. @@ -183,7 +194,7 @@ impl f128 { /// [`MANTISSA_DIGITS`]: f128::MANTISSA_DIGITS /// [`MAX_EXP`]: f128::MAX_EXP #[unstable(feature = "f128", issue = "116909")] - pub const MAX: f128 = 1.18973149535723176508575932662800701e+4932_f128; + pub const MAX: f128 = 1.18973149535723176508575932662800702e+4932_f128; /// One greater than the minimum possible normal power of 2 exponent. /// @@ -213,21 +224,264 @@ impl f128 { #[unstable(feature = "f128", issue = "116909")] pub const MAX_10_EXP: i32 = 4_932; + /// Not a Number (NaN). 
+ /// + /// Note that IEEE 754 doesn't define just a single NaN value; + /// a plethora of bit patterns are considered to be NaN. + /// Furthermore, the standard makes a difference + /// between a "signaling" and a "quiet" NaN, + /// and allows inspecting its "payload" (the unspecified bits in the bit pattern). + /// This constant isn't guaranteed to equal any specific NaN bit pattern, + /// and the stability of its representation over Rust versions + /// and target platforms isn't guaranteed. + #[cfg(not(bootstrap))] + #[allow(clippy::eq_op)] + #[rustc_diagnostic_item = "f128_nan"] + #[unstable(feature = "f128", issue = "116909")] + pub const NAN: f128 = 0.0_f128 / 0.0_f128; + + /// Infinity (∞). + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + pub const INFINITY: f128 = 1.0_f128 / 0.0_f128; + + /// Negative infinity (−∞). + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + pub const NEG_INFINITY: f128 = -1.0_f128 / 0.0_f128; + + /// Sign bit + #[cfg(not(bootstrap))] + pub(crate) const SIGN_MASK: u128 = 0x8000_0000_0000_0000_0000_0000_0000_0000; + + /// Exponent mask + pub(crate) const EXP_MASK: u128 = 0x7fff_0000_0000_0000_0000_0000_0000_0000; + + /// Mantissa mask + pub(crate) const MAN_MASK: u128 = 0x0000_ffff_ffff_ffff_ffff_ffff_ffff_ffff; + + /// Minimum representable positive value (min subnormal) + #[cfg(not(bootstrap))] + const TINY_BITS: u128 = 0x1; + + /// Minimum representable negative value (min negative subnormal) + #[cfg(not(bootstrap))] + const NEG_TINY_BITS: u128 = Self::TINY_BITS | Self::SIGN_MASK; + /// Returns `true` if this value is NaN. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `unordtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let nan = f128::NAN; + /// let f = 7.0_f128; + /// + /// assert!(nan.is_nan()); + /// assert!(!f.is_nan()); + /// # } + /// ``` #[inline] #[must_use] + #[cfg(not(bootstrap))] #[unstable(feature = "f128", issue = "116909")] #[allow(clippy::eq_op)] // > if you intended to check if the operand is NaN, use `.is_nan()` instead :) pub const fn is_nan(self) -> bool { self != self } + // FIXME(#50145): `abs` is publicly unavailable in core due to + // concerns about portability, so this implementation is for + // private use internally. + #[inline] + #[cfg(not(bootstrap))] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub(crate) const fn abs_private(self) -> f128 { + // SAFETY: This transmutation is fine. Probably. For the reasons std is using it. + unsafe { + mem::transmute::<u128, f128>(mem::transmute::<f128, u128>(self) & !Self::SIGN_MASK) + } + } + + /// Returns `true` if this value is positive infinity or negative infinity, and + /// `false` otherwise.
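The mask constants introduced above follow the same sign/exponent/mantissa split as every IEEE binary format, so the classification idea (the actual `classify_bits` appears further down in this file) can be tried on stable `f32`, whose fields are 1 sign, 8 exponent, and 23 mantissa bits; `classify_bits_f32` is an illustrative stand-in:

```rust
use std::num::FpCategory;

const EXP_MASK: u32 = 0x7f80_0000; // 8 exponent bits
const MAN_MASK: u32 = 0x007f_ffff; // 23 mantissa bits

fn classify_bits_f32(b: u32) -> FpCategory {
    match (b & MAN_MASK, b & EXP_MASK) {
        (0, EXP_MASK) => FpCategory::Infinite, // max exponent, empty mantissa
        (_, EXP_MASK) => FpCategory::Nan,      // max exponent, any payload
        (0, 0) => FpCategory::Zero,
        (_, 0) => FpCategory::Subnormal,       // zero exponent, nonzero mantissa
        _ => FpCategory::Normal,
    }
}

fn main() {
    assert!(matches!(classify_bits_f32(f32::NAN.to_bits()), FpCategory::Nan));
    assert!(matches!(classify_bits_f32(1.0f32.to_bits()), FpCategory::Normal));
    assert!(matches!(classify_bits_f32(1.0e-40f32.to_bits()), FpCategory::Subnormal));
}
```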
+ /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let f = 7.0f128; + /// let inf = f128::INFINITY; + /// let neg_inf = f128::NEG_INFINITY; + /// let nan = f128::NAN; + /// + /// assert!(!f.is_infinite()); + /// assert!(!nan.is_infinite()); + /// + /// assert!(inf.is_infinite()); + /// assert!(neg_inf.is_infinite()); + /// # } + /// ``` + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub const fn is_infinite(self) -> bool { + (self == f128::INFINITY) | (self == f128::NEG_INFINITY) + } + + /// Returns `true` if this number is neither infinite nor NaN. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `lttf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let f = 7.0f128; + /// let inf: f128 = f128::INFINITY; + /// let neg_inf: f128 = f128::NEG_INFINITY; + /// let nan: f128 = f128::NAN; + /// + /// assert!(f.is_finite()); + /// + /// assert!(!nan.is_finite()); + /// assert!(!inf.is_finite()); + /// assert!(!neg_inf.is_finite()); + /// # } + /// ``` + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub const fn is_finite(self) -> bool { + // There's no need to handle NaN separately: if self is NaN, + // the comparison is not true, exactly as desired. + self.abs_private() < Self::INFINITY + } + + /// Returns `true` if the number is [subnormal]. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let min = f128::MIN_POSITIVE; // 3.362103143e-4932f128 + /// let max = f128::MAX; + /// let lower_than_min = 1.0e-4960_f128; + /// let zero = 0.0_f128; + /// + /// assert!(!min.is_subnormal()); + /// assert!(!max.is_subnormal()); + /// + /// assert!(!zero.is_subnormal()); + /// assert!(!f128::NAN.is_subnormal()); + /// assert!(!f128::INFINITY.is_subnormal()); + /// // Values between `0` and `min` are Subnormal. + /// assert!(lower_than_min.is_subnormal()); + /// # } + /// ``` + /// + /// [subnormal]: https://en.wikipedia.org/wiki/Denormal_number + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub const fn is_subnormal(self) -> bool { + matches!(self.classify(), FpCategory::Subnormal) + } + + /// Returns `true` if the number is neither zero, infinite, [subnormal], nor NaN. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let min = f128::MIN_POSITIVE; // 3.362103143e-4932f128 + /// let max = f128::MAX; + /// let lower_than_min = 1.0e-4960_f128; + /// let zero = 0.0_f128; + /// + /// assert!(min.is_normal()); + /// assert!(max.is_normal()); + /// + /// assert!(!zero.is_normal()); + /// assert!(!f128::NAN.is_normal()); + /// assert!(!f128::INFINITY.is_normal()); + /// // Values between `0` and `min` are Subnormal.
+ /// assert!(!lower_than_min.is_normal()); + /// # } + /// ``` + /// + /// [subnormal]: https://en.wikipedia.org/wiki/Denormal_number + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub const fn is_normal(self) -> bool { + matches!(self.classify(), FpCategory::Normal) + } + + /// Returns the floating point category of the number. If only one property + /// is going to be tested, it is generally faster to use the specific + /// predicate instead. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// use std::num::FpCategory; + /// + /// let num = 12.4_f128; + /// let inf = f128::INFINITY; + /// + /// assert_eq!(num.classify(), FpCategory::Normal); + /// assert_eq!(inf.classify(), FpCategory::Infinite); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + pub const fn classify(self) -> FpCategory { + // Other float types cannot use a bitwise classify because they may suffer a variety + // of errors if the backend chooses to cast to different float types (x87). `f128` cannot + // fit into any other float types so this is not a concern, and we rely on bit patterns. + + // SAFETY: POD bitcast, same as in `to_bits`. + let bits = unsafe { mem::transmute::(self) }; + Self::classify_bits(bits) + } + + /// This operates on bits, and only bits, so it can ignore concerns about weird FPUs. + /// FIXME(jubilee): In a just world, this would be the entire impl for classify, + /// plus a transmute. We do not live in a just world, but we can make it more so. + #[inline] + #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] + const fn classify_bits(b: u128) -> FpCategory { + match (b & Self::MAN_MASK, b & Self::EXP_MASK) { + (0, Self::EXP_MASK) => FpCategory::Infinite, + (_, Self::EXP_MASK) => FpCategory::Nan, + (0, 0) => FpCategory::Zero, + (_, 0) => FpCategory::Subnormal, + _ => FpCategory::Normal, + } + } + /// Returns `true` if `self` has a positive sign, including `+0.0`, NaNs with /// positive sign bit and positive infinity. Note that IEEE 754 doesn't assign any /// meaning to the sign bit in case of a NaN, and as Rust doesn't guarantee that /// the bit pattern of NaNs are conserved over arithmetic operations, the result of /// `is_sign_positive` on a NaN might produce an unexpected result in some cases. - /// See [explanation of NaN as a special value](f32) for more info. + /// See [explanation of NaN as a special value](f128) for more info. /// /// ``` /// #![feature(f128)] @@ -250,7 +504,7 @@ impl f128 { /// meaning to the sign bit in case of a NaN, and as Rust doesn't guarantee that /// the bit pattern of NaNs are conserved over arithmetic operations, the result of /// `is_sign_negative` on a NaN might produce an unexpected result in some cases. - /// See [explanation of NaN as a special value](f32) for more info. + /// See [explanation of NaN as a special value](f128) for more info. /// /// ``` /// #![feature(f128)] @@ -271,6 +525,222 @@ impl f128 { (self.to_bits() & (1 << 127)) != 0 } + /// Returns the least number greater than `self`. + /// + /// Let `TINY` be the smallest representable positive `f128`. 
Then, + /// - if `self.is_nan()`, this returns `self`; + /// - if `self` is [`NEG_INFINITY`], this returns [`MIN`]; + /// - if `self` is `-TINY`, this returns -0.0; + /// - if `self` is -0.0 or +0.0, this returns `TINY`; + /// - if `self` is [`MAX`] or [`INFINITY`], this returns [`INFINITY`]; + /// - otherwise the unique least value greater than `self` is returned. + /// + /// The identity `x.next_up() == -(-x).next_down()` holds for all non-NaN `x`. When `x` + /// is finite `x == x.next_up().next_down()` also holds. + /// + /// ```rust + /// #![feature(f128)] + /// #![feature(float_next_up_down)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// // f128::EPSILON is the difference between 1.0 and the next number up. + /// assert_eq!(1.0f128.next_up(), 1.0 + f128::EPSILON); + /// // But not for most numbers. + /// assert!(0.1f128.next_up() < 0.1 + f128::EPSILON); + /// assert_eq!(4611686018427387904f128.next_up(), 4611686018427387904.000000000000001); + /// # } + /// ``` + /// + /// [`NEG_INFINITY`]: Self::NEG_INFINITY + /// [`INFINITY`]: Self::INFINITY + /// [`MIN`]: Self::MIN + /// [`MAX`]: Self::MAX + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + // #[unstable(feature = "float_next_up_down", issue = "91399")] + pub fn next_up(self) -> Self { + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() { + return self; + } + + let abs = bits & !Self::SIGN_MASK; + let next_bits = if abs == 0 { + Self::TINY_BITS + } else if bits == abs { + bits + 1 + } else { + bits - 1 + }; + Self::from_bits(next_bits) + } + + /// Returns the greatest number less than `self`. + /// + /// Let `TINY` be the smallest representable positive `f128`. Then, + /// - if `self.is_nan()`, this returns `self`; + /// - if `self` is [`INFINITY`], this returns [`MAX`]; + /// - if `self` is `TINY`, this returns 0.0; + /// - if `self` is -0.0 or +0.0, this returns `-TINY`; + /// - if `self` is [`MIN`] or [`NEG_INFINITY`], this returns [`NEG_INFINITY`]; + /// - otherwise the unique greatest value less than `self` is returned. + /// + /// The identity `x.next_down() == -(-x).next_up()` holds for all non-NaN `x`. When `x` + /// is finite `x == x.next_down().next_up()` also holds. + /// + /// ```rust + /// #![feature(f128)] + /// #![feature(float_next_up_down)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let x = 1.0f128; + /// // Clamp value into range [0, 1). + /// let clamped = x.clamp(0.0, 1.0f128.next_down()); + /// assert!(clamped < 1.0); + /// assert_eq!(clamped.next_up(), 1.0); + /// # } + /// ``` + /// + /// [`NEG_INFINITY`]: Self::NEG_INFINITY + /// [`INFINITY`]: Self::INFINITY + /// [`MIN`]: Self::MIN + /// [`MAX`]: Self::MAX + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + // #[unstable(feature = "float_next_up_down", issue = "91399")] + pub fn next_down(self) -> Self { + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. 
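The bit-increment trick `next_up` and `next_down` rely on works identically on stable `f32`; `next_up_f32` below is an illustrative re-derivation, not the unstable method itself:

```rust
fn next_up_f32(x: f32) -> f32 {
    const TINY_BITS: u32 = 0x1; // smallest positive subnormal
    const SIGN_MASK: u32 = 0x8000_0000;

    let bits = x.to_bits();
    if x.is_nan() || bits == f32::INFINITY.to_bits() {
        return x;
    }

    let abs = bits & !SIGN_MASK;
    let next_bits = if abs == 0 {
        TINY_BITS // -0.0 and +0.0 both step up to the smallest subnormal
    } else if bits == abs {
        bits + 1 // positive: move away from zero
    } else {
        bits - 1 // negative: move toward zero
    };
    f32::from_bits(next_bits)
}

fn main() {
    // EPSILON is exactly the gap above 1.0.
    assert_eq!(next_up_f32(1.0), 1.0 + f32::EPSILON);
    assert_eq!(next_up_f32(-0.0), f32::from_bits(1));
}
```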
+ let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { + return self; + } + + let abs = bits & !Self::SIGN_MASK; + let next_bits = if abs == 0 { + Self::NEG_TINY_BITS + } else if bits == abs { + bits - 1 + } else { + bits + 1 + }; + Self::from_bits(next_bits) + } + + /// Takes the reciprocal (inverse) of a number, `1/x`. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let x = 2.0_f128; + /// let abs_difference = (x.recip() - (1.0 / x)).abs(); + /// + /// assert!(abs_difference <= f128::EPSILON); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn recip(self) -> Self { + 1.0 / self + } + + /// Converts radians to degrees. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let angle = std::f128::consts::PI; + /// + /// let abs_difference = (angle.to_degrees() - 180.0).abs(); + /// assert!(abs_difference <= f128::EPSILON); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn to_degrees(self) -> Self { + // Use a literal for better precision. + const PIS_IN_180: f128 = 57.2957795130823208767981548141051703324054724665643215491602_f128; + self * PIS_IN_180 + } + + /// Converts degrees to radians. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let angle = 180.0f128; + /// + /// let abs_difference = (angle.to_radians() - std::f128::consts::PI).abs(); + /// + /// assert!(abs_difference <= 1e-30); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn to_radians(self) -> f128 { + // Use a literal for better precision. + const RADS_PER_DEG: f128 = + 0.0174532925199432957692369076848861271344287188854172545609719_f128; + self * RADS_PER_DEG + } + + /// Rounds toward zero and converts to any primitive integer type, + /// assuming that the value is finite and fits in that type. + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `float*itf` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let value = 4.6_f128; + /// let rounded = unsafe { value.to_int_unchecked::() }; + /// assert_eq!(rounded, 4); + /// + /// let value = -128.9_f128; + /// let rounded = unsafe { value.to_int_unchecked::() }; + /// assert_eq!(rounded, i8::MIN); + /// # } + /// ``` + /// + /// # Safety + /// + /// The value must: + /// + /// * Not be `NaN` + /// * Not be infinite + /// * Be representable in the return type `Int`, after truncating off its fractional part + #[inline] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub unsafe fn to_int_unchecked(self) -> Int + where + Self: FloatToInt, + { + // SAFETY: the caller must uphold the safety contract for + // `FloatToInt::to_int_unchecked`. 
+ unsafe { FloatToInt::::to_int_unchecked(self) } + } + /// Raw transmutation to `u128`. /// /// This is currently identical to `transmute::(self)` on all platforms. @@ -280,14 +750,62 @@ impl f128 { /// /// Note that this function is distinct from `as` casting, which attempts to /// preserve the *numeric* value, and not the bitwise value. + /// + /// ``` + /// #![feature(f128)] + /// + /// # // FIXME(f16_f128): enable this once const casting works + /// # // assert_ne!((1f128).to_bits(), 1f128 as u128); // to_bits() is not casting! + /// assert_eq!((12.5f128).to_bits(), 0x40029000000000000000000000000000); + /// ``` #[inline] #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] #[must_use = "this returns the result of the operation, without modifying the original"] - pub fn to_bits(self) -> u128 { - // SAFETY: `u128` is a plain old datatype so we can always... uh... - // ...look, just pretend you forgot what you just read. - // Stability concerns. - unsafe { mem::transmute(self) } + pub const fn to_bits(self) -> u128 { + // SAFETY: `u128` is a plain old datatype so we can always transmute to it. + // ...sorta. + // + // It turns out that at runtime, it is possible for a floating point number + // to be subject to a floating point mode that alters nonzero subnormal numbers + // to zero on reads and writes, aka "denormals are zero" and "flush to zero". + // + // And, of course evaluating to a NaN value is fairly nondeterministic. + // More precisely: when NaN should be returned is knowable, but which NaN? + // So far that's defined by a combination of LLVM and the CPU, not Rust. + // This function, however, allows observing the bitstring of a NaN, + // thus introspection on CTFE. + // + // In order to preserve, at least for the moment, const-to-runtime equivalence, + // we reject any of these possible situations from happening. + #[inline] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + const fn ct_f128_to_u128(ct: f128) -> u128 { + // FIXME(f16_f128): we should use `.classify()` like `f32` and `f64`, but that + // is not available on all platforms (needs `netf2` and `unordtf2`). So classify + // the bits instead. + + // SAFETY: this is a POD transmutation + let bits = unsafe { mem::transmute::(ct) }; + match f128::classify_bits(bits) { + FpCategory::Nan => { + panic!("const-eval error: cannot use f128::to_bits on a NaN") + } + FpCategory::Subnormal => { + panic!("const-eval error: cannot use f128::to_bits on a subnormal number") + } + FpCategory::Infinite | FpCategory::Normal | FpCategory::Zero => bits, + } + } + + #[inline(always)] // See https://github.com/rust-lang/compiler-builtins/issues/491 + fn rt_f128_to_u128(x: f128) -> u128 { + // SAFETY: `u128` is a plain old datatype so we can always... uh... + // ...look, just pretend you forgot what you just read. + // Stability concerns. + unsafe { mem::transmute(x) } + } + intrinsics::const_eval_select((self,), ct_f128_to_u128, rt_f128_to_u128) } /// Raw transmutation from `u128`. @@ -319,13 +837,381 @@ impl f128 { /// /// Note that this function is distinct from `as` casting, which attempts to /// preserve the *numeric* value, and not the bitwise value. 
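The `const_eval_select` split used by `to_bits` above (a strict compile-time path next to a transmuting runtime path) can be sketched on `f32`. This assumes a nightly toolchain, since `core_intrinsics` is a permanently unstable internal feature; the function names here are illustrative:

```rust
#![feature(core_intrinsics)]
#![allow(internal_features)]

use std::intrinsics::const_eval_select;

const fn to_bits_f32(x: f32) -> u32 {
    const fn ct(x: f32) -> u32 {
        // Compile-time path: a real implementation could reject NaNs and
        // subnormals here before exposing the bit pattern, as above.
        unsafe { std::mem::transmute(x) }
    }
    fn rt(x: f32) -> u32 {
        x.to_bits()
    }
    const_eval_select((x,), ct, rt)
}

const AT_COMPILE_TIME: u32 = to_bits_f32(12.5);

fn main() {
    // Both paths must agree to preserve const-to-runtime equivalence.
    assert_eq!(AT_COMPILE_TIME, to_bits_f32(12.5));
    assert_eq!(AT_COMPILE_TIME, 12.5f32.to_bits());
}
```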
+ /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let v = f128::from_bits(0x40029000000000000000000000000000); + /// assert_eq!(v, 12.5); + /// # } + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + pub const fn from_bits(v: u128) -> Self { + // It turns out the safety issues with sNaN were overblown! Hooray! + // SAFETY: `u128` is a plain old datatype so we can always transmute from it + // ...sorta. + // + // It turns out that at runtime, it is possible for a floating point number + // to be subject to floating point modes that alter nonzero subnormal numbers + // to zero on reads and writes, aka "denormals are zero" and "flush to zero". + // This is not a problem usually, but at least one tier2 platform for Rust + // actually exhibits this behavior by default: thumbv7neon + // aka "the Neon FPU in AArch32 state" + // + // And, of course evaluating to a NaN value is fairly nondeterministic. + // More precisely: when NaN should be returned is knowable, but which NaN? + // So far that's defined by a combination of LLVM and the CPU, not Rust. + // This function, however, allows observing the bitstring of a NaN, + // thus introspection on CTFE. + // + // In order to preserve, at least for the moment, const-to-runtime equivalence, + // reject any of these possible situations from happening. + #[inline] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + const fn ct_u128_to_f128(ct: u128) -> f128 { + match f128::classify_bits(ct) { + FpCategory::Subnormal => { + panic!("const-eval error: cannot use f128::from_bits on a subnormal number") + } + FpCategory::Nan => { + panic!("const-eval error: cannot use f128::from_bits on NaN") + } + FpCategory::Infinite | FpCategory::Normal | FpCategory::Zero => { + // SAFETY: It's not a frumious number + unsafe { mem::transmute::(ct) } + } + } + } + + #[inline(always)] // See https://github.com/rust-lang/compiler-builtins/issues/491 + fn rt_u128_to_f128(x: u128) -> f128 { + // SAFETY: `u128` is a plain old datatype so we can always... uh... + // ...look, just pretend you forgot what you just read. + // Stability concerns. + unsafe { mem::transmute(x) } + } + intrinsics::const_eval_select((v,), ct_u128_to_f128, rt_u128_to_f128) + } + + /// Return the memory representation of this floating point number as a byte array in + /// big-endian (network) byte order. + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// + /// let bytes = 12.5f128.to_be_bytes(); + /// assert_eq!( + /// bytes, + /// [0x40, 0x02, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + /// ); + /// ``` + #[inline] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_be_bytes(self) -> [u8; 16] { + self.to_bits().to_be_bytes() + } + + /// Return the memory representation of this floating point number as a byte array in + /// little-endian byte order. 
+ /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// + /// let bytes = 12.5f128.to_le_bytes(); + /// assert_eq!( + /// bytes, + /// [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x02, 0x40] + /// ); + /// ``` + #[inline] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_le_bytes(self) -> [u8; 16] { + self.to_bits().to_le_bytes() + } + + /// Return the memory representation of this floating point number as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code + /// should use [`to_be_bytes`] or [`to_le_bytes`], as appropriate, instead. + /// + /// [`to_be_bytes`]: f128::to_be_bytes + /// [`to_le_bytes`]: f128::to_le_bytes + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// + /// let bytes = 12.5f128.to_ne_bytes(); + /// assert_eq!( + /// bytes, + /// if cfg!(target_endian = "big") { + /// [0x40, 0x02, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + /// } else { + /// [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x02, 0x40] + /// } + /// ); + /// ``` + #[inline] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_ne_bytes(self) -> [u8; 16] { + self.to_bits().to_ne_bytes() + } + + /// Create a floating point value from its representation as a byte array in big endian. + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let value = f128::from_be_bytes( + /// [0x40, 0x02, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + /// ); + /// assert_eq!(value, 12.5); + /// # } + /// ``` #[inline] #[must_use] #[unstable(feature = "f128", issue = "116909")] - pub fn from_bits(v: u128) -> Self { - // SAFETY: `u128 is a plain old datatype so we can always... uh... - // ...look, just pretend you forgot what you just read. - // Stability concerns. - unsafe { mem::transmute(v) } + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + pub const fn from_be_bytes(bytes: [u8; 16]) -> Self { + Self::from_bits(u128::from_be_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in little endian. + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let value = f128::from_le_bytes( + /// [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x02, 0x40] + /// ); + /// assert_eq!(value, 12.5); + /// # } + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + pub const fn from_le_bytes(bytes: [u8; 16]) -> Self { + Self::from_bits(u128::from_le_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in native endian. + /// + /// As the target platform's native endianness is used, portable code + /// likely wants to use [`from_be_bytes`] or [`from_le_bytes`], as + /// appropriate instead. + /// + /// [`from_be_bytes`]: f128::from_be_bytes + /// [`from_le_bytes`]: f128::from_le_bytes + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `eqtf2` is available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let value = f128::from_ne_bytes(if cfg!(target_endian = "big") { + /// [0x40, 0x02, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + /// } else { + /// [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x02, 0x40] + /// }); + /// assert_eq!(value, 12.5); + /// # } + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "f128", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + pub const fn from_ne_bytes(bytes: [u8; 16]) -> Self { + Self::from_bits(u128::from_ne_bytes(bytes)) + } + + /// Return the ordering between `self` and `other`. + /// + /// Unlike the standard partial comparison between floating point numbers, + /// this comparison always produces an ordering in accordance to + /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) + /// floating point standard. The values are ordered in the following sequence: + /// + /// - negative quiet NaN + /// - negative signaling NaN + /// - negative infinity + /// - negative numbers + /// - negative subnormal numbers + /// - negative zero + /// - positive zero + /// - positive subnormal numbers + /// - positive numbers + /// - positive infinity + /// - positive signaling NaN + /// - positive quiet NaN. + /// + /// The ordering established by this function does not always agree with the + /// [`PartialOrd`] and [`PartialEq`] implementations of `f128`. For example, + /// they consider negative and positive zero equal, while `total_cmp` + /// doesn't. + /// + /// The interpretation of the signaling NaN bit follows the definition in + /// the IEEE 754 standard, which may not match the interpretation by some of + /// the older, non-conformant (e.g. MIPS) hardware implementations. 
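The two's-complement flip that `total_cmp`'s comments walk through can be verified on stable `f32`; `total_cmp_f32` is an illustrative re-derivation, not the library code:

```rust
use std::cmp::Ordering;

fn total_cmp_f32(a: f32, b: f32) -> Ordering {
    let mut left = a.to_bits() as i32;
    let mut right = b.to_bits() as i32;

    // For negative values, flip every bit except the sign so the integer
    // order of the bit patterns matches IEEE 754 totalOrder.
    left ^= (((left >> 31) as u32) >> 1) as i32;
    right ^= (((right >> 31) as u32) >> 1) as i32;

    left.cmp(&right)
}

fn main() {
    assert_eq!(total_cmp_f32(-0.0, 0.0), Ordering::Less);
    assert_eq!(total_cmp_f32(1.0, f32::NAN), Ordering::Less); // positive NaN sorts last
    assert_eq!(total_cmp_f32(f32::NEG_INFINITY, f32::MIN), Ordering::Less);
    // Agrees with the standard library's stable implementation.
    assert_eq!(total_cmp_f32(2.0, -3.0), 2.0f32.total_cmp(&-3.0));
}
```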
+ /// + /// # Example + /// + /// ``` + /// #![feature(f128)] + /// + /// struct GoodBoy { + /// name: &'static str, + /// weight: f128, + /// } + /// + /// let mut bois = vec![ + /// GoodBoy { name: "Pucci", weight: 0.1 }, + /// GoodBoy { name: "Woofer", weight: 99.0 }, + /// GoodBoy { name: "Yapper", weight: 10.0 }, + /// GoodBoy { name: "Chonk", weight: f128::INFINITY }, + /// GoodBoy { name: "Abs. Unit", weight: f128::NAN }, + /// GoodBoy { name: "Floaty", weight: -5.0 }, + /// ]; + /// + /// bois.sort_by(|a, b| a.weight.total_cmp(&b.weight)); + /// + /// // `f128::NAN` could be positive or negative, which will affect the sort order. + /// if f128::NAN.is_sign_negative() { + /// bois.into_iter().map(|b| b.weight) + /// .zip([f128::NAN, -5.0, 0.1, 10.0, 99.0, f128::INFINITY].iter()) + /// .for_each(|(a, b)| assert_eq!(a.to_bits(), b.to_bits())) + /// } else { + /// bois.into_iter().map(|b| b.weight) + /// .zip([-5.0, 0.1, 10.0, 99.0, f128::INFINITY, f128::NAN].iter()) + /// .for_each(|(a, b)| assert_eq!(a.to_bits(), b.to_bits())) + /// } + /// ``` + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + pub fn total_cmp(&self, other: &Self) -> crate::cmp::Ordering { + let mut left = self.to_bits() as i128; + let mut right = other.to_bits() as i128; + + // In case of negatives, flip all the bits except the sign + // to achieve a similar layout as two's complement integers + // + // Why does this work? IEEE 754 floats consist of three fields: + // Sign bit, exponent and mantissa. The set of exponent and mantissa + // fields as a whole have the property that their bitwise order is + // equal to the numeric magnitude where the magnitude is defined. + // The magnitude is not normally defined on NaN values, but + // IEEE 754 totalOrder defines the NaN values also to follow the + // bitwise order. This leads to order explained in the doc comment. + // However, the representation of magnitude is the same for negative + // and positive numbers – only the sign bit is different. + // To easily compare the floats as signed integers, we need to + // flip the exponent and mantissa bits in case of negative numbers. + // We effectively convert the numbers to "two's complement" form. + // + // To do the flipping, we construct a mask and XOR against it. + // We branchlessly calculate an "all-ones except for the sign bit" + // mask from negative-signed values: right shifting sign-extends + // the integer, so we "fill" the mask with sign bits, and then + // convert to unsigned to push one more zero bit. + // On positive values, the mask is all zeros, so it's a no-op. + left ^= (((left >> 127) as u128) >> 1) as i128; + right ^= (((right >> 127) as u128) >> 1) as i128; + + left.cmp(&right) + } + + /// Restrict a value to a certain interval unless it is NaN. + /// + /// Returns `max` if `self` is greater than `max`, and `min` if `self` is + /// less than `min`. Otherwise this returns `self`. + /// + /// Note that this function returns NaN if the initial value was NaN as + /// well. + /// + /// # Panics + /// + /// Panics if `min > max`, `min` is NaN, or `max` is NaN. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(f128)] + /// # // FIXME(f16_f128): remove when `{eq,gt,unord}tf` are available + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// assert!((-3.0f128).clamp(-2.0, 1.0) == -2.0); + /// assert!((0.0f128).clamp(-2.0, 1.0) == 0.0); + /// assert!((2.0f128).clamp(-2.0, 1.0) == 1.0); + /// assert!((f128::NAN).clamp(-2.0, 1.0).is_nan()); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f128", issue = "116909")] + #[must_use = "method returns a new number and does not mutate the original value"] + pub fn clamp(mut self, min: f128, max: f128) -> f128 { + assert!(min <= max, "min > max, or either was NaN. min = {min:?}, max = {max:?}"); + if self < min { + self = min; + } + if self > max { + self = max; + } + self } } diff --git a/core/src/num/f16.rs b/core/src/num/f16.rs index c4d4584544bad..2a8ede9383844 100644 --- a/core/src/num/f16.rs +++ b/core/src/num/f16.rs @@ -11,7 +11,11 @@ #![unstable(feature = "f16", issue = "116909")] +use crate::convert::FloatToInt; +#[cfg(not(test))] +use crate::intrinsics; use crate::mem; +use crate::num::FpCategory; /// Basic mathematical constants. #[unstable(feature = "f16", issue = "116909")] @@ -67,6 +71,12 @@ pub mod consts { // Also, #[unstable(feature = "more_float_constants", issue = "103883")] pub const FRAC_1_SQRT_PI: f16 = 0.564189583547756286948079451560772586_f16; + /// 1/sqrt(2π) + #[doc(alias = "FRAC_1_SQRT_TAU")] + #[unstable(feature = "f16", issue = "116909")] + // Also, #[unstable(feature = "more_float_constants", issue = "103883")] + pub const FRAC_1_SQRT_2PI: f16 = 0.398942280401432677939946059934381868_f16; + /// 2/π #[unstable(feature = "f16", issue = "116909")] pub const FRAC_2_PI: f16 = 0.636619772367581343075535053490057448_f16; @@ -209,30 +219,311 @@ impl f16 { #[unstable(feature = "f16", issue = "116909")] pub const MAX_10_EXP: i32 = 4; + /// Not a Number (NaN). + /// + /// Note that IEEE 754 doesn't define just a single NaN value; + /// a plethora of bit patterns are considered to be NaN. + /// Furthermore, the standard makes a difference + /// between a "signaling" and a "quiet" NaN, + /// and allows inspecting its "payload" (the unspecified bits in the bit pattern). + /// This constant isn't guaranteed to equal to any specific NaN bitpattern, + /// and the stability of its representation over Rust versions + /// and target platforms isn't guaranteed. + #[cfg(not(bootstrap))] + #[allow(clippy::eq_op)] + #[rustc_diagnostic_item = "f16_nan"] + #[unstable(feature = "f16", issue = "116909")] + pub const NAN: f16 = 0.0_f16 / 0.0_f16; + + /// Infinity (∞). + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + pub const INFINITY: f16 = 1.0_f16 / 0.0_f16; + + /// Negative infinity (−∞). + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + pub const NEG_INFINITY: f16 = -1.0_f16 / 0.0_f16; + + /// Sign bit + #[cfg(not(bootstrap))] + pub(crate) const SIGN_MASK: u16 = 0x8000; + + /// Exponent mask + pub(crate) const EXP_MASK: u16 = 0x7c00; + + /// Mantissa mask + pub(crate) const MAN_MASK: u16 = 0x03ff; + + /// Minimum representable positive value (min subnormal) + #[cfg(not(bootstrap))] + const TINY_BITS: u16 = 0x1; + + /// Minimum representable negative value (min negative subnormal) + #[cfg(not(bootstrap))] + const NEG_TINY_BITS: u16 = Self::TINY_BITS | Self::SIGN_MASK; + /// Returns `true` if this value is NaN. 
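+ ///
+ /// Because NaN compares unequal to every value, including itself, an
+ /// equality test such as `x == f16::NAN` always evaluates to `false`;
+ /// this method is the reliable way to detect NaN.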
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let nan = f16::NAN;
+ /// let f = 7.0_f16;
+ ///
+ /// assert!(nan.is_nan());
+ /// assert!(!f.is_nan());
+ /// # }
+ /// ```
#[inline]
#[must_use]
+ #[cfg(not(bootstrap))]
#[unstable(feature = "f16", issue = "116909")]
#[allow(clippy::eq_op)] // > if you intended to check if the operand is NaN, use `.is_nan()` instead :)
pub const fn is_nan(self) -> bool {
self != self
}

+ // FIXME(#50145): `abs` is publicly unavailable in core due to
+ // concerns about portability, so this implementation is for
+ // private use internally.
+ #[inline]
+ #[cfg(not(bootstrap))]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub(crate) const fn abs_private(self) -> f16 {
+ // SAFETY: This transmutation is fine. Probably. For the reasons std is using it.
+ unsafe { mem::transmute::<u16, f16>(mem::transmute::<f16, u16>(self) & !Self::SIGN_MASK) }
+ }
+
+ /// Returns `true` if this value is positive infinity or negative infinity, and
+ /// `false` otherwise.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let f = 7.0f16;
+ /// let inf = f16::INFINITY;
+ /// let neg_inf = f16::NEG_INFINITY;
+ /// let nan = f16::NAN;
+ ///
+ /// assert!(!f.is_infinite());
+ /// assert!(!nan.is_infinite());
+ ///
+ /// assert!(inf.is_infinite());
+ /// assert!(neg_inf.is_infinite());
+ /// # }
+ /// ```
+ #[inline]
+ #[must_use]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub const fn is_infinite(self) -> bool {
+ (self == f16::INFINITY) | (self == f16::NEG_INFINITY)
+ }
+
+ /// Returns `true` if this number is neither infinite nor NaN.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let f = 7.0f16;
+ /// let inf: f16 = f16::INFINITY;
+ /// let neg_inf: f16 = f16::NEG_INFINITY;
+ /// let nan: f16 = f16::NAN;
+ ///
+ /// assert!(f.is_finite());
+ ///
+ /// assert!(!nan.is_finite());
+ /// assert!(!inf.is_finite());
+ /// assert!(!neg_inf.is_finite());
+ /// # }
+ /// ```
+ #[inline]
+ #[must_use]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub const fn is_finite(self) -> bool {
+ // There's no need to handle NaN separately: if self is NaN,
+ // the comparison is not true, exactly as desired.
+ self.abs_private() < Self::INFINITY
+ }
+
+ /// Returns `true` if the number is [subnormal].
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let min = f16::MIN_POSITIVE; // 6.1035e-5
+ /// let max = f16::MAX;
+ /// let lower_than_min = 1.0e-7_f16;
+ /// let zero = 0.0_f16;
+ ///
+ /// assert!(!min.is_subnormal());
+ /// assert!(!max.is_subnormal());
+ ///
+ /// assert!(!zero.is_subnormal());
+ /// assert!(!f16::NAN.is_subnormal());
+ /// assert!(!f16::INFINITY.is_subnormal());
+ /// // Values between `0` and `min` are Subnormal.
+ /// assert!(lower_than_min.is_subnormal());
+ /// # }
+ /// ```
+ /// [subnormal]: https://en.wikipedia.org/wiki/Denormal_number
+ #[inline]
+ #[must_use]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub const fn is_subnormal(self) -> bool {
+ matches!(self.classify(), FpCategory::Subnormal)
+ }
+
+ /// Returns `true` if the number is neither zero, infinite, [subnormal], nor NaN.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let min = f16::MIN_POSITIVE; // 6.1035e-5
+ /// let max = f16::MAX;
+ /// let lower_than_min = 1.0e-7_f16;
+ /// let zero = 0.0_f16;
+ ///
+ /// assert!(min.is_normal());
+ /// assert!(max.is_normal());
+ ///
+ /// assert!(!zero.is_normal());
+ /// assert!(!f16::NAN.is_normal());
+ /// assert!(!f16::INFINITY.is_normal());
+ /// // Values between `0` and `min` are Subnormal.
+ /// assert!(!lower_than_min.is_normal());
+ /// # }
+ /// ```
+ /// [subnormal]: https://en.wikipedia.org/wiki/Denormal_number
+ #[inline]
+ #[must_use]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub const fn is_normal(self) -> bool {
+ matches!(self.classify(), FpCategory::Normal)
+ }
+
+ /// Returns the floating point category of the number. If only one property
+ /// is going to be tested, it is generally faster to use the specific
+ /// predicate instead.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// use std::num::FpCategory;
+ ///
+ /// let num = 12.4_f16;
+ /// let inf = f16::INFINITY;
+ ///
+ /// assert_eq!(num.classify(), FpCategory::Normal);
+ /// assert_eq!(inf.classify(), FpCategory::Infinite);
+ /// # }
+ /// ```
+ #[inline]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ pub const fn classify(self) -> FpCategory {
+ // A previous implementation for f32/f64 tried to only use bitmask-based checks,
+ // using `to_bits` to transmute the float to its bit repr and match on that.
+ // Unfortunately, floating point numbers can be much worse than that.
+ // This also needs to not result in recursive evaluations of `to_bits`.
+ //
+
+ // Platforms without native support generally convert to `f32` to perform operations,
+ // and most of these platforms correctly round back to `f16` after each operation.
+ // However, some platforms have bugs where they keep the excess `f32` precision (e.g.
+ // WASM, see llvm/llvm-project#96437). This implementation makes a best-effort attempt
+ // to account for that excess precision.
+ if self.is_infinite() {
+ // Thus, a value may compare unequal to infinity, despite having a "full" exponent mask.
+ FpCategory::Infinite
+ } else if self.is_nan() {
+ // And it may not be NaN, as it can simply be an "overextended" finite value.
+ FpCategory::Nan
+ } else {
+ // However, std can't simply compare to zero to check for zero, either,
+ // as correctness requires avoiding equality tests that may be Subnormal == -0.0
+ // because it may be wrong under "denormals are zero" and "flush to zero" modes.
+ // Most of std's targets don't use those, but they are used for thumbv7neon.
+ // So, this does use bitpattern matching for the rest.

+ // SAFETY: f16 to u16 is fine. Usually.
+ // If classify has gotten this far, the value is definitely in one of these categories.
+ unsafe { f16::partial_classify(self) }
+ }
+ }
+
+ /// This doesn't actually return a right answer for NaN on purpose,
+ /// seeing as how it cannot correctly discern between a floating point NaN,
+ /// and some normal floating point numbers truncated from an x87 FPU.
+ ///
+ /// # Safety
+ ///
+ /// This requires making sure you call this function for values it answers correctly on,
+ /// otherwise it returns a wrong answer. This is not important for memory safety per se,
+ /// but getting floats correct is important for not accidentally leaking const eval
+ /// runtime-deviating logic which may or may not be acceptable.
+ #[inline]
+ #[cfg(not(bootstrap))]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ const unsafe fn partial_classify(self) -> FpCategory {
+ // SAFETY: The caller is not asking questions for which this will tell lies.
+ let b = unsafe { mem::transmute::<f16, u16>(self) };
+ match (b & Self::MAN_MASK, b & Self::EXP_MASK) {
+ (0, Self::EXP_MASK) => FpCategory::Infinite,
+ (0, 0) => FpCategory::Zero,
+ (_, 0) => FpCategory::Subnormal,
+ _ => FpCategory::Normal,
+ }
+ }
+
+ /// This operates on bits, and only bits, so it can ignore concerns about weird FPUs.
+ /// FIXME(jubilee): In a just world, this would be the entire impl for classify,
+ /// plus a transmute. We do not live in a just world, but we can make it more so.
+ #[inline]
+ #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+ const fn classify_bits(b: u16) -> FpCategory {
+ match (b & Self::MAN_MASK, b & Self::EXP_MASK) {
+ (0, Self::EXP_MASK) => FpCategory::Infinite,
+ (_, Self::EXP_MASK) => FpCategory::Nan,
+ (0, 0) => FpCategory::Zero,
+ (_, 0) => FpCategory::Subnormal,
+ _ => FpCategory::Normal,
+ }
+ }
+
/// Returns `true` if `self` has a positive sign, including `+0.0`, NaNs with
/// positive sign bit and positive infinity. Note that IEEE 754 doesn't assign any
/// meaning to the sign bit in case of a NaN, and as Rust doesn't guarantee that
/// the bit pattern of NaNs are conserved over arithmetic operations, the result of
/// `is_sign_positive` on a NaN might produce an unexpected result in some cases.
- /// See [explanation of NaN as a special value](f32) for more info.
+ /// See [explanation of NaN as a special value](f16) for more info.
///
/// ```
/// #![feature(f16)]
+ /// # // FIXME(f16_f128): LLVM crashes on s390x, llvm/llvm-project#50374
+ /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] {
///
/// let f = 7.0_f16;
/// let g = -7.0_f16;
///
/// assert!(f.is_sign_positive());
/// assert!(!g.is_sign_positive());
+ /// # }
/// ```
#[inline]
#[must_use]
@@ -246,16 +537,19 @@ impl f16 {
/// meaning to the sign bit in case of a NaN, and as Rust doesn't guarantee that
/// the bit pattern of NaNs are conserved over arithmetic operations, the result of
/// `is_sign_negative` on a NaN might produce an unexpected result in some cases.
- /// See [explanation of NaN as a special value](f32) for more info.
+ /// See [explanation of NaN as a special value](f16) for more info.
/// /// ``` /// #![feature(f16)] + /// # // FIXME(f16_f128): LLVM crashes on s390x, llvm/llvm-project#50374 + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { /// /// let f = 7.0_f16; /// let g = -7.0_f16; /// /// assert!(!f.is_sign_negative()); /// assert!(g.is_sign_negative()); + /// # } /// ``` #[inline] #[must_use] @@ -267,6 +561,220 @@ impl f16 { (self.to_bits() & (1 << 15)) != 0 } + /// Returns the least number greater than `self`. + /// + /// Let `TINY` be the smallest representable positive `f16`. Then, + /// - if `self.is_nan()`, this returns `self`; + /// - if `self` is [`NEG_INFINITY`], this returns [`MIN`]; + /// - if `self` is `-TINY`, this returns -0.0; + /// - if `self` is -0.0 or +0.0, this returns `TINY`; + /// - if `self` is [`MAX`] or [`INFINITY`], this returns [`INFINITY`]; + /// - otherwise the unique least value greater than `self` is returned. + /// + /// The identity `x.next_up() == -(-x).next_down()` holds for all non-NaN `x`. When `x` + /// is finite `x == x.next_up().next_down()` also holds. + /// + /// ```rust + /// #![feature(f16)] + /// #![feature(float_next_up_down)] + /// # // FIXME(f16_f128): ABI issues on MSVC + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// // f16::EPSILON is the difference between 1.0 and the next number up. + /// assert_eq!(1.0f16.next_up(), 1.0 + f16::EPSILON); + /// // But not for most numbers. + /// assert!(0.1f16.next_up() < 0.1 + f16::EPSILON); + /// assert_eq!(4356f16.next_up(), 4360.0); + /// # } + /// ``` + /// + /// [`NEG_INFINITY`]: Self::NEG_INFINITY + /// [`INFINITY`]: Self::INFINITY + /// [`MIN`]: Self::MIN + /// [`MAX`]: Self::MAX + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + // #[unstable(feature = "float_next_up_down", issue = "91399")] + pub fn next_up(self) -> Self { + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() { + return self; + } + + let abs = bits & !Self::SIGN_MASK; + let next_bits = if abs == 0 { + Self::TINY_BITS + } else if bits == abs { + bits + 1 + } else { + bits - 1 + }; + Self::from_bits(next_bits) + } + + /// Returns the greatest number less than `self`. + /// + /// Let `TINY` be the smallest representable positive `f16`. Then, + /// - if `self.is_nan()`, this returns `self`; + /// - if `self` is [`INFINITY`], this returns [`MAX`]; + /// - if `self` is `TINY`, this returns 0.0; + /// - if `self` is -0.0 or +0.0, this returns `-TINY`; + /// - if `self` is [`MIN`] or [`NEG_INFINITY`], this returns [`NEG_INFINITY`]; + /// - otherwise the unique greatest value less than `self` is returned. + /// + /// The identity `x.next_down() == -(-x).next_up()` holds for all non-NaN `x`. When `x` + /// is finite `x == x.next_down().next_up()` also holds. + /// + /// ```rust + /// #![feature(f16)] + /// #![feature(float_next_up_down)] + /// # // FIXME(f16_f128): ABI issues on MSVC + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let x = 1.0f16; + /// // Clamp value into range [0, 1). 
+ /// let clamped = x.clamp(0.0, 1.0f16.next_down()); + /// assert!(clamped < 1.0); + /// assert_eq!(clamped.next_up(), 1.0); + /// # } + /// ``` + /// + /// [`NEG_INFINITY`]: Self::NEG_INFINITY + /// [`INFINITY`]: Self::INFINITY + /// [`MIN`]: Self::MIN + /// [`MAX`]: Self::MAX + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + // #[unstable(feature = "float_next_up_down", issue = "91399")] + pub fn next_down(self) -> Self { + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { + return self; + } + + let abs = bits & !Self::SIGN_MASK; + let next_bits = if abs == 0 { + Self::NEG_TINY_BITS + } else if bits == abs { + bits - 1 + } else { + bits + 1 + }; + Self::from_bits(next_bits) + } + + /// Takes the reciprocal (inverse) of a number, `1/x`. + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): extendhfsf2, truncsfhf2, __gnu_h2f_ieee, __gnu_f2h_ieee missing for many platforms + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let x = 2.0_f16; + /// let abs_difference = (x.recip() - (1.0 / x)).abs(); + /// + /// assert!(abs_difference <= f16::EPSILON); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn recip(self) -> Self { + 1.0 / self + } + + /// Converts radians to degrees. + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): extendhfsf2, truncsfhf2, __gnu_h2f_ieee, __gnu_f2h_ieee missing for many platforms + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let angle = std::f16::consts::PI; + /// + /// let abs_difference = (angle.to_degrees() - 180.0).abs(); + /// assert!(abs_difference <= 0.5); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn to_degrees(self) -> Self { + // Use a literal for better precision. + const PIS_IN_180: f16 = 57.2957795130823208767981548141051703_f16; + self * PIS_IN_180 + } + + /// Converts degrees to radians. + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): extendhfsf2, truncsfhf2, __gnu_h2f_ieee, __gnu_f2h_ieee missing for many platforms + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let angle = 180.0f16; + /// + /// let abs_difference = (angle.to_radians() - std::f16::consts::PI).abs(); + /// + /// assert!(abs_difference <= 0.01); + /// # } + /// ``` + #[inline] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub fn to_radians(self) -> f16 { + // Use a literal for better precision. + const RADS_PER_DEG: f16 = 0.017453292519943295769236907684886_f16; + self * RADS_PER_DEG + } + + /// Rounds toward zero and converts to any primitive integer type, + /// assuming that the value is finite and fits in that type. 
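+ ///
+ /// Unlike an `as` cast, which saturates on overflow and maps NaN to zero,
+ /// calling this with a non-finite value or one whose truncation does not
+ /// fit in `Int` is undefined behavior (see the safety requirements below).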
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let value = 4.6_f16;
+ /// let rounded = unsafe { value.to_int_unchecked::<u16>() };
+ /// assert_eq!(rounded, 4);
+ ///
+ /// let value = -128.9_f16;
+ /// let rounded = unsafe { value.to_int_unchecked::<i8>() };
+ /// assert_eq!(rounded, i8::MIN);
+ /// # }
+ /// ```
+ ///
+ /// # Safety
+ ///
+ /// The value must:
+ ///
+ /// * Not be `NaN`
+ /// * Not be infinite
+ /// * Be representable in the return type `Int`, after truncating off its fractional part
+ #[inline]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[must_use = "this returns the result of the operation, without modifying the original"]
+ pub unsafe fn to_int_unchecked<Int>(self) -> Int
+ where
+ Self: FloatToInt<Int>,
+ {
+ // SAFETY: the caller must uphold the safety contract for
+ // `FloatToInt::to_int_unchecked`.
+ unsafe { FloatToInt::<Int>::to_int_unchecked(self) }
+ }
+
/// Raw transmutation to `u16`.
///
/// This is currently identical to `transmute::<f16, u16>(self)` on all platforms.
@@ -276,14 +784,64 @@ impl f16 {
///
/// Note that this function is distinct from `as` casting, which attempts to
/// preserve the *numeric* value, and not the bitwise value.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// # // FIXME(f16_f128): enable this once const casting works
+ /// # // assert_ne!((1f16).to_bits(), 1f16 as u16); // to_bits() is not casting!
+ /// assert_eq!((12.5f16).to_bits(), 0x4a40);
+ /// # }
+ /// ```
#[inline]
#[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
#[must_use = "this returns the result of the operation, without modifying the original"]
- pub fn to_bits(self) -> u16 {
- // SAFETY: `u16` is a plain old datatype so we can always... uh...
- // ...look, just pretend you forgot what you just read.
- // Stability concerns.
- unsafe { mem::transmute(self) }
+ pub const fn to_bits(self) -> u16 {
+ // SAFETY: `u16` is a plain old datatype so we can always transmute to it.
+ // ...sorta.
+ //
+ // It turns out that at runtime, it is possible for a floating point number
+ // to be subject to a floating point mode that alters nonzero subnormal numbers
+ // to zero on reads and writes, aka "denormals are zero" and "flush to zero".
+ //
+ // And, of course evaluating to a NaN value is fairly nondeterministic.
+ // More precisely: when NaN should be returned is knowable, but which NaN?
+ // So far that's defined by a combination of LLVM and the CPU, not Rust.
+ // This function, however, allows observing the bitstring of a NaN,
+ // thus introspection on CTFE.
+ //
+ // In order to preserve, at least for the moment, const-to-runtime equivalence,
+ // we reject any of these possible situations from happening.
+ #[inline]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ const fn ct_f16_to_u16(ct: f16) -> u16 {
+ // FIXME(f16_f128): we should use `.classify()` like `f32` and `f64`, but we don't yet
+ // want to rely on that on all platforms because it is nondeterministic (e.g. x86 has
+ // convention discrepancies calling intrinsics). So just classify the bits instead.
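+ //
+ // (The `const_eval_select` call below dispatches to `ct_f16_to_u16` during
+ // const evaluation and to `rt_f16_to_u16` at runtime, so the panics in this
+ // helper can only fire in const contexts.)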
+
+ // SAFETY: this is a POD transmutation
+ let bits = unsafe { mem::transmute::<f16, u16>(ct) };
+ match f16::classify_bits(bits) {
+ FpCategory::Nan => {
+ panic!("const-eval error: cannot use f16::to_bits on a NaN")
+ }
+ FpCategory::Subnormal => {
+ panic!("const-eval error: cannot use f16::to_bits on a subnormal number")
+ }
+ FpCategory::Infinite | FpCategory::Normal | FpCategory::Zero => bits,
+ }
+ }
+
+ #[inline(always)] // See https://github.com/rust-lang/compiler-builtins/issues/491
+ fn rt_f16_to_u16(x: f16) -> u16 {
+ // SAFETY: `u16` is a plain old datatype so we can always... uh...
+ // ...look, just pretend you forgot what you just read.
+ // Stability concerns.
+ unsafe { mem::transmute(x) }
+ }
+ intrinsics::const_eval_select((self,), ct_f16_to_u16, rt_f16_to_u16)
}

/// Raw transmutation from `u16`.
@@ -315,13 +873,370 @@ impl f16 {
///
/// Note that this function is distinct from `as` casting, which attempts to
/// preserve the *numeric* value, and not the bitwise value.
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let v = f16::from_bits(0x4a40);
+ /// assert_eq!(v, 12.5);
+ /// # }
+ /// ```
+ #[inline]
+ #[must_use]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ pub const fn from_bits(v: u16) -> Self {
+ // It turns out the safety issues with sNaN were overblown! Hooray!
+ // SAFETY: `u16` is a plain old datatype so we can always transmute from it
+ // ...sorta.
+ //
+ // It turns out that at runtime, it is possible for a floating point number
+ // to be subject to floating point modes that alter nonzero subnormal numbers
+ // to zero on reads and writes, aka "denormals are zero" and "flush to zero".
+ // This is not a problem usually, but at least one tier2 platform for Rust
+ // actually exhibits this behavior by default: thumbv7neon
+ // aka "the Neon FPU in AArch32 state"
+ //
+ // And, of course evaluating to a NaN value is fairly nondeterministic.
+ // More precisely: when NaN should be returned is knowable, but which NaN?
+ // So far that's defined by a combination of LLVM and the CPU, not Rust.
+ // This function, however, allows observing the bitstring of a NaN,
+ // thus introspection on CTFE.
+ //
+ // In order to preserve, at least for the moment, const-to-runtime equivalence,
+ // reject any of these possible situations from happening.
+ #[inline]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ const fn ct_u16_to_f16(ct: u16) -> f16 {
+ match f16::classify_bits(ct) {
+ FpCategory::Subnormal => {
+ panic!("const-eval error: cannot use f16::from_bits on a subnormal number")
+ }
+ FpCategory::Nan => {
+ panic!("const-eval error: cannot use f16::from_bits on NaN")
+ }
+ FpCategory::Infinite | FpCategory::Normal | FpCategory::Zero => {
+ // SAFETY: It's not a frumious number
+ unsafe { mem::transmute::<u16, f16>(ct) }
+ }
+ }
+ }
+
+ #[inline(always)] // See https://github.com/rust-lang/compiler-builtins/issues/491
+ fn rt_u16_to_f16(x: u16) -> f16 {
+ // SAFETY: `u16` is a plain old datatype so we can always... uh...
+ // ...look, just pretend you forgot what you just read.
+ // Stability concerns.
+ unsafe { mem::transmute(x) }
+ }
+ intrinsics::const_eval_select((v,), ct_u16_to_f16, rt_u16_to_f16)
+ }
+
+ /// Return the memory representation of this floating point number as a byte array in
+ /// big-endian (network) byte order.
+ /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): LLVM crashes on s390x, llvm/llvm-project#50374 + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let bytes = 12.5f16.to_be_bytes(); + /// assert_eq!(bytes, [0x4a, 0x40]); + /// # } + /// ``` + #[inline] + #[unstable(feature = "f16", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_be_bytes(self) -> [u8; 2] { + self.to_bits().to_be_bytes() + } + + /// Return the memory representation of this floating point number as a byte array in + /// little-endian byte order. + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): LLVM crashes on s390x, llvm/llvm-project#50374 + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let bytes = 12.5f16.to_le_bytes(); + /// assert_eq!(bytes, [0x40, 0x4a]); + /// # } + /// ``` + #[inline] + #[unstable(feature = "f16", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_le_bytes(self) -> [u8; 2] { + self.to_bits().to_le_bytes() + } + + /// Return the memory representation of this floating point number as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code + /// should use [`to_be_bytes`] or [`to_le_bytes`], as appropriate, instead. + /// + /// [`to_be_bytes`]: f16::to_be_bytes + /// [`to_le_bytes`]: f16::to_le_bytes + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). + /// + /// # Examples + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): LLVM crashes on s390x, llvm/llvm-project#50374 + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// let bytes = 12.5f16.to_ne_bytes(); + /// assert_eq!( + /// bytes, + /// if cfg!(target_endian = "big") { + /// [0x4a, 0x40] + /// } else { + /// [0x40, 0x4a] + /// } + /// ); + /// # } + /// ``` + #[inline] + #[unstable(feature = "f16", issue = "116909")] + #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")] + #[must_use = "this returns the result of the operation, without modifying the original"] + pub const fn to_ne_bytes(self) -> [u8; 2] { + self.to_bits().to_ne_bytes() + } + + /// Create a floating point value from its representation as a byte array in big endian. + /// + /// See [`from_bits`](Self::from_bits) for some discussion of the + /// portability of this operation (there are almost no issues). 
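+ ///
+ /// Together with [`to_be_bytes`](Self::to_be_bytes) this round-trips the
+ /// exact bit pattern, so `f16::from_be_bytes(x.to_be_bytes())` recovers any
+ /// non-NaN `x` unchanged.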
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let value = f16::from_be_bytes([0x4a, 0x40]);
+ /// assert_eq!(value, 12.5);
+ /// # }
+ /// ```
+ #[inline]
+ #[must_use]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ pub const fn from_be_bytes(bytes: [u8; 2]) -> Self {
+ Self::from_bits(u16::from_be_bytes(bytes))
+ }
+
+ /// Create a floating point value from its representation as a byte array in little endian.
+ ///
+ /// See [`from_bits`](Self::from_bits) for some discussion of the
+ /// portability of this operation (there are almost no issues).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let value = f16::from_le_bytes([0x40, 0x4a]);
+ /// assert_eq!(value, 12.5);
+ /// # }
+ /// ```
+ #[inline]
+ #[must_use]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ pub const fn from_le_bytes(bytes: [u8; 2]) -> Self {
+ Self::from_bits(u16::from_le_bytes(bytes))
+ }
+
+ /// Create a floating point value from its representation as a byte array in native endian.
+ ///
+ /// As the target platform's native endianness is used, portable code
+ /// likely wants to use [`from_be_bytes`] or [`from_le_bytes`], as
+ /// appropriate instead.
+ ///
+ /// [`from_be_bytes`]: f16::from_be_bytes
+ /// [`from_le_bytes`]: f16::from_le_bytes
+ ///
+ /// See [`from_bits`](Self::from_bits) for some discussion of the
+ /// portability of this operation (there are almost no issues).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") {
+ /// [0x4a, 0x40]
+ /// } else {
+ /// [0x40, 0x4a]
+ /// });
+ /// assert_eq!(value, 12.5);
+ /// # }
+ /// ```
#[inline]
#[must_use]
#[unstable(feature = "f16", issue = "116909")]
- pub fn from_bits(v: u16) -> Self {
- // SAFETY: `u16` is a plain old datatype so we can always... uh...
- // ...look, just pretend you forgot what you just read.
- // Stability concerns.
- unsafe { mem::transmute(v) }
+ #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+ pub const fn from_ne_bytes(bytes: [u8; 2]) -> Self {
+ Self::from_bits(u16::from_ne_bytes(bytes))
+ }
+
+ /// Return the ordering between `self` and `other`.
+ ///
+ /// Unlike the standard partial comparison between floating point numbers,
+ /// this comparison always produces an ordering in accordance to
+ /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
+ /// floating point standard. The values are ordered in the following sequence:
+ ///
+ /// - negative quiet NaN
+ /// - negative signaling NaN
+ /// - negative infinity
+ /// - negative numbers
+ /// - negative subnormal numbers
+ /// - negative zero
+ /// - positive zero
+ /// - positive subnormal numbers
+ /// - positive numbers
+ /// - positive infinity
+ /// - positive signaling NaN
+ /// - positive quiet NaN.
+ ///
+ /// The ordering established by this function does not always agree with the
+ /// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example,
+ /// they consider negative and positive zero equal, while `total_cmp`
+ /// doesn't.
+ /// + /// The interpretation of the signaling NaN bit follows the definition in + /// the IEEE 754 standard, which may not match the interpretation by some of + /// the older, non-conformant (e.g. MIPS) hardware implementations. + /// + /// # Example + /// + /// ``` + /// #![feature(f16)] + /// # // FIXME(f16_f128): extendhfsf2, truncsfhf2, __gnu_h2f_ieee, __gnu_f2h_ieee missing for many platforms + /// # #[cfg(all(target_arch = "x86_64", target_os = "linux"))] { + /// + /// struct GoodBoy { + /// name: &'static str, + /// weight: f16, + /// } + /// + /// let mut bois = vec![ + /// GoodBoy { name: "Pucci", weight: 0.1 }, + /// GoodBoy { name: "Woofer", weight: 99.0 }, + /// GoodBoy { name: "Yapper", weight: 10.0 }, + /// GoodBoy { name: "Chonk", weight: f16::INFINITY }, + /// GoodBoy { name: "Abs. Unit", weight: f16::NAN }, + /// GoodBoy { name: "Floaty", weight: -5.0 }, + /// ]; + /// + /// bois.sort_by(|a, b| a.weight.total_cmp(&b.weight)); + /// + /// // `f16::NAN` could be positive or negative, which will affect the sort order. + /// if f16::NAN.is_sign_negative() { + /// bois.into_iter().map(|b| b.weight) + /// .zip([f16::NAN, -5.0, 0.1, 10.0, 99.0, f16::INFINITY].iter()) + /// .for_each(|(a, b)| assert_eq!(a.to_bits(), b.to_bits())) + /// } else { + /// bois.into_iter().map(|b| b.weight) + /// .zip([-5.0, 0.1, 10.0, 99.0, f16::INFINITY, f16::NAN].iter()) + /// .for_each(|(a, b)| assert_eq!(a.to_bits(), b.to_bits())) + /// } + /// # } + /// ``` + #[inline] + #[must_use] + #[cfg(not(bootstrap))] + #[unstable(feature = "f16", issue = "116909")] + pub fn total_cmp(&self, other: &Self) -> crate::cmp::Ordering { + let mut left = self.to_bits() as i16; + let mut right = other.to_bits() as i16; + + // In case of negatives, flip all the bits except the sign + // to achieve a similar layout as two's complement integers + // + // Why does this work? IEEE 754 floats consist of three fields: + // Sign bit, exponent and mantissa. The set of exponent and mantissa + // fields as a whole have the property that their bitwise order is + // equal to the numeric magnitude where the magnitude is defined. + // The magnitude is not normally defined on NaN values, but + // IEEE 754 totalOrder defines the NaN values also to follow the + // bitwise order. This leads to order explained in the doc comment. + // However, the representation of magnitude is the same for negative + // and positive numbers – only the sign bit is different. + // To easily compare the floats as signed integers, we need to + // flip the exponent and mantissa bits in case of negative numbers. + // We effectively convert the numbers to "two's complement" form. + // + // To do the flipping, we construct a mask and XOR against it. + // We branchlessly calculate an "all-ones except for the sign bit" + // mask from negative-signed values: right shifting sign-extends + // the integer, so we "fill" the mask with sign bits, and then + // convert to unsigned to push one more zero bit. + // On positive values, the mask is all zeros, so it's a no-op. + left ^= (((left >> 15) as u16) >> 1) as i16; + right ^= (((right >> 15) as u16) >> 1) as i16; + + left.cmp(&right) + } + + /// Restrict a value to a certain interval unless it is NaN. + /// + /// Returns `max` if `self` is greater than `max`, and `min` if `self` is + /// less than `min`. Otherwise this returns `self`. + /// + /// Note that this function returns NaN if the initial value was NaN as + /// well. + /// + /// # Panics + /// + /// Panics if `min > max`, `min` is NaN, or `max` is NaN. 
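+ ///
+ /// Out-of-order bounds are rejected by the assertion rather than being
+ /// silently swapped: `1.0f16.clamp(2.0, 1.0)` panics instead of picking
+ /// one of the bounds.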
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(f16)]
+ /// # #[cfg(target_arch = "aarch64")] { // FIXME(f16_f128): rust-lang/rust#123885
+ ///
+ /// assert!((-3.0f16).clamp(-2.0, 1.0) == -2.0);
+ /// assert!((0.0f16).clamp(-2.0, 1.0) == 0.0);
+ /// assert!((2.0f16).clamp(-2.0, 1.0) == 1.0);
+ /// assert!((f16::NAN).clamp(-2.0, 1.0).is_nan());
+ /// # }
+ /// ```
+ #[inline]
+ #[cfg(not(bootstrap))]
+ #[unstable(feature = "f16", issue = "116909")]
+ #[must_use = "method returns a new number and does not mutate the original value"]
+ pub fn clamp(mut self, min: f16, max: f16) -> f16 {
+ assert!(min <= max, "min > max, or either was NaN. min = {min:?}, max = {max:?}");
+ if self < min {
+ self = min;
+ }
+ if self > max {
+ self = max;
+ }
+ self
}
}
diff --git a/core/src/num/f32.rs b/core/src/num/f32.rs
index 2e715fb0bdde7..b9c84a66ed138 100644
--- a/core/src/num/f32.rs
+++ b/core/src/num/f32.rs
@@ -327,6 +327,11 @@ pub mod consts {
#[unstable(feature = "more_float_constants", issue = "103883")]
pub const FRAC_1_SQRT_PI: f32 = 0.564189583547756286948079451560772586_f32;

+ /// 1/sqrt(2π)
+ #[doc(alias = "FRAC_1_SQRT_TAU")]
+ #[unstable(feature = "more_float_constants", issue = "103883")]
+ pub const FRAC_1_SQRT_2PI: f32 = 0.398942280401432677939946059934381868_f32;
+
/// 2/π
#[stable(feature = "rust1", since = "1.0.0")]
pub const FRAC_2_PI: f32 = 0.636619772367581343075535053490057448_f32;
@@ -485,6 +490,21 @@ impl f32 {
#[stable(feature = "assoc_int_consts", since = "1.43.0")]
pub const NEG_INFINITY: f32 = -1.0_f32 / 0.0_f32;

+ /// Sign bit
+ const SIGN_MASK: u32 = 0x8000_0000;
+
+ /// Exponent mask
+ const EXP_MASK: u32 = 0x7f80_0000;
+
+ /// Mantissa mask
+ const MAN_MASK: u32 = 0x007f_ffff;
+
+ /// Minimum representable positive value (min subnormal)
+ const TINY_BITS: u32 = 0x1;
+
+ /// Minimum representable negative value (min negative subnormal)
+ const NEG_TINY_BITS: u32 = Self::TINY_BITS | Self::SIGN_MASK;
+
/// Returns `true` if this value is NaN.
///
/// ```
@@ -510,7 +530,7 @@ impl f32 {
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
pub(crate) const fn abs_private(self) -> f32 {
// SAFETY: This transmutation is fine. Probably. For the reasons std is using it.
- unsafe { mem::transmute::<u32, f32>(mem::transmute::<f32, u32>(self) & 0x7fff_ffff) }
+ unsafe { mem::transmute::<u32, f32>(mem::transmute::<f32, u32>(self) & !Self::SIGN_MASK) }
}

/// Returns `true` if this value is positive infinity or negative infinity, and
@@ -677,12 +697,9 @@ impl f32 {
// runtime-deviating logic which may or may not be acceptable.
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
const unsafe fn partial_classify(self) -> FpCategory {
- const EXP_MASK: u32 = 0x7f800000;
- const MAN_MASK: u32 = 0x007fffff;
-
// SAFETY: The caller is not asking questions for which this will tell lies.
let b = unsafe { mem::transmute::<f32, u32>(self) };
- match (b & MAN_MASK, b & EXP_MASK) {
+ match (b & Self::MAN_MASK, b & Self::EXP_MASK) {
(0, 0) => FpCategory::Zero,
(_, 0) => FpCategory::Subnormal,
_ => FpCategory::Normal,
@@ -694,12 +711,9 @@ impl f32 {
// plus a transmute. We do not live in a just world, but we can make it more so.
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")] const fn classify_bits(b: u32) -> FpCategory { - const EXP_MASK: u32 = 0x7f800000; - const MAN_MASK: u32 = 0x007fffff; - - match (b & MAN_MASK, b & EXP_MASK) { - (0, EXP_MASK) => FpCategory::Infinite, - (_, EXP_MASK) => FpCategory::Nan, + match (b & Self::MAN_MASK, b & Self::EXP_MASK) { + (0, Self::EXP_MASK) => FpCategory::Infinite, + (_, Self::EXP_MASK) => FpCategory::Nan, (0, 0) => FpCategory::Zero, (_, 0) => FpCategory::Subnormal, _ => FpCategory::Normal, @@ -782,19 +796,17 @@ impl f32 { #[unstable(feature = "float_next_up_down", issue = "91399")] #[rustc_const_unstable(feature = "float_next_up_down", issue = "91399")] pub const fn next_up(self) -> Self { - // We must use strictly integer arithmetic to prevent denormals from - // flushing to zero after an arithmetic operation on some platforms. - const TINY_BITS: u32 = 0x1; // Smallest positive f32. - const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff; - + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. let bits = self.to_bits(); if self.is_nan() || bits == Self::INFINITY.to_bits() { return self; } - let abs = bits & CLEAR_SIGN_MASK; + let abs = bits & !Self::SIGN_MASK; let next_bits = if abs == 0 { - TINY_BITS + Self::TINY_BITS } else if bits == abs { bits + 1 } else { @@ -832,19 +844,17 @@ impl f32 { #[unstable(feature = "float_next_up_down", issue = "91399")] #[rustc_const_unstable(feature = "float_next_up_down", issue = "91399")] pub const fn next_down(self) -> Self { - // We must use strictly integer arithmetic to prevent denormals from - // flushing to zero after an arithmetic operation on some platforms. - const NEG_TINY_BITS: u32 = 0x8000_0001; // Smallest (in magnitude) negative f32. - const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff; - + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. let bits = self.to_bits(); if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { return self; } - let abs = bits & CLEAR_SIGN_MASK; + let abs = bits & !Self::SIGN_MASK; let next_bits = if abs == 0 { - NEG_TINY_BITS + Self::NEG_TINY_BITS } else if bits == abs { bits - 1 } else { @@ -901,8 +911,8 @@ impl f32 { #[stable(feature = "f32_deg_rad_conversions", since = "1.7.0")] #[inline] pub fn to_radians(self) -> f32 { - let value: f32 = consts::PI; - self * (value / 180.0f32) + const RADS_PER_DEG: f32 = consts::PI / 180.0; + self * RADS_PER_DEG } /// Returns the maximum of the two numbers, ignoring NaN. @@ -1030,25 +1040,42 @@ impl f32 { /// ``` #[unstable(feature = "num_midpoint", issue = "110840")] pub fn midpoint(self, other: f32) -> f32 { - const LO: f32 = f32::MIN_POSITIVE * 2.; - const HI: f32 = f32::MAX / 2.; - - let (a, b) = (self, other); - let abs_a = a.abs_private(); - let abs_b = b.abs_private(); - - if abs_a <= HI && abs_b <= HI { - // Overflow is impossible - (a + b) / 2. - } else if abs_a < LO { - // Not safe to halve a - a + (b / 2.) - } else if abs_b < LO { - // Not safe to halve b - (a / 2.) + b - } else { - // Not safe to halve a and b - (a / 2.) + (b / 2.) + cfg_if! 
{
+ if #[cfg(any(
+ target_arch = "x86_64",
+ target_arch = "aarch64",
+ all(any(target_arch="riscv32", target_arch= "riscv64"), target_feature="d"),
+ all(target_arch = "arm", target_feature="vfp2"),
+ target_arch = "wasm32",
+ target_arch = "wasm64",
+ ))] {
+ // whitelist the faster implementation to targets that have known good 64-bit float
+ // implementations. Falling back to the branchy code on targets that don't have
+ // 64-bit hardware floats or buggy implementations.
+ // see: https://github.com/rust-lang/rust/pull/121062#issuecomment-2123408114
+ ((f64::from(self) + f64::from(other)) / 2.0) as f32
+ } else {
+ const LO: f32 = f32::MIN_POSITIVE * 2.;
+ const HI: f32 = f32::MAX / 2.;
+
+ let (a, b) = (self, other);
+ let abs_a = a.abs_private();
+ let abs_b = b.abs_private();
+
+ if abs_a <= HI && abs_b <= HI {
+ // Overflow is impossible
+ (a + b) / 2.
+ } else if abs_a < LO {
+ // Not safe to halve a
+ a + (b / 2.)
+ } else if abs_b < LO {
+ // Not safe to halve b
+ (a / 2.) + b
+ } else {
+ // Not safe to halve a and b
+ (a / 2.) + (b / 2.)
+ }
+ }
}
}
diff --git a/core/src/num/f64.rs b/core/src/num/f64.rs
index db8e1f318adba..f8e4555fc44f2 100644
--- a/core/src/num/f64.rs
+++ b/core/src/num/f64.rs
@@ -327,6 +327,11 @@ pub mod consts {
#[unstable(feature = "more_float_constants", issue = "103883")]
pub const FRAC_1_SQRT_PI: f64 = 0.564189583547756286948079451560772586_f64;

+ /// 1/sqrt(2π)
+ #[doc(alias = "FRAC_1_SQRT_TAU")]
+ #[unstable(feature = "more_float_constants", issue = "103883")]
+ pub const FRAC_1_SQRT_2PI: f64 = 0.398942280401432677939946059934381868_f64;
+
/// 2/π
#[stable(feature = "rust1", since = "1.0.0")]
pub const FRAC_2_PI: f64 = 0.636619772367581343075535053490057448_f64;
@@ -484,6 +489,21 @@ impl f64 {
#[stable(feature = "assoc_int_consts", since = "1.43.0")]
pub const NEG_INFINITY: f64 = -1.0_f64 / 0.0_f64;

+ /// Sign bit
+ const SIGN_MASK: u64 = 0x8000_0000_0000_0000;
+
+ /// Exponent mask
+ const EXP_MASK: u64 = 0x7ff0_0000_0000_0000;
+
+ /// Mantissa mask
+ const MAN_MASK: u64 = 0x000f_ffff_ffff_ffff;
+
+ /// Minimum representable positive value (min subnormal)
+ const TINY_BITS: u64 = 0x1;
+
+ /// Minimum representable negative value (min negative subnormal)
+ const NEG_TINY_BITS: u64 = Self::TINY_BITS | Self::SIGN_MASK;
+
/// Returns `true` if this value is NaN.
///
/// ```
@@ -509,9 +529,7 @@ impl f64 {
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
pub(crate) const fn abs_private(self) -> f64 {
// SAFETY: This transmutation is fine. Probably. For the reasons std is using it.
- unsafe {
- mem::transmute::<u64, f64>(mem::transmute::<f64, u64>(self) & 0x7fff_ffff_ffff_ffff)
- }
+ unsafe { mem::transmute::<u64, f64>(mem::transmute::<f64, u64>(self) & !Self::SIGN_MASK) }
}

/// Returns `true` if this value is positive infinity or negative infinity, and
@@ -668,13 +686,10 @@ impl f64 {
// and some normal floating point numbers truncated from an x87 FPU.
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
const unsafe fn partial_classify(self) -> FpCategory {
- const EXP_MASK: u64 = 0x7ff0000000000000;
- const MAN_MASK: u64 = 0x000fffffffffffff;
-
// SAFETY: The caller is not asking questions for which this will tell lies.
let b = unsafe { mem::transmute::<f64, u64>(self) };
- match (b & MAN_MASK, b & EXP_MASK) {
- (0, EXP_MASK) => FpCategory::Infinite,
+ match (b & Self::MAN_MASK, b & Self::EXP_MASK) {
+ (0, Self::EXP_MASK) => FpCategory::Infinite,
(0, 0) => FpCategory::Zero,
(_, 0) => FpCategory::Subnormal,
_ => FpCategory::Normal,
@@ -686,12 +701,9 @@ impl f64 {
// plus a transmute. We do not live in a just world, but we can make it more so.
#[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
const fn classify_bits(b: u64) -> FpCategory {
- const EXP_MASK: u64 = 0x7ff0000000000000;
- const MAN_MASK: u64 = 0x000fffffffffffff;
-
- match (b & MAN_MASK, b & EXP_MASK) {
- (0, EXP_MASK) => FpCategory::Infinite,
- (_, EXP_MASK) => FpCategory::Nan,
+ match (b & Self::MAN_MASK, b & Self::EXP_MASK) {
+ (0, Self::EXP_MASK) => FpCategory::Infinite,
+ (_, Self::EXP_MASK) => FpCategory::Nan,
(0, 0) => FpCategory::Zero,
(_, 0) => FpCategory::Subnormal,
_ => FpCategory::Normal,
@@ -751,7 +763,7 @@ impl f64 {
// IEEE754 says: isSignMinus(x) is true if and only if x has negative sign. isSignMinus
// applies to zeros and NaNs as well.
// SAFETY: This is just transmuting to get the sign bit, it's fine.
- unsafe { mem::transmute::<f64, u64>(self) & 0x8000_0000_0000_0000 != 0 }
+ unsafe { mem::transmute::<f64, u64>(self) & Self::SIGN_MASK != 0 }
}

#[must_use]
@@ -792,19 +804,17 @@ impl f64 {
#[unstable(feature = "float_next_up_down", issue = "91399")]
#[rustc_const_unstable(feature = "float_next_up_down", issue = "91399")]
pub const fn next_up(self) -> Self {
- // We must use strictly integer arithmetic to prevent denormals from
- // flushing to zero after an arithmetic operation on some platforms.
- const TINY_BITS: u64 = 0x1; // Smallest positive f64.
- const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff;
-
+ // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing
+ // denormals to zero. This is in general unsound and unsupported, but here
+ // we do our best to still produce the correct result on such targets.
let bits = self.to_bits();
if self.is_nan() || bits == Self::INFINITY.to_bits() {
return self;
}

- let abs = bits & CLEAR_SIGN_MASK;
+ let abs = bits & !Self::SIGN_MASK;
let next_bits = if abs == 0 {
- TINY_BITS
+ Self::TINY_BITS
} else if bits == abs {
bits + 1
} else {
@@ -842,19 +852,17 @@ impl f64 {
#[unstable(feature = "float_next_up_down", issue = "91399")]
#[rustc_const_unstable(feature = "float_next_up_down", issue = "91399")]
pub const fn next_down(self) -> Self {
- // We must use strictly integer arithmetic to prevent denormals from
- // flushing to zero after an arithmetic operation on some platforms.
- const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; // Smallest (in magnitude) negative f64.
- const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff;
-
+ // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing
+ // denormals to zero. This is in general unsound and unsupported, but here
+ // we do our best to still produce the correct result on such targets.
let bits = self.to_bits(); if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { return self; } - let abs = bits & CLEAR_SIGN_MASK; + let abs = bits & !Self::SIGN_MASK; let next_bits = if abs == 0 { - NEG_TINY_BITS + Self::NEG_TINY_BITS } else if bits == abs { bits - 1 } else { @@ -912,8 +920,8 @@ impl f64 { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn to_radians(self) -> f64 { - let value: f64 = consts::PI; - self * (value / 180.0) + const RADS_PER_DEG: f64 = consts::PI / 180.0; + self * RADS_PER_DEG } /// Returns the maximum of the two numbers, ignoring NaN. diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 77b1039039b1d..d40e02352a1d0 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -183,6 +183,30 @@ macro_rules! int_impl { (self as $UnsignedT).trailing_ones() } + /// Returns the bit pattern of `self` reinterpreted as an unsigned integer of the same size. + /// + /// This produces the same result as an `as` cast, but ensures that the bit-width remains + /// the same. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(integer_sign_cast)] + /// + #[doc = concat!("let n = -1", stringify!($SelfT), ";")] + /// + #[doc = concat!("assert_eq!(n.cast_unsigned(), ", stringify!($UnsignedT), "::MAX);")] + /// ``` + #[unstable(feature = "integer_sign_cast", issue = "125882")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline(always)] + pub const fn cast_unsigned(self) -> $UnsignedT { + self as $UnsignedT + } + /// Shifts the bits to the left by a specified amount, `n`, /// wrapping the truncated bits to the end of the resulting integer. /// @@ -460,7 +484,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_add(self, rhs: Self) -> Self { let (a, b) = self.overflowing_add(rhs); - if unlikely!(b) { overflow_panic::add() } else { a } + if b { overflow_panic::add() } else { a } } /// Unchecked integer addition. Computes `self + rhs`, assuming overflow @@ -488,9 +512,19 @@ macro_rules! int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_add(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_add`. - unsafe { intrinsics::unchecked_add(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_add cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_add(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_add(self, rhs) + } } /// Checked addition with an unsigned integer. Computes `self + rhs`, @@ -546,7 +580,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_add_unsigned(self, rhs: $UnsignedT) -> Self { let (a, b) = self.overflowing_add_unsigned(rhs); - if unlikely!(b) { overflow_panic::add() } else { a } + if b { overflow_panic::add() } else { a } } /// Checked integer subtraction. Computes `self - rhs`, returning `None` if @@ -602,7 +636,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_sub(self, rhs: Self) -> Self { let (a, b) = self.overflowing_sub(rhs); - if unlikely!(b) { overflow_panic::sub() } else { a } + if b { overflow_panic::sub() } else { a } } /// Unchecked integer subtraction. Computes `self - rhs`, assuming overflow @@ -630,9 +664,19 @@ macro_rules! 
int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_sub(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_sub`. - unsafe { intrinsics::unchecked_sub(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_sub cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_sub(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_sub(self, rhs) + } } /// Checked subtraction with an unsigned integer. Computes `self - rhs`, @@ -688,7 +732,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_sub_unsigned(self, rhs: $UnsignedT) -> Self { let (a, b) = self.overflowing_sub_unsigned(rhs); - if unlikely!(b) { overflow_panic::sub() } else { a } + if b { overflow_panic::sub() } else { a } } /// Checked integer multiplication. Computes `self * rhs`, returning `None` if @@ -744,7 +788,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_mul(self, rhs: Self) -> Self { let (a, b) = self.overflowing_mul(rhs); - if unlikely!(b) { overflow_panic::mul() } else { a } + if b { overflow_panic::mul() } else { a } } /// Unchecked integer multiplication. Computes `self * rhs`, assuming overflow @@ -772,9 +816,19 @@ macro_rules! int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_mul(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_mul`. - unsafe { intrinsics::unchecked_mul(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_mul cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_mul(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_mul(self, rhs) + } } /// Checked integer division. Computes `self / rhs`, returning `None` if `rhs == 0` @@ -848,7 +902,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_div(self, rhs: Self) -> Self { let (a, b) = self.overflowing_div(rhs); - if unlikely!(b) { overflow_panic::div() } else { a } + if b { overflow_panic::div() } else { a } } /// Checked Euclidean division. Computes `self.div_euclid(rhs)`, @@ -922,7 +976,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_div_euclid(self, rhs: Self) -> Self { let (a, b) = self.overflowing_div_euclid(rhs); - if unlikely!(b) { overflow_panic::div() } else { a } + if b { overflow_panic::div() } else { a } } /// Checked integer remainder. Computes `self % rhs`, returning `None` if @@ -995,7 +1049,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_rem(self, rhs: Self) -> Self { let (a, b) = self.overflowing_rem(rhs); - if unlikely!(b) { overflow_panic::rem() } else { a } + if b { overflow_panic::rem() } else { a } } /// Checked Euclidean remainder. Computes `self.rem_euclid(rhs)`, returning `None` @@ -1068,7 +1122,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_rem_euclid(self, rhs: Self) -> Self { let (a, b) = self.overflowing_rem_euclid(rhs); - if unlikely!(b) { overflow_panic::rem() } else { a } + if b { overflow_panic::rem() } else { a } } /// Checked negation. Computes `-self`, returning `None` if `self == MIN`. @@ -1111,9 +1165,18 @@ macro_rules! 
int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_neg(self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_neg`. - unsafe { intrinsics::unchecked_sub(0, self) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_neg cannot overflow"), + ( + lhs: $SelfT = self, + ) => !lhs.overflowing_neg().1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_sub(0, self) + } } /// Strict negation. Computes `-self`, panicking if `self == MIN`. @@ -1147,7 +1210,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_neg(self) -> Self { let (a, b) = self.overflowing_neg(); - if unlikely!(b) { overflow_panic::neg() } else { a } + if b { overflow_panic::neg() } else { a } } /// Checked shift left. Computes `self << rhs`, returning `None` if `rhs` is larger @@ -1210,7 +1273,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_shl(self, rhs: u32) -> Self { let (a, b) = self.overflowing_shl(rhs); - if unlikely!(b) { overflow_panic::shl() } else { a } + if b { overflow_panic::shl() } else { a } } /// Unchecked shift left. Computes `self << rhs`, assuming that @@ -1234,9 +1297,18 @@ macro_rules! int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_shl`. - unsafe { intrinsics::unchecked_shl(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_shl cannot overflow"), + ( + rhs: u32 = rhs, + ) => rhs < <$ActualT>::BITS, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_shl(self, rhs) + } } /// Checked shift right. Computes `self >> rhs`, returning `None` if `rhs` is @@ -1299,7 +1371,7 @@ macro_rules! int_impl { #[track_caller] pub const fn strict_shr(self, rhs: u32) -> Self { let (a, b) = self.overflowing_shr(rhs); - if unlikely!(b) { overflow_panic::shr() } else { a } + if b { overflow_panic::shr() } else { a } } /// Unchecked shift right. Computes `self >> rhs`, assuming that @@ -1323,9 +1395,18 @@ macro_rules! int_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_shr`. - unsafe { intrinsics::unchecked_shr(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_shr cannot overflow"), + ( + rhs: u32 = rhs, + ) => rhs < <$ActualT>::BITS, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_shr(self, rhs) + } } /// Checked absolute value. Computes `self.abs()`, returning `None` if @@ -2703,8 +2784,10 @@ macro_rules! int_impl { /// /// In other words, the result is `self / rhs` rounded to the integer `q` /// such that `self >= q * rhs`. - /// If `self > 0`, this is equal to round towards zero (the default in Rust); - /// if `self < 0`, this is equal to round towards +/- infinity. + /// If `self > 0`, this is equal to rounding towards zero (the default in Rust); + /// if `self < 0`, this is equal to rounding away from zero (towards +/- infinity). 
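(The doc comment amended here continues on the next line. The four rounding claims it makes are easy to sanity-check; the snippet below is illustrative only and not part of the patch:)

```rust
fn main() {
    // self > 0 rounds towards zero; self < 0 rounds away from zero.
    assert_eq!(7_i32.div_euclid(2), 3); // 3.5 rounded to 3
    assert_eq!((-7_i32).div_euclid(2), -4); // -3.5 rounded to -4
    // rhs > 0 rounds towards -infinity; rhs < 0 rounds towards +infinity.
    assert_eq!(7_i32.div_euclid(-2), -3); // -3.5 rounded to -3
    assert_eq!((-7_i32).div_euclid(-2), 4); // 3.5 rounded to 4
    // In every case the Euclidean remainder lands in 0..rhs.abs(),
    // which is exactly what rem_euclid returns.
    assert_eq!((-7_i32).rem_euclid(-2), 1);
}
```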
+ /// If `rhs > 0`, this is equal to rounding towards -infinity; + /// if `rhs < 0`, this is equal to rounding towards +infinity. /// /// # Panics /// /// @@ -2742,8 +2825,8 @@ macro_rules! int_impl { /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// /// This is done as if by the Euclidean division algorithm -- given - /// `r = self.rem_euclid(rhs)`, `self = rhs * self.div_euclid(rhs) + r`, and - /// `0 <= r < abs(rhs)`. + /// `r = self.rem_euclid(rhs)`, the result satisfies + /// `self = rhs * self.div_euclid(rhs) + r` and `0 <= r < abs(rhs)`. /// /// # Panics /// diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index 09a341e4d80ac..034af6a0d5731 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -7,6 +7,7 @@ use crate::hint; use crate::intrinsics; use crate::mem; use crate::str::FromStr; +use crate::ub_checks::assert_unsafe_precondition; // Used because the `?` operator is not allowed in a const context. macro_rules! try_opt { @@ -482,7 +483,6 @@ impl u8 { Self = u8, ActualT = u8, SignedT = i8, - NonZeroT = NonZero<u8>, BITS = 8, MAX = 255, rot = 2, @@ -1097,7 +1097,6 @@ impl u16 { Self = u16, ActualT = u16, SignedT = i16, - NonZeroT = NonZero<u16>, BITS = 16, MAX = 65535, rot = 4, @@ -1146,7 +1145,6 @@ impl u32 { Self = u32, ActualT = u32, SignedT = i32, - NonZeroT = NonZero<u32>, BITS = 32, MAX = 4294967295, rot = 8, @@ -1170,7 +1168,6 @@ impl u64 { Self = u64, ActualT = u64, SignedT = i64, - NonZeroT = NonZero<u64>, BITS = 64, MAX = 18446744073709551615, rot = 12, @@ -1194,7 +1191,6 @@ impl u128 { Self = u128, ActualT = u128, SignedT = i128, - NonZeroT = NonZero<u128>, BITS = 128, MAX = 340282366920938463463374607431768211455, rot = 16, @@ -1220,7 +1216,6 @@ impl usize { Self = usize, ActualT = u16, SignedT = isize, - NonZeroT = NonZero<usize>, BITS = 16, MAX = 65535, rot = 4, @@ -1245,7 +1240,6 @@ impl usize { Self = usize, ActualT = u32, SignedT = isize, - NonZeroT = NonZero<usize>, BITS = 32, MAX = 4294967295, rot = 8, @@ -1270,7 +1264,6 @@ impl usize { Self = usize, ActualT = u64, SignedT = isize, - NonZeroT = NonZero<usize>, BITS = 64, MAX = 18446744073709551615, rot = 12, diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index fcdd983343d62..0c6f06dc017e7 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -33,7 +33,6 @@ use super::{IntErrorKind, ParseIntError}; reason = "implementation detail which may disappear or be replaced at any time", issue = "none" )] -#[const_trait] pub unsafe trait ZeroablePrimitive: Sized + Copy + private::Sealed { #[doc(hidden)] type NonZeroInner: Sized + Copy; @@ -47,7 +46,6 @@ macro_rules! impl_zeroable_primitive { reason = "implementation detail which may disappear or be replaced at any time", issue = "none" )] - #[const_trait] pub trait Sealed {} $( @@ -70,14 +68,14 @@ macro_rules! impl_zeroable_primitive { reason = "implementation detail which may disappear or be replaced at any time", issue = "none" )] - impl const private::Sealed for $primitive {} + impl private::Sealed for $primitive {} #[unstable( feature = "nonzero_internals", reason = "implementation detail which may disappear or be replaced at any time", issue = "none" )] - unsafe impl const ZeroablePrimitive for $primitive { + unsafe impl ZeroablePrimitive for $primitive { type NonZeroInner = private::$NonZeroInner; } )+ @@ -517,9 +515,13 @@ macro_rules!
nonzero_integer { /// ``` /// # use std::num::NonZero; /// # - #[doc = concat!("let n = NonZero::<", stringify!($Int), ">::new(", $leading_zeros_test, ").unwrap();")] + /// # fn main() { test().unwrap(); } + /// # fn test() -> Option<()> { + #[doc = concat!("let n = NonZero::<", stringify!($Int), ">::new(", $leading_zeros_test, ")?;")] /// /// assert_eq!(n.leading_zeros(), 0); + /// # Some(()) + /// # } /// ``` #[stable(feature = "nonzero_leading_trailing_zeros", since = "1.53.0")] #[rustc_const_stable(feature = "nonzero_leading_trailing_zeros", since = "1.53.0")] @@ -545,9 +547,13 @@ macro_rules! nonzero_integer { /// ``` /// # use std::num::NonZero; /// # - #[doc = concat!("let n = NonZero::<", stringify!($Int), ">::new(0b0101000).unwrap();")] + /// # fn main() { test().unwrap(); } + /// # fn test() -> Option<()> { + #[doc = concat!("let n = NonZero::<", stringify!($Int), ">::new(0b0101000)?;")] /// /// assert_eq!(n.trailing_zeros(), 3); + /// # Some(()) + /// # } /// ``` #[stable(feature = "nonzero_leading_trailing_zeros", since = "1.53.0")] #[rustc_const_stable(feature = "nonzero_leading_trailing_zeros", since = "1.53.0")] @@ -1051,7 +1057,7 @@ macro_rules! nonzero_integer_signedness_dependent_methods { unsafe { Self::new_unchecked(self.get().unchecked_add(other)) } } - /// Returns the smallest power of two greater than or equal to n. + /// Returns the smallest power of two greater than or equal to `self`. /// Checks for overflow and returns [`None`] /// if the next power of two is greater than the type’s maximum value. /// As a consequence, the result cannot wrap to zero. @@ -1101,9 +1107,13 @@ macro_rules! nonzero_integer_signedness_dependent_methods { /// ``` /// # use std::num::NonZero; /// # - #[doc = concat!("assert_eq!(NonZero::new(7", stringify!($Int), ").unwrap().ilog2(), 2);")] - #[doc = concat!("assert_eq!(NonZero::new(8", stringify!($Int), ").unwrap().ilog2(), 3);")] - #[doc = concat!("assert_eq!(NonZero::new(9", stringify!($Int), ").unwrap().ilog2(), 3);")] + /// # fn main() { test().unwrap(); } + /// # fn test() -> Option<()> { + #[doc = concat!("assert_eq!(NonZero::new(7", stringify!($Int), ")?.ilog2(), 2);")] + #[doc = concat!("assert_eq!(NonZero::new(8", stringify!($Int), ")?.ilog2(), 3);")] + #[doc = concat!("assert_eq!(NonZero::new(9", stringify!($Int), ")?.ilog2(), 3);")] + /// # Some(()) + /// # } /// ``` #[stable(feature = "int_log", since = "1.67.0")] #[rustc_const_stable(feature = "int_log", since = "1.67.0")] @@ -1126,9 +1136,13 @@ macro_rules! nonzero_integer_signedness_dependent_methods { /// ``` /// # use std::num::NonZero; /// # - #[doc = concat!("assert_eq!(NonZero::new(99", stringify!($Int), ").unwrap().ilog10(), 1);")] - #[doc = concat!("assert_eq!(NonZero::new(100", stringify!($Int), ").unwrap().ilog10(), 2);")] - #[doc = concat!("assert_eq!(NonZero::new(101", stringify!($Int), ").unwrap().ilog10(), 2);")] + /// # fn main() { test().unwrap(); } + /// # fn test() -> Option<()> { + #[doc = concat!("assert_eq!(NonZero::new(99", stringify!($Int), ")?.ilog10(), 1);")] + #[doc = concat!("assert_eq!(NonZero::new(100", stringify!($Int), ")?.ilog10(), 2);")] + #[doc = concat!("assert_eq!(NonZero::new(101", stringify!($Int), ")?.ilog10(), 2);")] + /// # Some(()) + /// # } /// ``` #[stable(feature = "int_log", since = "1.67.0")] #[rustc_const_stable(feature = "int_log", since = "1.67.0")] @@ -1187,10 +1201,16 @@ macro_rules! 
nonzero_integer_signedness_dependent_methods { /// Basic usage: /// /// ``` - #[doc = concat!("let eight = std::num::NonZero::new(8", stringify!($Int), ").unwrap();")] + /// # use std::num::NonZero; + /// # + /// # fn main() { test().unwrap(); } + /// # fn test() -> Option<()> { + #[doc = concat!("let eight = NonZero::new(8", stringify!($Int), ")?;")] /// assert!(eight.is_power_of_two()); - #[doc = concat!("let ten = std::num::NonZero::new(10", stringify!($Int), ").unwrap();")] + #[doc = concat!("let ten = NonZero::new(10", stringify!($Int), ")?;")] /// assert!(!ten.is_power_of_two()); + /// # Some(()) + /// # } /// ``` #[must_use] #[stable(feature = "nonzero_is_power_of_two", since = "1.59.0")] diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 446d0658c1262..ad72c29758bd7 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -3,7 +3,6 @@ macro_rules! uint_impl { Self = $SelfT:ty, ActualT = $ActualT:ident, SignedT = $SignedT:ident, - NonZeroT = $NonZeroT:ty, // These are all for use *only* in doc comments. // As such, they're all passed as literals -- passing them as a string @@ -184,6 +183,30 @@ macro_rules! uint_impl { (!self).trailing_zeros() } + /// Returns the bit pattern of `self` reinterpreted as a signed integer of the same size. + /// + /// This produces the same result as an `as` cast, but ensures that the bit-width remains + /// the same. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(integer_sign_cast)] + /// + #[doc = concat!("let n = ", stringify!($SelfT), "::MAX;")] + /// + #[doc = concat!("assert_eq!(n.cast_signed(), -1", stringify!($SignedT), ");")] + /// ``` + #[unstable(feature = "integer_sign_cast", issue = "125882")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline(always)] + pub const fn cast_signed(self) -> $SignedT { + self as $SignedT + } + /// Shifts the bits to the left by a specified amount, `n`, /// wrapping the truncated bits to the end of the resulting integer. /// @@ -431,8 +454,19 @@ macro_rules! uint_impl { without modifying the original"] #[inline] pub const fn checked_add(self, rhs: Self) -> Option<Self> { - let (a, b) = self.overflowing_add(rhs); - if unlikely!(b) { None } else { Some(a) } + // This used to use `overflowing_add`, but that means it ends up being + // a `wrapping_add`, losing some optimization opportunities. Notably, + // phrasing it this way helps `.checked_add(1)` optimize to a check + // against `MAX` and an `add nuw`. + // Per , + // LLVM is happy to re-form the intrinsic later if useful. + + if unlikely!(intrinsics::add_with_overflow(self, rhs).1) { + None + } else { + // SAFETY: Just checked it doesn't overflow + Some(unsafe { intrinsics::unchecked_add(self, rhs) }) + } } /// Strict integer addition. Computes `self + rhs`, panicking @@ -467,7 +501,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_add(self, rhs: Self) -> Self { let (a, b) = self.overflowing_add(rhs); - if unlikely!(b) { overflow_panic ::add()} else {a} + if b { overflow_panic::add() } else { a } } /// Unchecked integer addition. Computes `self + rhs`, assuming overflow @@ -495,9 +529,19 @@ macro_rules! uint_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_add(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_add`.
- unsafe { intrinsics::unchecked_add(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_add cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_add(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_add(self, rhs) + } } /// Checked addition with a signed integer. Computes `self + rhs`, @@ -559,7 +603,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_add_signed(self, rhs: $SignedT) -> Self { let (a, b) = self.overflowing_add_signed(rhs); - if unlikely!(b) { overflow_panic ::add()} else {a} + if b { overflow_panic::add() } else { a } } /// Checked integer subtraction. Computes `self - rhs`, returning @@ -624,7 +668,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_sub(self, rhs: Self) -> Self { let (a, b) = self.overflowing_sub(rhs); - if unlikely!(b) { overflow_panic ::sub()} else {a} + if b { overflow_panic::sub() } else { a } } /// Unchecked integer subtraction. Computes `self - rhs`, assuming overflow @@ -677,9 +721,19 @@ macro_rules! uint_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_sub(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_sub`. - unsafe { intrinsics::unchecked_sub(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_sub cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_sub(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_sub(self, rhs) + } } /// Checked integer multiplication. Computes `self * rhs`, returning @@ -735,7 +789,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_mul(self, rhs: Self) -> Self { let (a, b) = self.overflowing_mul(rhs); - if unlikely!(b) { overflow_panic ::mul()} else {a} + if b { overflow_panic::mul() } else { a } } /// Unchecked integer multiplication. Computes `self * rhs`, assuming overflow @@ -763,9 +817,19 @@ macro_rules! uint_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_mul(self, rhs: Self) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_mul`. - unsafe { intrinsics::unchecked_mul(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_mul cannot overflow"), + ( + lhs: $SelfT = self, + rhs: $SelfT = rhs, + ) => !lhs.overflowing_mul(rhs).1, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_mul(self, rhs) + } } /// Checked integer division. Computes `self / rhs`, returning `None` @@ -1118,9 +1182,12 @@ macro_rules! uint_impl { pub const fn checked_ilog(self, base: Self) -> Option<u32> { if self <= 0 || base <= 1 { None + } else if self < base { + Some(0) } else { - let mut n = 0; - let mut r = 1; + // Since self >= base, n >= 1 + let mut n = 1; + let mut r = base; // Optimization for 128 bit wide integers. if Self::BITS == 128 { @@ -1159,8 +1226,7 @@ macro_rules! uint_impl { without modifying the original"] #[inline] pub const fn checked_ilog2(self) -> Option<u32> { - // FIXME: Simply use `NonZero::new` once it is actually generic.
- if let Some(x) = <$NonZeroT>::new(self) { + if let Some(x) = NonZero::new(self) { Some(x.ilog2()) } else { None @@ -1182,8 +1248,7 @@ macro_rules! uint_impl { without modifying the original"] #[inline] pub const fn checked_ilog10(self) -> Option<u32> { - // FIXME: Simply use `NonZero::new` once it is actually generic. - if let Some(x) = <$NonZeroT>::new(self) { + if let Some(x) = NonZero::new(self) { Some(x.ilog10()) } else { None @@ -1247,7 +1312,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_neg(self) -> Self { let (a, b) = self.overflowing_neg(); - if unlikely!(b) { overflow_panic::neg() } else { a } + if b { overflow_panic::neg() } else { a } } /// Checked shift left. Computes `self << rhs`, returning `None` @@ -1310,7 +1375,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_shl(self, rhs: u32) -> Self { let (a, b) = self.overflowing_shl(rhs); - if unlikely!(b) { overflow_panic::shl() } else { a } + if b { overflow_panic::shl() } else { a } } /// Unchecked shift left. Computes `self << rhs`, assuming that @@ -1334,9 +1399,18 @@ macro_rules! uint_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_shl`. - unsafe { intrinsics::unchecked_shl(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_shl cannot overflow"), + ( + rhs: u32 = rhs, + ) => rhs < <$ActualT>::BITS, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_shl(self, rhs) + } } /// Checked shift right. Computes `self >> rhs`, returning `None` @@ -1399,7 +1473,7 @@ macro_rules! uint_impl { #[track_caller] pub const fn strict_shr(self, rhs: u32) -> Self { let (a, b) = self.overflowing_shr(rhs); - if unlikely!(b) { overflow_panic::shr() } else { a } + if b { overflow_panic::shr() } else { a } } /// Unchecked shift right. Computes `self >> rhs`, assuming that @@ -1423,9 +1497,18 @@ macro_rules! uint_impl { #[inline(always)] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self { - // SAFETY: the caller must uphold the safety contract for - // `unchecked_shr`. - unsafe { intrinsics::unchecked_shr(self, rhs) } + assert_unsafe_precondition!( + check_language_ub, + concat!(stringify!($SelfT), "::unchecked_shr cannot overflow"), + ( + rhs: u32 = rhs, + ) => rhs < <$ActualT>::BITS, + ); + + // SAFETY: this is guaranteed to be safe by the caller. + unsafe { + intrinsics::unchecked_shr(self, rhs) + } } /// Checked exponentiation. Computes `self.pow(exp)`, returning `None` if @@ -2643,7 +2726,7 @@ macro_rules! uint_impl { pub const fn div_ceil(self, rhs: Self) -> Self { let d = self / rhs; let r = self % rhs; - if r > 0 && rhs > 0 { + if r > 0 { d + 1 } else { d @@ -2755,7 +2838,7 @@ macro_rules! uint_impl { /// /// When return value overflows (i.e., `self > (1 << (N-1))` for type /// `uN`), it panics in debug mode and the return value is wrapped to 0 in - /// release mode (the only situation in which method can return 0). + /// release mode (the only situation in which this method can return 0). /// /// # Examples /// /// @@ -2776,7 +2859,7 @@ macro_rules! uint_impl { self.one_less_than_next_power_of_two() + 1 } - /// Returns the smallest power of two greater than or equal to `n`.
If + /// Returns the smallest power of two greater than or equal to `self`. If /// the next power of two is greater than the type's maximum value, /// `None` is returned, otherwise the power of two is wrapped in `Some`. /// diff --git a/core/src/ops/arith.rs b/core/src/ops/arith.rs index 5e77788d8ea36..133ae04f02618 100644 --- a/core/src/ops/arith.rs +++ b/core/src/ops/arith.rs @@ -73,7 +73,6 @@ append_const_msg )] #[doc(alias = "+")] -#[const_trait] pub trait Add<Rhs = Self> { /// The resulting type after applying the `+` operator. #[stable(feature = "rust1", since = "1.0.0")] @@ -95,8 +94,7 @@ pub trait Add<Rhs = Self> { macro_rules! add_impl { ($($t:ty)*) => ($( #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_ops", issue = "90080")] - impl const Add for $t { + impl Add for $t { type Output = $t; #[inline] diff --git a/core/src/ops/async_function.rs b/core/src/ops/async_function.rs index 18bcee5a1c7e0..48d1042d9df4a 100644 --- a/core/src/ops/async_function.rs +++ b/core/src/ops/async_function.rs @@ -26,6 +26,7 @@ pub trait AsyncFn<Args: Tuple>: AsyncFnMut<Args> { pub trait AsyncFnMut<Args: Tuple>: AsyncFnOnce<Args> { /// Future returned by [`AsyncFnMut::async_call_mut`] and [`AsyncFn::async_call`]. #[unstable(feature = "async_fn_traits", issue = "none")] + #[lang = "call_ref_future"] type CallRefFuture<'a>: Future<Output = Self::Output> where Self: 'a; @@ -46,10 +47,12 @@ pub trait AsyncFnMut<Args: Tuple>: AsyncFnOnce<Args> { pub trait AsyncFnOnce<Args: Tuple> { /// Future returned by [`AsyncFnOnce::async_call_once`]. #[unstable(feature = "async_fn_traits", issue = "none")] + #[lang = "call_once_future"] type CallOnceFuture: Future<Output = Self::Output>; /// Output type of the called closure's future. #[unstable(feature = "async_fn_traits", issue = "none")] + #[lang = "async_fn_once_output"] type Output; /// Call the [`AsyncFnOnce`], returning a future which may move out of the called closure. @@ -143,6 +146,7 @@ mod internal_implementation_detail { // `for<'env> fn() -> (&'env T, ...)`. This allows us to represent the binder // of the closure's self-capture, and these upvar types will be instantiated with // the `'closure_env` region provided to the associated type. + #[lang = "async_fn_kind_upvars"] type Upvars<'closure_env, Inputs, Upvars, BorrowedUpvarsAsFnPtr>; } } diff --git a/core/src/ops/coroutine.rs b/core/src/ops/coroutine.rs index 6a6c5db1ab115..753f14c6b85ec 100644 --- a/core/src/ops/coroutine.rs +++ b/core/src/ops/coroutine.rs @@ -76,6 +76,7 @@ pub trait Coroutine<R = ()> { /// values which are allowed to be returned each time a coroutine yields. /// For example an iterator-as-a-coroutine would likely have this type as /// `T`, the type being iterated over. + #[cfg_attr(not(bootstrap), lang = "coroutine_yield")] type Yield; /// The type of value this coroutine returns. /// /// This corresponds to either a value passed back up with the /// `return` statement or implicitly as the last expression of a coroutine /// literal. For example futures would use this as `Result<T, E>` as it /// represents a completed future. + #[cfg_attr(not(bootstrap), lang = "coroutine_return")] type Return; /// Resumes the execution of this coroutine.
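(The `index_range.rs` hunks below swap the free `intrinsics::unchecked_*` calls for the inherent integer methods, which per the hunks above now run `assert_unsafe_precondition!` when UB checks are enabled. A minimal sketch of the caller-visible contract; illustrative only, assuming a toolchain where `unchecked_sub` on the integer types is stable:)

```rust
fn main() {
    let (start, end): (usize, usize) = (3, 10);
    // SAFETY: start <= end, so the subtraction cannot wrap.
    let len = unsafe { end.unchecked_sub(start) };
    assert_eq!(len, 7);
    // Calling `start.unchecked_sub(end)` instead would violate the
    // precondition: still undefined behavior in release builds, but with
    // UB checks enabled the new `assert_unsafe_precondition!` aborts with
    // a descriptive message rather than silently wrapping.
}
```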
diff --git a/core/src/ops/index_range.rs b/core/src/ops/index_range.rs index 65bda9177c7be..64214eae377dd 100644 --- a/core/src/ops/index_range.rs +++ b/core/src/ops/index_range.rs @@ -1,4 +1,3 @@ -use crate::intrinsics::{unchecked_add, unchecked_sub}; use crate::iter::{FusedIterator, TrustedLen}; use crate::num::NonZero; use crate::ub_checks; @@ -46,7 +45,7 @@ impl IndexRange { #[inline] pub const fn len(&self) -> usize { // SAFETY: By invariant, this cannot wrap - unsafe { unchecked_sub(self.end, self.start) } + unsafe { self.end.unchecked_sub(self.start) } } /// # Safety @@ -57,7 +56,7 @@ impl IndexRange { let value = self.start; // SAFETY: The range isn't empty, so this cannot overflow - self.start = unsafe { unchecked_add(value, 1) }; + self.start = unsafe { value.unchecked_add(1) }; value } @@ -68,7 +67,7 @@ impl IndexRange { debug_assert!(self.start < self.end); // SAFETY: The range isn't empty, so this cannot overflow - let value = unsafe { unchecked_sub(self.end, 1) }; + let value = unsafe { self.end.unchecked_sub(1) }; self.end = value; value } @@ -83,7 +82,7 @@ impl IndexRange { let mid = if n <= self.len() { // SAFETY: We just checked that this will be between start and end, // and thus the addition cannot overflow. - unsafe { unchecked_add(self.start, n) } + unsafe { self.start.unchecked_add(n) } } else { self.end }; @@ -102,7 +101,7 @@ impl IndexRange { let mid = if n <= self.len() { // SAFETY: We just checked that this will be between start and end, // and thus the addition cannot overflow. - unsafe { unchecked_sub(self.end, n) } + unsafe { self.end.unchecked_sub(n) } } else { self.start }; diff --git a/core/src/ops/try_trait.rs b/core/src/ops/try_trait.rs index 483f55b207093..cd444c86ed06e 100644 --- a/core/src/ops/try_trait.rs +++ b/core/src/ops/try_trait.rs @@ -363,7 +363,9 @@ pub trait Residual<O> { } #[unstable(feature = "pub_crate_should_not_need_unstable_attr", issue = "none")] -pub(crate) type ChangeOutputType<T, V> = <<T as Try>::Residual as Residual<V>>::TryType; +#[allow(type_alias_bounds)] +pub(crate) type ChangeOutputType<T: Try<Residual: Residual<V>>, V> = + <T::Residual as Residual<V>>::TryType; /// An adapter for implementing non-try methods via the `Try` implementation. /// diff --git a/core/src/option.rs b/core/src/option.rs index 1e3ed0f7c49f1..8ec7716012f59 100644 --- a/core/src/option.rs +++ b/core/src/option.rs @@ -137,10 +137,13 @@ //! //! [^extern_fn]: this remains true for any argument/return types and any other ABI: `extern "abi" fn` (_e.g._, `extern "system" fn`) //! +//! Under some conditions the above types `T` are also null pointer optimized when wrapped in a [`Result`][result_repr]. +//! //! [`Box<U>`]: ../../std/boxed/struct.Box.html //! [`num::NonZero*`]: crate::num //! [`ptr::NonNull<U>`]: crate::ptr::NonNull //! [function call ABI]: ../primitive.fn.html#abi-compatibility +//! [result_repr]: crate::result#representation //! //! This is called the "null pointer optimization" or NPO. //! @@ -651,6 +654,32 @@ impl<T> Option<T> { !self.is_some() } + /// Returns `true` if the option is a [`None`] or the value inside of it matches a predicate.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(is_none_or)] + /// + /// let x: Option<u32> = Some(2); + /// assert_eq!(x.is_none_or(|x| x > 1), true); + /// + /// let x: Option<u32> = Some(0); + /// assert_eq!(x.is_none_or(|x| x > 1), false); + /// + /// let x: Option<u32> = None; + /// assert_eq!(x.is_none_or(|x| x > 1), true); + /// ``` + #[must_use] + #[inline] + #[unstable(feature = "is_none_or", issue = "126383")] + pub fn is_none_or(self, f: impl FnOnce(T) -> bool) -> bool { + match self { + None => true, + Some(x) => f(x), + } + } + ///////////////////////////////////////////////////////////////////////// // Adapter for working with references ///////////////////////////////////////////////////////////////////////// @@ -768,7 +797,8 @@ impl<T> Option<T> { #[inline] #[must_use] #[stable(feature = "option_as_slice", since = "1.75.0")] - pub fn as_slice(&self) -> &[T] { + #[rustc_const_unstable(feature = "const_option_ext", issue = "91930")] + pub const fn as_slice(&self) -> &[T] { // SAFETY: When the `Option` is `Some`, we're using the actual pointer // to the payload, with a length of 1, so this is equivalent to // `slice::from_ref`, and thus is safe. @@ -782,7 +812,7 @@ impl<T> Option<T> { unsafe { slice::from_raw_parts( (self as *const Self).byte_add(core::mem::offset_of!(Self, Some.0)).cast(), - usize::from(self.is_some()), + self.is_some() as usize, ) } } @@ -822,7 +852,8 @@ impl<T> Option<T> { #[inline] #[must_use] #[stable(feature = "option_as_slice", since = "1.75.0")] - pub fn as_mut_slice(&mut self) -> &mut [T] { + #[rustc_const_unstable(feature = "const_option_ext", issue = "91930")] + pub const fn as_mut_slice(&mut self) -> &mut [T] { // SAFETY: When the `Option` is `Some`, we're using the actual pointer // to the payload, with a length of 1, so this is equivalent to // `slice::from_mut`, and thus is safe. @@ -838,7 +869,7 @@ impl<T> Option<T> { unsafe { slice::from_raw_parts_mut( (self as *mut Self).byte_add(core::mem::offset_of!(Self, Some.0)).cast(), - usize::from(self.is_some()), + self.is_some() as usize, ) } } @@ -1705,8 +1736,6 @@ impl<T> Option<T> { /// # Examples /// /// ``` - /// #![feature(option_take_if)] - /// /// let mut x = Some(42); /// /// let prev = x.take_if(|v| if *v == 42 { @@ -1723,7 +1752,7 @@ impl<T> Option<T> { /// assert_eq!(prev, Some(43)); /// ``` #[inline] - #[unstable(feature = "option_take_if", issue = "98934")] + #[stable(feature = "option_take_if", since = "1.80.0")] pub fn take_if
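(The new module-docs paragraph above points at the `Result` representation guarantees. A small illustration of that niche optimization; illustrative only, assuming a zero-sized error type:)

```rust
use std::mem::size_of;
use std::num::NonZero;

fn main() {
    // The all-zero bit pattern is free to encode None, so no extra
    // discriminant is needed; the same holds for Err(()) when the
    // error type is zero-sized.
    assert_eq!(size_of::<Option<NonZero<u32>>>(), size_of::<u32>());
    assert_eq!(size_of::<Result<NonZero<u32>, ()>>(), size_of::<u32>());
}
```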