Ban subnormals and NaNs in const {from,to}_bits

rust-lang · Apr 12, 2022 · 8358179 · 8358179
1 parent b200483
commit 8358179
Show file tree

Hide file tree

Showing 5 changed files with 392 additions and 38 deletions.
diff --git a/library/core/src/num/f32.rs b/library/core/src/num/f32.rs
@@ -623,6 +623,23 @@ impl f32 {
         }
     }
 
+    // This operates on bits, and only bits, so it can ignore concerns about weird FPUs.
+    // FIXME(jubilee): In a just world, this would be the entire impl for classify,
+    // plus a transmute. We do not live in a just world, but we can make it more so.
+    #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+    const fn classify_bits(b: u32) -> FpCategory {
+        const EXP_MASK: u32 = 0x7f800000;
+        const MAN_MASK: u32 = 0x007fffff;
+
+        match (b & MAN_MASK, b & EXP_MASK) {
+            (0, EXP_MASK) => FpCategory::Infinite,
+            (_, EXP_MASK) => FpCategory::Nan,
+            (0, 0) => FpCategory::Zero,
+            (_, 0) => FpCategory::Subnormal,
+            _ => FpCategory::Normal,
+        }
+    }
+
     /// Returns `true` if `self` has a positive sign, including `+0.0`, `NaN`s with
     /// positive sign bit and positive infinity.
     ///
@@ -874,8 +891,59 @@ impl f32 {
     #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
     #[inline]
     pub const fn to_bits(self) -> u32 {
-        // SAFETY: `u32` is a plain old datatype so we can always transmute to it
-        unsafe { mem::transmute(self) }
+        // SAFETY: `u32` is a plain old datatype so we can always transmute to it.
+        // ...sorta.
+        //
+        // It turns out that at runtime, it is possible for a floating point number
+        // to be subject to a floating point mode that alters nonzero subnormal numbers
+        // to zero on reads and writes, aka "denormals are zero" and "flush to zero".
+        // This is not a problem per se, but at least one tier2 platform for Rust
+        // actually exhibits this behavior by default.
+        //
+        // In addition, on x86 targets with SSE or SSE2 disabled and the x87 FPU enabled,
+        // i.e. not soft-float, the way Rust does parameter passing can actually alter
+        // a number that is "not infinity" to have the same exponent as infinity,
+        // in a slightly unpredictable manner.
+        //
+        // And, of course evaluating to a NaN value is fairly nondeterministic.
+        // More precisely: when NaN should be returned is knowable, but which NaN?
+        // So far that's defined by a combination of LLVM and the CPU, not Rust.
+        // This function, however, allows observing the bitstring of a NaN,
+        // thus introspection on CTFE.
+        //
+        // In order to preserve, at least for the moment, const-to-runtime equivalence,
+        // we reject any of these possible situations from happening.
+        #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+        const fn ct_f32_to_u32(ct: f32) -> u32 {
+            match ct.classify() {
+                FpCategory::Nan => {
+                    panic!("const-eval error: cannot use f32::to_bits on a NaN")
+                }
+                FpCategory::Subnormal => {
+                    panic!("const-eval error: cannot use f32::to_bits on a subnormal number")
+                }
+                FpCategory::Infinite =>
+                // SAFETY: Infinity per se is fine
+                unsafe { mem::transmute::<f32, u32>(ct) },
+                FpCategory::Zero | FpCategory::Normal => {
+                    // SAFETY: We have a normal floating point number. Now we transmute, i.e. do a bitcopy.
+                    let bits: u32 = unsafe { mem::transmute::<f32, u32>(ct) };
+                    // Let's doublecheck to make sure it wasn't a weird float by truncating it.
+                    if bits >> 23 & 0xFF == 0xFF {
+                        panic!(
+                            "const-eval error: an unusually large x87 floating point value should not leak into const eval"
+                        )
+                    };
+                    bits
+                }
+            }
+        }
+        // SAFETY: `u32` is a plain old datatype so we can always... uh...
+        // ...look, just pretend you forgot what you just read.
+        // Stability concerns.
+        let rt_f32_to_u32 = |rt| unsafe { mem::transmute::<f32, u32>(rt) };
+        // SAFETY: We use internal implementations that either always work or fail at compile time.
+        unsafe { intrinsics::const_eval_select((self,), ct_f32_to_u32, rt_f32_to_u32) }
     }
 
     /// Raw transmutation from `u32`.
@@ -919,9 +987,49 @@ impl f32 {
     #[must_use]
     #[inline]
     pub const fn from_bits(v: u32) -> Self {
-        // SAFETY: `u32` is a plain old datatype so we can always transmute from it
         // It turns out the safety issues with sNaN were overblown! Hooray!
-        unsafe { mem::transmute::<u32, f32>(v) }
+        // SAFETY: `u32` is a plain old datatype so we can always transmute from it
+        // ...sorta.
+        //
+        // It turns out that at runtime, it is possible for a floating point number
+        // to be subject to a floating point mode that alters nonzero subnormal numbers
+        // to zero on reads and writes, aka "denormals are zero" and "flush to zero".
+        // This is not a problem usually, but at least one tier2 platform for Rust
+        // actually exhibits this behavior by default: thumbv7neon
+        // aka "the Neon FPU in AArch32 state"
+        //
+        // In addition, on x86 targets with SSE or SSE2 disabled and the x87 FPU enabled,
+        // i.e. not soft-float, the way Rust does parameter passing can actually alter
+        // a number that is "not infinity" to have the same exponent as infinity,
+        // in a slightly unpredictable manner.
+        //
+        // And, of course evaluating to a NaN value is fairly nondeterministic.
+        // More precisely: when NaN should be returned is knowable, but which NaN?
+        // So far that's defined by a combination of LLVM and the CPU, not Rust.
+        // This function, however, allows observing the bitstring of a NaN,
+        // thus introspection on CTFE.
+        //
+        // In order to preserve, at least for the moment, const-to-runtime equivalence,
+        // reject any of these possible situations from happening.
+        #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+        const fn ct_u32_to_f32(ct: u32) -> f32 {
+            match f32::classify_bits(ct) {
+                FpCategory::Subnormal => {
+                    panic!("const-eval error: cannot use f32::from_bits on a subnormal number");
+                }
+                FpCategory::Nan => {
+                    panic!("const-eval error: cannot use f32::from_bits on NaN");
+                }
+                // SAFETY: It's not a frumious number
+                _ => unsafe { mem::transmute::<u32, f32>(ct) },
+            }
+        }
+        // SAFETY: `u32` is a plain old datatype so we can always... uh...
+        // ...look, just pretend you forgot what you just read.
+        // Stability concerns.
+        let rt_u32_to_f32 = |rt| unsafe { mem::transmute::<u32, f32>(rt) };
+        // SAFETY: We use internal implementations that either always work or fail at compile time.
+        unsafe { intrinsics::const_eval_select((v,), ct_u32_to_f32, rt_u32_to_f32) }
     }
 
     /// Return the memory representation of this floating point number as a byte array in

diff --git a/library/core/src/num/f64.rs b/library/core/src/num/f64.rs
@@ -621,6 +621,23 @@ impl f64 {
         }
     }
 
+    // This operates on bits, and only bits, so it can ignore concerns about weird FPUs.
+    // FIXME(jubilee): In a just world, this would be the entire impl for classify,
+    // plus a transmute. We do not live in a just world, but we can make it more so.
+    #[rustc_const_unstable(feature = "const_float_classify", issue = "72505")]
+    const fn classify_bits(b: u64) -> FpCategory {
+        const EXP_MASK: u64 = 0x7ff0000000000000;
+        const MAN_MASK: u64 = 0x000fffffffffffff;
+
+        match (b & MAN_MASK, b & EXP_MASK) {
+            (0, EXP_MASK) => FpCategory::Infinite,
+            (_, EXP_MASK) => FpCategory::Nan,
+            (0, 0) => FpCategory::Zero,
+            (_, 0) => FpCategory::Subnormal,
+            _ => FpCategory::Normal,
+        }
+    }
+
     /// Returns `true` if `self` has a positive sign, including `+0.0`, `NaN`s with
     /// positive sign bit and positive infinity.
     ///
@@ -891,8 +908,41 @@ impl f64 {
     #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
     #[inline]
     pub const fn to_bits(self) -> u64 {
-        // SAFETY: `u64` is a plain old datatype so we can always transmute to it
-        unsafe { mem::transmute(self) }
+        // SAFETY: `u64` is a plain old datatype so we can always transmute to it.
+        // ...sorta.
+        //
+        // See the SAFETY comment in f64::from_bits for more.
+        #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+        const fn ct_f64_to_u64(ct: f64) -> u64 {
+            match ct.classify() {
+                FpCategory::Nan => {
+                    panic!("const-eval error: cannot use f64::to_bits on a NaN")
+                }
+                FpCategory::Subnormal => {
+                    panic!("const-eval error: cannot use f64::to_bits on a subnormal number")
+                }
+                FpCategory::Infinite =>
+                // SAFETY: Infinity per se is fine
+                unsafe { mem::transmute::<f64, u64>(ct) },
+                FpCategory::Zero | FpCategory::Normal => {
+                    // SAFETY: We have a normal floating point number. Now we transmute, i.e. do a bitcopy.
+                    let bits: u64 = unsafe { mem::transmute::<f64, u64>(ct) };
+                    // Let's doublecheck to make sure it wasn't a weird float by truncating it.
+                    if (bits >> 52) & 0x7FF == 0x7FF {
+                        panic!(
+                            "const-eval error: an unusually large x87 floating point value should not leak into const eval"
+                        )
+                    };
+                    bits
+                }
+            }
+        }
+        // SAFETY: `u64` is a plain old datatype so we can always... uh...
+        // ...look, just pretend you forgot what you just read.
+        // Stability concerns.
+        let rt_f64_to_u64 = |rt| unsafe { mem::transmute::<f64, u64>(rt) };
+        // SAFETY: We use internal implementations that either always work or fail at compile time.
+        unsafe { intrinsics::const_eval_select((self,), ct_f64_to_u64, rt_f64_to_u64) }
     }
 
     /// Raw transmutation from `u64`.
@@ -936,9 +986,54 @@ impl f64 {
     #[must_use]
     #[inline]
     pub const fn from_bits(v: u64) -> Self {
-        // SAFETY: `u64` is a plain old datatype so we can always transmute from it
         // It turns out the safety issues with sNaN were overblown! Hooray!
-        unsafe { mem::transmute::<u64, f64>(v) }
+        // SAFETY: `u64` is a plain old datatype so we can always transmute from it
+        // ...sorta.
+        //
+        // It turns out that at runtime, it is possible for a floating point number
+        // to be subject to floating point modes that alters nonzero subnormal numbers
+        // to zero on reads and writes, aka "denormals are zero" and "flush to zero".
+        // This is not a problem usually, but at least one tier2 platform for Rust
+        // actually exhibits an FTZ behavior kby default: thumbv7neon
+        // aka "the Neon FPU in AArch32 state"
+        //
+        // Even with this, not all instructions exhibit the FTZ behaviors on thumbv7neon,
+        // so this should load the same bits if LLVM emits the "correct" instructions,
+        // but LLVM sometimes makes interesting choices about float optimization,
+        // and other FPUs may do similar. Thus, it is wise to indulge luxuriously in caution.
+        //
+        // In addition, on x86 targets with SSE or SSE2 disabled and the x87 FPU enabled,
+        // i.e. not soft-float, the way Rust does parameter passing can actually alter
+        // a number that is "not infinity" to have the same exponent as infinity,
+        // in a slightly unpredictable manner.
+        //
+        // And, of course evaluating to a NaN value is fairly nondeterministic.
+        // More precisely: when NaN should be returned is knowable, but which NaN?
+        // So far that's defined by a combination of LLVM and the CPU, not Rust.
+        // This function, however, allows observing the bitstring of a NaN,
+        // thus introspection on CTFE.
+        //
+        // In order to preserve, at least for the moment, const-to-runtime equivalence,
+        // reject any of these possible situations from happening.
+        #[rustc_const_unstable(feature = "const_float_bits_conv", issue = "72447")]
+        const fn ct_u64_to_f64(ct: u64) -> f64 {
+            match f64::classify_bits(ct) {
+                FpCategory::Subnormal => {
+                    panic!("const-eval error: cannot use f64::from_bits on a subnormal number");
+                }
+                FpCategory::Nan => {
+                    panic!("const-eval error: cannot use f64::from_bits on NaN");
+                }
+                // SAFETY: It's not a frumious number
+                _ => unsafe { mem::transmute::<u64, f64>(ct) },
+            }
+        }
+        // SAFETY: `u64` is a plain old datatype so we can always... uh...
+        // ...look, just pretend you forgot what you just read.
+        // Stability concerns.
+        let rt_u64_to_f64 = |rt| unsafe { mem::transmute::<u64, f64>(rt) };
+        // SAFETY: We use internal implementations that either always work or fail at compile time.
+        unsafe { intrinsics::const_eval_select((v,), ct_u64_to_f64, rt_u64_to_f64) }
     }
 
     /// Return the memory representation of this floating point number as a byte array in

diff --git a/src/test/ui/consts/const-float-bits-conv.rs b/src/test/ui/consts/const-float-bits-conv.rs
@@ -37,22 +37,6 @@ fn f32() {
     const_assert!(f32::from_bits(0x44a72000), 1337.0);
     const_assert!(f32::from_ne_bytes(0x44a72000u32.to_ne_bytes()), 1337.0);
     const_assert!(f32::from_bits(0xc1640000), -14.25);
-
-    // Check that NaNs roundtrip their bits regardless of signalingness
-    // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits
-    const MASKED_NAN1: u32 = f32::NAN.to_bits() ^ 0x002A_AAAA;
-    const MASKED_NAN2: u32 = f32::NAN.to_bits() ^ 0x0055_5555;
-
-    const_assert!(f32::from_bits(MASKED_NAN1).is_nan());
-    const_assert!(f32::from_bits(MASKED_NAN1).is_nan());
-
-    // LLVM does not guarantee that loads and stores of NaNs preserve their exact bit pattern.
-    // In practice, this seems to only cause a problem on x86, since the most widely used calling
-    // convention mandates that floating point values are returned on the x87 FPU stack. See #73328.
-    if !cfg!(target_arch = "x86") {
-        const_assert!(f32::from_bits(MASKED_NAN1).to_bits(), MASKED_NAN1);
-        const_assert!(f32::from_bits(MASKED_NAN2).to_bits(), MASKED_NAN2);
-    }
 }
 
 fn f64() {
@@ -70,20 +54,6 @@ fn f64() {
     const_assert!(f64::from_bits(0x4094e40000000000), 1337.0);
     const_assert!(f64::from_ne_bytes(0x4094e40000000000u64.to_ne_bytes()), 1337.0);
     const_assert!(f64::from_bits(0xc02c800000000000), -14.25);
-
-    // Check that NaNs roundtrip their bits regardless of signalingness
-    // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits
-    const MASKED_NAN1: u64 = f64::NAN.to_bits() ^ 0x000A_AAAA_AAAA_AAAA;
-    const MASKED_NAN2: u64 = f64::NAN.to_bits() ^ 0x0005_5555_5555_5555;
-
-    const_assert!(f64::from_bits(MASKED_NAN1).is_nan());
-    const_assert!(f64::from_bits(MASKED_NAN1).is_nan());
-
-    // See comment above.
-    if !cfg!(target_arch = "x86") {
-        const_assert!(f64::from_bits(MASKED_NAN1).to_bits(), MASKED_NAN1);
-        const_assert!(f64::from_bits(MASKED_NAN2).to_bits(), MASKED_NAN2);
-    }
 }
 
 fn main() {

diff --git a/src/test/ui/consts/const-float-bits-reject-conv.rs b/src/test/ui/consts/const-float-bits-reject-conv.rs
@@ -0,0 +1,62 @@
+// compile-flags: -Zmir-opt-level=0
+#![feature(const_float_bits_conv)]
+#![feature(const_float_classify)]
+
+// Don't promote
+const fn nop<T>(x: T) -> T { x }
+
+macro_rules! const_assert {
+    ($a:expr) => {
+        {
+            const _: () = assert!($a);
+            assert!(nop($a));
+        }
+    };
+    ($a:expr, $b:expr) => {
+        {
+            const _: () = assert!($a == $b);
+            assert_eq!(nop($a), nop($b));
+        }
+    };
+}
+
+fn f32() {
+    // Check that NaNs roundtrip their bits regardless of signalingness
+    // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits
+    // ...actually, let's just check that these break. :D
+    const MASKED_NAN1: u32 = f32::NAN.to_bits() ^ 0x002A_AAAA;
+    const MASKED_NAN2: u32 = f32::NAN.to_bits() ^ 0x0055_5555;
+
+    const_assert!(f32::from_bits(MASKED_NAN1).is_nan());
+    const_assert!(f32::from_bits(MASKED_NAN1).is_nan());
+
+    // LLVM does not guarantee that loads and stores of NaNs preserve their exact bit pattern.
+    // In practice, this seems to only cause a problem on x86, since the most widely used calling
+    // convention mandates that floating point values are returned on the x87 FPU stack. See #73328.
+    if !cfg!(target_arch = "x86") {
+        const_assert!(f32::from_bits(MASKED_NAN1).to_bits(), MASKED_NAN1);
+        const_assert!(f32::from_bits(MASKED_NAN2).to_bits(), MASKED_NAN2);
+    }
+}
+
+fn f64() {
+    // Check that NaNs roundtrip their bits regardless of signalingness
+    // 0xA is 0b1010; 0x5 is 0b0101 -- so these two together clobbers all the mantissa bits
+    // ...actually, let's just check that these break. :D
+    const MASKED_NAN1: u64 = f64::NAN.to_bits() ^ 0x000A_AAAA_AAAA_AAAA;
+    const MASKED_NAN2: u64 = f64::NAN.to_bits() ^ 0x0005_5555_5555_5555;
+
+    const_assert!(f64::from_bits(MASKED_NAN1).is_nan());
+    const_assert!(f64::from_bits(MASKED_NAN1).is_nan());
+
+    // See comment above.
+    if !cfg!(target_arch = "x86") {
+        const_assert!(f64::from_bits(MASKED_NAN1).to_bits(), MASKED_NAN1);
+        const_assert!(f64::from_bits(MASKED_NAN2).to_bits(), MASKED_NAN2);
+    }
+}
+
+fn main() {
+    f32();
+    f64();
+}