Skip to content

Commit

Permalink
[neon] reciprocal square-root estimate (rust-lang#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
gnzlbg authored and alexcrichton committed Oct 18, 2017
1 parent 93cc250 commit b2028d5
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 0 deletions.
11 changes: 11 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,16 @@ export RUSTFLAGS="$RUSTFLAGS -C codegen-units=1"
# having only one thread increases debuggability to be worth it.
export RUST_TEST_THREADS=1

# FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon
# Until that exists, NEON is switched on at compile time for every target
# whose name matches `aarch*`; all other targets keep RUSTFLAGS untouched.
case "${TARGET}" in
    aarch*)
        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon"
        ;;
    *)
        ;;
esac

# Print the effective flags so they are visible in the script output.
echo "RUSTFLAGS=${RUSTFLAGS}"

# The target is quoted so it always reaches cargo as exactly one argument
# (no word-splitting or globbing, and an empty value is not silently dropped).
cargo test --target "${TARGET}"
cargo test --release --target "${TARGET}"
5 changes: 5 additions & 0 deletions src/arm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@ pub use self::v6::*;
pub use self::v7::*;
#[cfg(target_arch = "aarch64")]
pub use self::v8::*;
// The `neon` module only binds `llvm.aarch64.neon.*` intrinsics, so it is
// restricted to AArch64 in addition to requiring the `neon` target feature.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub use self::neon::*;

mod v6;
mod v7;
#[cfg(target_arch = "aarch64")]
mod v8;

// Keep this gate identical to the one on `pub use self::neon::*` above;
// otherwise the re-export would name a module that was not compiled.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
mod neon;
38 changes: 38 additions & 0 deletions src/arm/neon.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//! ARM NEON intrinsics
//!
//! The reference is [ARM's NEON Intrinsics Reference](http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf). [ARM's NEON Intrinsics Online Database](https://developer.arm.com/technologies/neon/intrinsics) is also useful.

#[cfg(test)]
use stdsimd_test::assert_instr;

use v64::{f32x2};

// Raw bindings to the LLVM intrinsics backing the public NEON functions in
// this module. Each declaration is tied to its intrinsic through `link_name`.
//
// NOTE(review): `improper_ctypes` is presumably silenced because the vector
// type `f32x2` is not an FFI-safe type as far as the lint is concerned --
// confirm.
#[allow(improper_ctypes)]
extern "C" {
// Bound to `llvm.aarch64.neon.frsqrte.v2f32`; takes and returns an `f32x2`.
#[link_name = "llvm.aarch64.neon.frsqrte.v2f32"]
fn frsqrte_v2f32(a: f32x2) -> f32x2;
}

/// Reciprocal square-root estimate.
///
/// Passes `a` to the `llvm.aarch64.neon.frsqrte.v2f32` intrinsic and returns
/// its result unchanged. The result is an estimate rather than an exact
/// value: for the input `(1.0, 2.0)` the unit test in this file expects
/// `(0.9980469, 0.7050781)`.
///
/// # Safety
///
/// NOTE(review): the function is compiled with the `+neon` target feature,
/// so callers presumably have to guarantee that the executing CPU supports
/// NEON -- confirm.
#[inline(always)]
#[target_feature = "+neon"]
// NOTE(review): in test builds `assert_instr` presumably checks that this
// function contains an `frsqrte` instruction -- confirm against stdsimd_test.
#[cfg_attr(test, assert_instr(frsqrte))]
pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
frsqrte_v2f32(a)
}

#[cfg(test)]
mod tests {
    use arm::neon;
    use v64::f32x2;

    /// Checks `vrsqrte_f32` on the input `(1.0, 2.0)` against the expected
    /// estimates.
    ///
    /// This is a plain `#[test]`, so it always runs; it does not perform any
    /// run-time NEON detection.
    #[test]
    fn vrsqrt_f32() {
        let input = f32x2::new(1.0, 2.0);
        // These are the estimates the test expects, not the exact values of
        // 1/sqrt(1) = 1 and 1/sqrt(2) ~= 0.7071.
        let expected = f32x2::new(0.9980469, 0.7050781);
        // SAFETY: the enclosing `neon` module is only compiled when the
        // `neon` target feature is enabled at build time, so this test
        // binary already assumes NEON is available.
        let actual = unsafe { neon::vrsqrte_f32(input) };
        assert_eq!(actual, expected);
    }
}

0 comments on commit b2028d5

Please sign in to comment.