From 23c6330a1281ad0be81e122952b77bab7b2fdd60 Mon Sep 17 00:00:00 2001 From: Mohan Rajendran Date: Wed, 20 Sep 2017 05:50:06 -0500 Subject: [PATCH] Added _mm_unpackhi_ps function (#16) Added _mm_unpackhi_ps --- .vscode/temp.sql | 0 TODO.md | 2 +- src/x86/sse.rs | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 .vscode/temp.sql diff --git a/.vscode/temp.sql b/.vscode/temp.sql new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/TODO.md b/TODO.md index 12832620bf33c..6b69e250da84b 100644 --- a/TODO.md +++ b/TODO.md @@ -155,7 +155,7 @@ sse * [ ] `_mm_storer_ps` * [ ] `_mm_move_ss` * [ ] `_mm_shuffle_ps` -* [ ] `_mm_unpackhi_ps` +* [x] `_mm_unpackhi_ps` * [ ] `_mm_unpacklo_ps` * [ ] `_mm_movehl_ps` * [ ] `_mm_movelh_ps` diff --git a/src/x86/sse.rs b/src/x86/sse.rs index d1af0e1a9b612..e1706d107d810 100644 --- a/src/x86/sse.rs +++ b/src/x86/sse.rs @@ -1,3 +1,4 @@ +use simd_llvm::simd_shuffle4; use v128::*; /// Return the square root of packed single-precision (32-bit) floating-point @@ -40,6 +41,14 @@ pub fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 { unsafe { maxps(a, b) } } +/// Unpack and interleave single-precision (32-bit) floating-point elements +/// from the high half of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse"] +pub fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 { + unsafe { simd_shuffle4(a, b, [2, 6, 3, 7]) } +} + /// Return a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. @@ -116,6 +125,15 @@ mod tests { assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0)); } + #[test] + #[target_feature = "+sse"] + fn _mm_unpackhi_ps() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let b = f32x4::new(5.0, 6.0, 7.0, 8.0); + let r = sse::_mm_unpackhi_ps(a, b); + assert_eq!(r, f32x4::new(3.0, 7.0, 4.0, 8.0)); + } + #[test] #[target_feature = "+sse"] fn _mm_movemask_ps() {