Skip to content

Commit ad9654a

Browse files
authored
Fix ARM vbsl* NEON intrinsics (rust-lang#1191) (rust-lang#1307)
1 parent 2833505 commit ad9654a

File tree

2 files changed

+206
-114
lines changed

2 files changed

+206
-114
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

+32 -16
Original file line number | Diff line number | Diff line change
@@ -1045,31 +1045,47 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
10451045
#[cfg_attr(test, assert_instr(bsl))]
10461046
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
10471047
pub unsafe fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
1048-
simd_select(transmute::<_, int64x1_t>(a), b, c)
1048+
let not = int64x1_t(-1);
1049+
transmute(simd_or(
1050+
simd_and(a, transmute(b)),
1051+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
1052+
))
10491053
}
10501054
/// Bitwise Select.
10511055
#[inline]
10521056
#[target_feature(enable = "neon")]
10531057
#[cfg_attr(test, assert_instr(bsl))]
10541058
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
10551059
pub unsafe fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
1056-
simd_select(transmute::<_, int64x1_t>(a), b, c)
1060+
let not = int64x1_t(-1);
1061+
transmute(simd_or(
1062+
simd_and(a, transmute(b)),
1063+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
1064+
))
10571065
}
10581066
/// Bitwise Select. (128-bit)
10591067
#[inline]
10601068
#[target_feature(enable = "neon")]
10611069
#[cfg_attr(test, assert_instr(bsl))]
10621070
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
10631071
pub unsafe fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
1064-
simd_select(transmute::<_, int64x2_t>(a), b, c)
1072+
let not = int64x2_t(-1, -1);
1073+
transmute(simd_or(
1074+
simd_and(a, transmute(b)),
1075+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
1076+
))
10651077
}
10661078
/// Bitwise Select. (128-bit)
10671079
#[inline]
10681080
#[target_feature(enable = "neon")]
10691081
#[cfg_attr(test, assert_instr(bsl))]
10701082
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
10711083
pub unsafe fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
1072-
simd_select(transmute::<_, int64x2_t>(a), b, c)
1084+
let not = int64x2_t(-1, -1);
1085+
transmute(simd_or(
1086+
simd_and(a, transmute(b)),
1087+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
1088+
))
10731089
}
10741090

10751091
/// Signed saturating Accumulate of Unsigned value.
@@ -5136,37 +5152,37 @@ mod tests {
51365152

51375153
#[simd_test(enable = "neon")]
51385154
unsafe fn test_vbsl_f64() {
5139-
let a = u64x1::new(u64::MAX);
5140-
let b = f64x1::new(f64::MAX);
5141-
let c = f64x1::new(f64::MIN);
5142-
let e = f64x1::new(f64::MAX);
5155+
let a = u64x1::new(0x8000000000000000);
5156+
let b = f64x1::new(-1.23f64);
5157+
let c = f64x1::new(2.34f64);
5158+
let e = f64x1::new(-2.34f64);
51435159
let r: f64x1 = transmute(vbsl_f64(transmute(a), transmute(b), transmute(c)));
51445160
assert_eq!(r, e);
51455161
}
51465162
#[simd_test(enable = "neon")]
51475163
unsafe fn test_vbsl_p64() {
5148-
let a = u64x1::new(u64::MAX);
5164+
let a = u64x1::new(1);
51495165
let b = u64x1::new(u64::MAX);
51505166
let c = u64x1::new(u64::MIN);
5151-
let e = u64x1::new(u64::MAX);
5167+
let e = u64x1::new(1);
51525168
let r: u64x1 = transmute(vbsl_p64(transmute(a), transmute(b), transmute(c)));
51535169
assert_eq!(r, e);
51545170
}
51555171
#[simd_test(enable = "neon")]
51565172
unsafe fn test_vbslq_f64() {
5157-
let a = u64x2::new(u64::MAX, 0);
5158-
let b = f64x2::new(f64::MAX, f64::MAX);
5159-
let c = f64x2::new(f64::MIN, f64::MIN);
5160-
let e = f64x2::new(f64::MAX, f64::MIN);
5173+
let a = u64x2::new(1, 0x8000000000000000);
5174+
let b = f64x2::new(f64::MAX, -1.23f64);
5175+
let c = f64x2::new(f64::MIN, 2.34f64);
5176+
let e = f64x2::new(f64::MIN, -2.34f64);
51615177
let r: f64x2 = transmute(vbslq_f64(transmute(a), transmute(b), transmute(c)));
51625178
assert_eq!(r, e);
51635179
}
51645180
#[simd_test(enable = "neon")]
51655181
unsafe fn test_vbslq_p64() {
5166-
let a = u64x2::new(u64::MAX, 0);
5182+
let a = u64x2::new(u64::MAX, 1);
51675183
let b = u64x2::new(u64::MAX, u64::MAX);
51685184
let c = u64x2::new(u64::MIN, u64::MIN);
5169-
let e = u64x2::new(u64::MAX, u64::MIN);
5185+
let e = u64x2::new(u64::MAX, 1);
51705186
let r: u64x2 = transmute(vbslq_p64(transmute(a), transmute(b), transmute(c)));
51715187
assert_eq!(r, e);
51725188
}

0 commit comments

Comments
 (0)