Fix target-cpu fpu features on Armv8-R.

chrisnc · chrisnc · commit 1df34e4a8230 · 2024-09-14T18:28:40.000-07:00
This is a follow-up to #123159, applying the same fix to Armv8-R. The fix required llvm/llvm-project#88287 to work properly; now that this change exists in rustc's LLVM, we can fix Armv8-R's default FPU features. In Armv8-R's case, LLVM's default floating-point features are sufficient, because there is no integer-only variant of this architecture, so the target spec no longer overrides them. Also add a run-make test that an appropriate target-cpu enables double-precision for thumbv7em-none-eabihf (cortex-m7), and double-precision plus the Helium vector instructions for thumbv8m.main-none-eabihf (cortex-m85).
diff --git a/compiler/rustc_target/src/spec/targets/armv8r_none_eabihf.rs b/compiler/rustc_target/src/spec/targets/armv8r_none_eabihf.rs
@@ -21,16 +21,16 @@ pub(crate) fn target() -> Target {
             linker: Some("rust-lld".into()),
             relocation_model: RelocModel::Static,
             panic_strategy: PanicStrategy::Abort,
-            // The Cortex-R52 has two variants with respect to floating-point support:
-            // 1. fp-armv8, SP-only, with 16 DP (32 SP) registers
-            // 2. neon-fp-armv8, SP+DP, with 32 DP registers
-            // Use the lesser of these two options as the default, as it will produce code
-            // compatible with either variant.
+            // Armv8-R requires a minimum set of floating-point features equivalent to:
+            // fp-armv8, SP-only, with 16 DP (32 SP) registers
+            // LLVM defines Armv8-R to include these features automatically.
+            //
+            // The Cortex-R52 supports these default features and optionally includes:
+            // neon-fp-armv8, SP+DP, with 32 DP registers
             //
             // Reference:
             // Arm Cortex-R52 Processor Technical Reference Manual
             // - Chapter 15 Advanced SIMD and floating-point support
-            features: "+fp-armv8,-fp64,-d32".into(),
             max_atomic_width: Some(64),
             emit_debug_gdb_scripts: false,
             // GCC defaults to 8 for arm-none here.
diff --git a/tests/run-make/arm-target-cpu-features/cortex-m7.checks b/tests/run-make/arm-target-cpu-features/cortex-m7.checks
@@ -0,0 +1,7 @@
+// Cortex-M7 does not have Advanced SIMD, so don't check anything in vadd_f32_q.
+CHECK-LABEL: vadd_f32_q:
+
+// Cortex-M7 enables double-precision.
+CHECK-LABEL: vadd_f64:
+CHECK: vadd.f64 d0, d0, d1
+CHECK: bx lr
diff --git a/tests/run-make/arm-target-cpu-features/cortex-m85.checks b/tests/run-make/arm-target-cpu-features/cortex-m85.checks
@@ -0,0 +1,12 @@
+// Cortex-M85 enables the Helium instructions.
+CHECK-LABEL: vadd_f32_q:
+CHECK: vld{{.*}}
+CHECK: vld{{.*}}
+CHECK: vadd.f32{{.*}}q
+CHECK: vst{{.*}} [r0]
+CHECK: bx lr
+
+// Cortex-M85 enables double-precision.
+CHECK-LABEL: vadd_f64:
+CHECK: vadd.f64 d0, d0, d1
+CHECK: bx lr
diff --git a/tests/run-make/arm-target-cpu-features/lib.rs b/tests/run-make/arm-target-cpu-features/lib.rs
@@ -0,0 +1,13 @@
+#![no_std]
+
+#[no_mangle] // unmangled so `CHECK-LABEL: vadd_f32_q:` in the .checks files can match
+pub fn vadd_f32_q(x: &mut [f32; 4], y: &[f32; 4]) {
+    for i in 0..4 {
+        x[i] += y[i]; // cortex-m85.checks expects this loop to become a vector `vadd.f32`
+    }
+}
+
+#[no_mangle] // unmangled so `CHECK-LABEL: vadd_f64:` in the .checks files can match
+pub fn vadd_f64(x: f64, y: f64) -> f64 {
+    x + y // both .checks files expect `vadd.f64 d0, d0, d1` here when target-cpu is set
+}
diff --git a/tests/run-make/arm-target-cpu-features/rmake.rs b/tests/run-make/arm-target-cpu-features/rmake.rs
@@ -0,0 +1,55 @@
+// This tests that target-cpu correctly enables additional features for some Arm targets.
+// These targets were originally defined in such a way that features provided by target-cpu would be
+// disabled by the target spec itself. This was fixed in #123159.
+
+// FIXME: This test should move to tests/assembly when building without #![no_core] in
+// that environment is possible, tracked by #130375.
+
+use run_make_support::{llvm_filecheck, llvm_objdump, rustc, static_lib_name};
+
+struct TestCase {
+    target: &'static str, // target triple handed to rustc via `.target(...)`
+    cpu: &'static str, // CPU name: used for -Ctarget-cpu, --mcpu, and the lib/.checks names
+}
+
+static CASES: &[TestCase] = &[
+    TestCase { target: "thumbv7em-none-eabihf", cpu: "cortex-m7" },
+    TestCase { target: "thumbv8m.main-none-eabihf", cpu: "cortex-m85" },
+];
+
+fn main() {
+    for case in CASES {
+        let lib = static_lib_name(case.cpu);
+        let checks = format!("{}.checks", case.cpu);
+        // Returns a fresh rustc invocation on every call, so the second build below can
+        let rustc_command = || {
+            let mut cmd = rustc();
+            cmd.edition("2021")
+                .target(case.target)
+                .arg("-Copt-level=3")
+                .crate_type("rlib")
+                .input("lib.rs")
+                .output(&lib);
+            cmd
+        };
+        // Returns a fresh llvm-objdump invocation that disassembles `lib` for this CPU.
+        let objdump_command = || {
+            let mut cmd = llvm_objdump();
+            cmd.arg("--arch-name=arm")
+                .arg(format!("--mcpu={}", case.cpu))
+                .disassemble()
+                .input(&lib);
+            cmd
+        };
+
+        // First, build without target-cpu and confirm that the checks fail.
+        rustc_command().run();
+        let dis = objdump_command().run().stdout_utf8();
+        llvm_filecheck().patterns(&checks).stdin_buf(dis).run_fail();
+
+        // Then, build with target-cpu and confirm that the checks pass.
+        rustc_command().arg(format!("-Ctarget-cpu={}", case.cpu)).run();
+        let dis = objdump_command().run().stdout_utf8();
+        llvm_filecheck().patterns(&checks).stdin_buf(dis).run();
+    }
+}