[SelectionDAG] Widen <2 x T> vector types for atomic load #120598

jofrn · 2024-12-19T16:24:21Z

Vector types with 2 elements must be widened. This change widens the
vector types of atomic loads in SelectionDAG so that aligned atomic loads
of vectors with more than one element can be translated.

Stack:

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

llvmbot · 2024-12-19T16:24:58Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-llvm-selectiondag

Author: None (jofrn)

Changes

`load atomic <n x T>` is not valid. This change widens the
vector types of atomic loads in SelectionDAG so that aligned atomic loads
of vectors with more than one element can be translated.

Stack:

#120598 ⬅
#120387
#120386
#120385
#120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

Full diff: https://github.com/llvm/llvm-project/pull/120598.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+3-1)
(modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+36-3)
(modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+10)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b81c9f87cb27d7..22b7c15f8768ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   /// resulting wider type. It takes:
   ///   LdChain: list of chains for the load to be generated.
   ///   Ld:      load to widen
+  template <typename T>
   SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
-                              LoadSDNode *LD);
+                              T *LD, bool IsAtomic = false);
 
   /// Helper function to generate a set of extension loads to load a vector with
   /// a resulting wider type. It takes:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c85e4ba2cfa5a7..4dfdd22ba27869 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+    Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+    break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
                      N->getOperand(1), N->getOperand(2));
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+  SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/);
+
+  if (Result) {
+    // If we generate a single load, we can use that for the chain.  Otherwise,
+    // build a factor node to remember the multiple loads are independent and
+    // chain to that.
+    SDValue NewChain;
+    if (LdChain.size() == 1)
+      NewChain = LdChain[0];
+    else
+      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
+
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+
+    return Result;
+  }
+
+  report_fatal_error("Unable to widen atomic vector load");
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
   return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
 }
 
+template <typename T>
 SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
-                                              LoadSDNode *LD) {
+                                              T *LD, bool IsAtomic) {
   // The strategy assumes that we can efficiently load power-of-two widths.
   // The routine chops the vector into the largest vector loads with the same
   // element type or scalar loads and then recombines it to the widen vector
@@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
     } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
   }
 
-  SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
-                             LD->getOriginalAlign(), MMOFlags, AAInfo);
+  SDValue LdOp;
+  if (IsAtomic)
+    LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+        BasePtr, LD->getMemOperand());
+  else
+    LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+        LD->getOriginalAlign(), MMOFlags, AAInfo);
   LdChain.push_back(LdOp.getValue(1));
 
   // Check if we can load the element with one instruction.
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index a57ae767859b10..f3cc39dba95253 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,16 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_i32_align:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+  ret <2 x i32> %ret
+}
+
 define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec1_ptr:
 ; CHECK3:       ## %bb.0:

github-actions · 2024-12-19T16:28:03Z

✅ With the latest revision this PR passed the C/C++ code formatter.

arsenm · 2025-04-25T21:19:14Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+    EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+    SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+    return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  } else if (FirstVT == WidenVT)


No else after return

arsenm · 2025-04-25T21:23:06Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+      (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();


The load is not simple, this is an atomic load

arsenm · 2025-04-25T21:23:33Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

@@ -5982,6 +5985,89 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
                     N->getOperand(1), N->getOperand(2));
 }

+static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT,


Document this. The name is misleading, no load is performed here.

We also have way too many places essentially repeating the same bit coercion dance

arsenm · 2025-04-25T21:25:09Z

llvm/lib/Target/X86/X86InstrCompiler.td

+def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
+           (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+           (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i16>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+           (MOV64toPQIrm  addr:$src)>; // load atomic <2 x i32,float>


Can this be done as a separate step? This at most saves 1 instruction over the naive lowering?

Do you mean as a separate PR/commit?

arsenm · 2025-04-25T21:26:06Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+    return Result;
+  }
+
+  report_fatal_error("Unable to widen atomic vector load");


loadElement has no failure path, just delete the check and report_fatal_error

arsenm · 2025-04-27T08:27:01Z

Can you stop making no-op updates to these PRs? If there is an update, there should be some reviewable change. Right now this is just spamming everyone's notifications.

arsenm · 2025-05-05T18:29:59Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+    unsigned NumConcat =
+        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+    SmallVector<SDValue, 16> ConcatOps(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(FirstVT);


arsenm · 2025-05-05T18:30:11Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+  } else if (FirstVT == WidenVT)
+    return LdOp;
+  else {
+    // TODO: We don't currently have any tests that exercise this code path.


Then just assert and don't implement

Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1

llvmbot added the backend:X86 and llvm:SelectionDAG (SelectionDAGISel as well) labels Dec 19, 2024

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 37e162b to f7691a8 Compare December 19, 2024 16:42

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch 2 times, most recently from cbaaeeb to d558091 Compare December 19, 2024 19:15

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 34492d9 to 6952507 Compare December 19, 2024 19:36

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from d558091 to 3f37dfc Compare December 19, 2024 19:36

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6952507 to 3cf21cf Compare December 19, 2024 19:43

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 3f37dfc to 89503f2 Compare December 19, 2024 19:43

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3cf21cf to a769a32 Compare December 19, 2024 21:28

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 89503f2 to b2f0b33 Compare December 19, 2024 21:28

jofrn mentioned this pull request Dec 19, 2024

[SelectionDAG] Split vector types for atomic load #120640

Open

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from b2f0b33 to 6737dda Compare December 19, 2024 21:33

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 464bb05 to ebba505 Compare December 19, 2024 21:59

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 6737dda to 2949391 Compare December 19, 2024 21:59

RKSimon requested review from arsenm, RKSimon and jyknight December 20, 2024 10:02

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from ebba505 to 5bbc421 Compare December 20, 2024 11:25

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 2949391 to 78adf01 Compare December 20, 2024 11:25

jofrn mentioned this pull request Dec 20, 2024

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

Open

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 5bbc421 to 746725f Compare December 20, 2024 11:39

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 05e7afd to 03400f8 Compare March 3, 2025 23:27

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 03400f8 to 40f307c Compare April 25, 2025 20:51

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from bc6c355 to 51488b8 Compare April 25, 2025 20:51

arsenm reviewed Apr 25, 2025

View reviewed changes

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 51488b8 to e262387 Compare April 26, 2025 04:07

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 40f307c to bca16c0 Compare April 26, 2025 04:07

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from e262387 to fc42ee7 Compare April 26, 2025 07:57

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from bca16c0 to 3fd1c8a Compare April 26, 2025 07:57

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from fc42ee7 to 867af9f Compare May 1, 2025 04:14

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3fd1c8a to a60144b Compare May 1, 2025 04:14

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 867af9f to 340dec5 Compare May 1, 2025 04:39

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from d76acb2 to 6d04529 Compare May 1, 2025 06:45

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 340dec5 to 5bc5d32 Compare May 1, 2025 06:45

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6d04529 to ebd2298 Compare May 5, 2025 17:45

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 5bc5d32 to 8da6708 Compare May 5, 2025 17:45

arsenm reviewed May 5, 2025

View reviewed changes

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from ebd2298 to acfcbcc Compare May 6, 2025 03:50

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 8da6708 to 218ce15 Compare May 6, 2025 03:50

jofrn changed the base branch from users/jofrn/spr/main/a06a5cc6 to main May 6, 2025 06:03

jofrn changed the title ~~[SelectionDAG][X86] Widen <2 x T> vector types for atomic load~~ [SelectionDAG] Widen <2 x T> vector types for atomic load May 6, 2025

jofrn mentioned this pull request May 6, 2025

[X86] Remove extra MOV after widening atomic load #138635

Open

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 218ce15 to 0b8a020 Compare May 6, 2025 06:03

jofrn changed the base branch from main to users/jofrn/spr/main/a06a5cc6 May 6, 2025 06:04

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 93e8bef to a9729ee Compare May 6, 2025 15:04

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 0b8a020 to 561e379 Compare May 6, 2025 15:04

[SelectionDAG] Widen <2 x T> vector types for atomic load

99a560f

Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1

jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from a9729ee to 5ce8ea6 Compare May 7, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 561e379 to 99a560f Compare May 7, 2025 12:53

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SelectionDAG] Widen <2 x T> vector types for atomic load #120598

[SelectionDAG] Widen <2 x T> vector types for atomic load #120598

jofrn commented Dec 19, 2024 •

edited

Loading

llvmbot commented Dec 19, 2024 •

edited

Loading

github-actions bot commented Dec 19, 2024 •

edited

Loading

arsenm Apr 25, 2025

arsenm Apr 25, 2025

arsenm Apr 25, 2025

arsenm Apr 25, 2025

jofrn May 1, 2025

arsenm May 1, 2025

arsenm Apr 25, 2025

arsenm commented Apr 27, 2025

arsenm May 5, 2025

arsenm May 5, 2025

[SelectionDAG] Widen <2 x T> vector types for atomic load #120598

Are you sure you want to change the base?

[SelectionDAG] Widen <2 x T> vector types for atomic load #120598

Conversation

jofrn commented Dec 19, 2024 • edited Loading

llvmbot commented Dec 19, 2024 • edited Loading

github-actions bot commented Dec 19, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

arsenm commented Apr 27, 2025

Choose a reason for hiding this comment

Choose a reason for hiding this comment

jofrn commented Dec 19, 2024 •

edited

Loading

llvmbot commented Dec 19, 2024 •

edited

Loading

github-actions bot commented Dec 19, 2024 •

edited

Loading