Skip to content

Commit 76e0683

Browse files
committed
[SelectionDAG][X86] Split via Concat <n x T> vector types for atomic load
Vector types that aren't widened are 'split' via CONCAT_VECTORS so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate atomic loads of vectors whose element type is bfloat or half. commit-id:3a045357
1 parent 4df72a2 commit 76e0683

File tree

3 files changed

+204
-0
lines changed

3 files changed

+204
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

+1
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
948948
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
949949
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
950950
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
951+
void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD);
951952
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
952953
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
953954
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+32
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11521152
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
11531153
break;
11541154
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
1155+
case ISD::ATOMIC_LOAD:
1156+
SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
1157+
break;
11551158
case ISD::LOAD:
11561159
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
11571160
break;
@@ -1395,6 +1398,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
13951398
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
13961399
}
13971400

1401+
/// Legalize a vector ATOMIC_LOAD whose result type is handled by the
/// "split vector result" path.
///
/// Instead of producing Lo/Hi halves, this emits one ATOMIC_LOAD of an integer
/// vector that has the same element count and element width as the memory
/// type, bitcasts every extracted lane back to a one-element vector of the
/// original element type, and reassembles the full value with CONCAT_VECTORS.
/// Both results of \p LD (the loaded value and the output chain) are replaced.
///
/// \param LD the atomic load node being legalized.
void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
  SDLoc dl(LD);

  EVT MemoryVT = LD->getMemoryVT();
  unsigned NumElts = MemoryVT.getVectorMinNumElements();

  // Use an integer element of the same width as the original element rather
  // than a fixed i16, so the load size and the per-lane bitcast below stay
  // consistent for any element width (half/bfloat still map to i16).
  EVT IntEltVT =
      EVT::getIntegerVT(*DAG.getContext(), MemoryVT.getScalarSizeInBits());
  EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), IntEltVT, NumElts);
  EVT ElemVT = EVT::getVectorVT(*DAG.getContext(),
                                MemoryVT.getVectorElementType(), 1);

  // Create a single atomic to load all the elements at once.
  SDValue Atomic = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT,
                                 LD->getChain(), LD->getBasePtr(),
                                 LD->getMemOperand());

  // Instead of splitting, put all the elements back into a vector.
  SmallVector<SDValue, 4> Ops;
  Ops.reserve(NumElts);
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntEltVT, Atomic,
                              DAG.getVectorIdxConstant(i, dl));
    // Reinterpret the integer lane as a <1 x T> vector so it can be fed to
    // CONCAT_VECTORS.
    Elt = DAG.getBitcast(ElemVT, Elt);
    Ops.push_back(Elt);
  }
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops);

  ReplaceValueWith(SDValue(LD, 0), Concat);
  // Users of the old load's chain must now depend on the new atomic load's
  // output chain (result 1). Forwarding the input chain would drop the
  // ordering dependency on the load itself.
  ReplaceValueWith(SDValue(LD, 1), Atomic.getValue(1));
}
1429+
13981430
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
13991431
MachinePointerInfo &MPI, SDValue &Ptr,
14001432
uint64_t *ScaledOffset) {

llvm/test/CodeGen/X86/atomic-load-store.ll

+171
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204204
ret <2 x float> %ret
205205
}
206206

207+
; Acquire atomic load of <2 x half> with 4-byte alignment. Both sets of
; expectations below require one 32-bit load (movl) from the pointer; the two
; 16-bit lanes are then separated with a 16-bit right shift and placed into
; vector registers with pinsrw/punpcklwd.
define <2 x half> @atomic_vec2_half(ptr %x) {
208+
; CHECK3-LABEL: atomic_vec2_half:
209+
; CHECK3: ## %bb.0:
210+
; CHECK3-NEXT: movl (%rdi), %eax
211+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
212+
; CHECK3-NEXT: shrl $16, %eax
213+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
214+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
215+
; CHECK3-NEXT: retq
216+
;
217+
; CHECK0-LABEL: atomic_vec2_half:
218+
; CHECK0: ## %bb.0:
219+
; CHECK0-NEXT: movl (%rdi), %eax
220+
; CHECK0-NEXT: movl %eax, %ecx
221+
; CHECK0-NEXT: shrl $16, %ecx
222+
; CHECK0-NEXT: movw %cx, %dx
223+
; CHECK0-NEXT: ## implicit-def: $ecx
224+
; CHECK0-NEXT: movw %dx, %cx
225+
; CHECK0-NEXT: ## implicit-def: $xmm1
226+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
227+
; CHECK0-NEXT: movw %ax, %cx
228+
; CHECK0-NEXT: ## implicit-def: $eax
229+
; CHECK0-NEXT: movw %cx, %ax
230+
; CHECK0-NEXT: ## implicit-def: $xmm0
231+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
232+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
233+
; CHECK0-NEXT: retq
234+
%ret = load atomic <2 x half>, ptr %x acquire, align 4
235+
ret <2 x half> %ret
236+
}
237+
238+
; Acquire atomic load of <2 x bfloat> with 4-byte alignment. Both sets of
; expectations below require one 32-bit load (movl) from the pointer; the two
; 16-bit lanes are then separated with a 16-bit right shift and placed into
; vector registers with pinsrw/punpcklwd.
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
239+
; CHECK3-LABEL: atomic_vec2_bfloat:
240+
; CHECK3: ## %bb.0:
241+
; CHECK3-NEXT: movl (%rdi), %eax
242+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
243+
; CHECK3-NEXT: shrl $16, %eax
244+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
245+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
246+
; CHECK3-NEXT: retq
247+
;
248+
; CHECK0-LABEL: atomic_vec2_bfloat:
249+
; CHECK0: ## %bb.0:
250+
; CHECK0-NEXT: movl (%rdi), %eax
251+
; CHECK0-NEXT: movl %eax, %ecx
252+
; CHECK0-NEXT: shrl $16, %ecx
253+
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
254+
; CHECK0-NEXT: movw %ax, %dx
255+
; CHECK0-NEXT: ## implicit-def: $eax
256+
; CHECK0-NEXT: movw %dx, %ax
257+
; CHECK0-NEXT: ## implicit-def: $xmm0
258+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
259+
; CHECK0-NEXT: ## implicit-def: $eax
260+
; CHECK0-NEXT: movw %cx, %ax
261+
; CHECK0-NEXT: ## implicit-def: $xmm1
262+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
263+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
264+
; CHECK0-NEXT: retq
265+
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
266+
ret <2 x bfloat> %ret
267+
}
268+
207269
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
208270
; CHECK3-LABEL: atomic_vec1_ptr:
209271
; CHECK3: ## %bb.0:
@@ -376,6 +438,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376438
ret <4 x i16> %ret
377439
}
378440

441+
; Acquire atomic load of <4 x half> with 8-byte alignment. Both sets of
; expectations below require one 64-bit load (movq) from the pointer; the four
; 16-bit lanes are then separated with right shifts of 16, 32 and 48 bits and
; merged into a single vector register with pinsrw and unpack instructions.
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
442+
; CHECK3-LABEL: atomic_vec4_half:
443+
; CHECK3: ## %bb.0:
444+
; CHECK3-NEXT: movq (%rdi), %rax
445+
; CHECK3-NEXT: movl %eax, %ecx
446+
; CHECK3-NEXT: shrl $16, %ecx
447+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
448+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
449+
; CHECK3-NEXT: movq %rax, %rcx
450+
; CHECK3-NEXT: shrq $32, %rcx
451+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
452+
; CHECK3-NEXT: shrq $48, %rax
453+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
454+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
455+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
456+
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
457+
; CHECK3-NEXT: retq
458+
;
459+
; CHECK0-LABEL: atomic_vec4_half:
460+
; CHECK0: ## %bb.0:
461+
; CHECK0-NEXT: movq (%rdi), %rax
462+
; CHECK0-NEXT: movl %eax, %ecx
463+
; CHECK0-NEXT: shrl $16, %ecx
464+
; CHECK0-NEXT: movw %cx, %dx
465+
; CHECK0-NEXT: ## implicit-def: $ecx
466+
; CHECK0-NEXT: movw %dx, %cx
467+
; CHECK0-NEXT: ## implicit-def: $xmm2
468+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
469+
; CHECK0-NEXT: movw %ax, %dx
470+
; CHECK0-NEXT: ## implicit-def: $ecx
471+
; CHECK0-NEXT: movw %dx, %cx
472+
; CHECK0-NEXT: ## implicit-def: $xmm0
473+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
474+
; CHECK0-NEXT: movq %rax, %rcx
475+
; CHECK0-NEXT: shrq $32, %rcx
476+
; CHECK0-NEXT: movw %cx, %dx
477+
; CHECK0-NEXT: ## implicit-def: $ecx
478+
; CHECK0-NEXT: movw %dx, %cx
479+
; CHECK0-NEXT: ## implicit-def: $xmm1
480+
; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
481+
; CHECK0-NEXT: shrq $48, %rax
482+
; CHECK0-NEXT: movw %ax, %cx
483+
; CHECK0-NEXT: ## implicit-def: $eax
484+
; CHECK0-NEXT: movw %cx, %ax
485+
; CHECK0-NEXT: ## implicit-def: $xmm3
486+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
487+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
488+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
489+
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490+
; CHECK0-NEXT: retq
491+
%ret = load atomic <4 x half>, ptr %x acquire, align 8
492+
ret <4 x half> %ret
493+
}
494+
495+
; Acquire atomic load of <4 x bfloat> with 8-byte alignment. Both sets of
; expectations below require one 64-bit load (movq) from the pointer; the four
; 16-bit lanes are then separated with right shifts of 16, 32 and 48 bits and
; merged into a single vector register with pinsrw and unpack instructions.
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
496+
; CHECK3-LABEL: atomic_vec4_bfloat:
497+
; CHECK3: ## %bb.0:
498+
; CHECK3-NEXT: movq (%rdi), %rax
499+
; CHECK3-NEXT: movq %rax, %rcx
500+
; CHECK3-NEXT: movq %rax, %rdx
501+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
502+
; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
503+
; CHECK3-NEXT: shrl $16, %eax
504+
; CHECK3-NEXT: shrq $32, %rcx
505+
; CHECK3-NEXT: shrq $48, %rdx
506+
; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
507+
; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
508+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
509+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
510+
; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
511+
; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
512+
; CHECK3-NEXT: retq
513+
;
514+
; CHECK0-LABEL: atomic_vec4_bfloat:
515+
; CHECK0: ## %bb.0:
516+
; CHECK0-NEXT: movq (%rdi), %rax
517+
; CHECK0-NEXT: movl %eax, %ecx
518+
; CHECK0-NEXT: shrl $16, %ecx
519+
; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
520+
; CHECK0-NEXT: movw %ax, %dx
521+
; CHECK0-NEXT: movq %rax, %rsi
522+
; CHECK0-NEXT: shrq $32, %rsi
523+
; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
524+
; CHECK0-NEXT: shrq $48, %rax
525+
; CHECK0-NEXT: movw %ax, %di
526+
; CHECK0-NEXT: ## implicit-def: $eax
527+
; CHECK0-NEXT: movw %di, %ax
528+
; CHECK0-NEXT: ## implicit-def: $xmm0
529+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
530+
; CHECK0-NEXT: ## implicit-def: $eax
531+
; CHECK0-NEXT: movw %si, %ax
532+
; CHECK0-NEXT: ## implicit-def: $xmm1
533+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
534+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
535+
; CHECK0-NEXT: ## implicit-def: $eax
536+
; CHECK0-NEXT: movw %dx, %ax
537+
; CHECK0-NEXT: ## implicit-def: $xmm0
538+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
539+
; CHECK0-NEXT: ## implicit-def: $eax
540+
; CHECK0-NEXT: movw %cx, %ax
541+
; CHECK0-NEXT: ## implicit-def: $xmm2
542+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543+
; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544+
; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545+
; CHECK0-NEXT: retq
546+
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
547+
ret <4 x bfloat> %ret
548+
}
549+
379550
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
380551
; CHECK-LABEL: atomic_vec4_float_align:
381552
; CHECK: ## %bb.0:

0 commit comments

Comments
 (0)