@@ -204,6 +204,76 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204
204
ret <2 x float > %ret
205
205
}
206
206
207
+ ; Test: acquire atomic load of a <2 x half> vector from %x with 4-byte alignment; the loaded vector is returned.
+ ; In both expected sequences below, the only access through (%rdi) is a single 32-bit movl.
+ ; NOTE(review): CHECK3/CHECK0 presumably map to -O3/-O0 RUN lines that are outside this view; confirm.
+ define <2 x half > @atomic_vec2_half (ptr %x ) {
208
+ ; CHECK3-LABEL: atomic_vec2_half:
209
+ ; CHECK3: ## %bb.0:
210
+ ; CHECK3-NEXT: movl (%rdi), %eax
211
+ ; CHECK3-NEXT: movd %eax, %xmm1
212
+ ; CHECK3-NEXT: shrl $16, %eax
213
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
214
+ ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
215
+ ; CHECK3-NEXT: pand %xmm0, %xmm1
216
+ ; CHECK3-NEXT: pslld $16, %xmm2
217
+ ; CHECK3-NEXT: pandn %xmm2, %xmm0
218
+ ; CHECK3-NEXT: por %xmm1, %xmm0
219
+ ; CHECK3-NEXT: retq
220
+ ;
221
+ ; CHECK0-LABEL: atomic_vec2_half:
222
+ ; CHECK0: ## %bb.0:
223
+ ; CHECK0-NEXT: movl (%rdi), %eax
224
+ ; CHECK0-NEXT: movl %eax, %ecx
225
+ ; CHECK0-NEXT: shrl $16, %ecx
226
+ ; CHECK0-NEXT: movw %cx, %dx
227
+ ; CHECK0-NEXT: ## implicit-def: $ecx
228
+ ; CHECK0-NEXT: movw %dx, %cx
229
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
230
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
231
+ ; CHECK0-NEXT: movd %eax, %xmm0
232
+ ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
233
+ ; CHECK0-NEXT: pand %xmm1, %xmm0
234
+ ; CHECK0-NEXT: pslld $16, %xmm2
235
+ ; CHECK0-NEXT: pandn %xmm2, %xmm1
236
+ ; CHECK0-NEXT: por %xmm1, %xmm0
237
+ ; CHECK0-NEXT: retq
238
+ %ret = load atomic <2 x half >, ptr %x acquire , align 4
239
+ ret <2 x half > %ret
240
+ }
241
+
242
+ ; Test: acquire atomic load of a <2 x bfloat> vector from %x with 4-byte alignment; the loaded vector is returned.
+ ; In both expected sequences below, the only access through (%rdi) is a single 32-bit movl.
+ ; NOTE(review): CHECK3/CHECK0 presumably map to -O3/-O0 RUN lines that are outside this view; confirm.
+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
243
+ ; CHECK3-LABEL: atomic_vec2_bfloat:
244
+ ; CHECK3: ## %bb.0:
245
+ ; CHECK3-NEXT: movl (%rdi), %eax
246
+ ; CHECK3-NEXT: movd %eax, %xmm1
247
+ ; CHECK3-NEXT: shrl $16, %eax
248
+ ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
249
+ ; CHECK3-NEXT: pand %xmm0, %xmm1
250
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
251
+ ; CHECK3-NEXT: pslld $16, %xmm2
252
+ ; CHECK3-NEXT: pandn %xmm2, %xmm0
253
+ ; CHECK3-NEXT: por %xmm1, %xmm0
254
+ ; CHECK3-NEXT: retq
255
+ ;
256
+ ; CHECK0-LABEL: atomic_vec2_bfloat:
257
+ ; CHECK0: ## %bb.0:
258
+ ; CHECK0-NEXT: movl (%rdi), %eax
259
+ ; CHECK0-NEXT: movl %eax, %ecx
260
+ ; CHECK0-NEXT: shrl $16, %ecx
261
+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
262
+ ; CHECK0-NEXT: movd %eax, %xmm0
263
+ ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
264
+ ; CHECK0-NEXT: pand %xmm1, %xmm0
265
+ ; CHECK0-NEXT: ## implicit-def: $eax
266
+ ; CHECK0-NEXT: movw %cx, %ax
267
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
268
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
269
+ ; CHECK0-NEXT: pslld $16, %xmm2
270
+ ; CHECK0-NEXT: pandn %xmm2, %xmm1
271
+ ; CHECK0-NEXT: por %xmm1, %xmm0
272
+ ; CHECK0-NEXT: retq
273
+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
274
+ ret <2 x bfloat> %ret
275
+ }
276
+
207
277
define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
208
278
; CHECK3-LABEL: atomic_vec1_ptr:
209
279
; CHECK3: ## %bb.0:
@@ -376,6 +446,107 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376
446
ret <4 x i16 > %ret
377
447
}
378
448
449
+ ; Test: acquire atomic load of a <4 x half> vector from %x with 8-byte alignment; the loaded vector is returned.
+ ; In both expected sequences below, the only access through (%rdi) is a single 64-bit movq into %rax.
+ ; NOTE(review): CHECK3/CHECK0 presumably map to -O3/-O0 RUN lines that are outside this view; confirm.
+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
450
+ ; CHECK3-LABEL: atomic_vec4_half:
451
+ ; CHECK3: ## %bb.0:
452
+ ; CHECK3-NEXT: movq (%rdi), %rax
453
+ ; CHECK3-NEXT: movl %eax, %ecx
454
+ ; CHECK3-NEXT: shrl $16, %ecx
455
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
456
+ ; CHECK3-NEXT: movq %rax, %rcx
457
+ ; CHECK3-NEXT: shrq $32, %rcx
458
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
459
+ ; CHECK3-NEXT: movq %rax, %xmm0
460
+ ; CHECK3-NEXT: shrq $48, %rax
461
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
462
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
463
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
464
+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
465
+ ; CHECK3-NEXT: retq
466
+ ;
467
+ ; CHECK0-LABEL: atomic_vec4_half:
468
+ ; CHECK0: ## %bb.0:
469
+ ; CHECK0-NEXT: movq (%rdi), %rax
470
+ ; CHECK0-NEXT: movl %eax, %ecx
471
+ ; CHECK0-NEXT: shrl $16, %ecx
472
+ ; CHECK0-NEXT: movw %cx, %dx
473
+ ; CHECK0-NEXT: ## implicit-def: $ecx
474
+ ; CHECK0-NEXT: movw %dx, %cx
475
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
476
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
477
+ ; CHECK0-NEXT: movq %rax, %rcx
478
+ ; CHECK0-NEXT: shrq $32, %rcx
479
+ ; CHECK0-NEXT: movw %cx, %dx
480
+ ; CHECK0-NEXT: ## implicit-def: $ecx
481
+ ; CHECK0-NEXT: movw %dx, %cx
482
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
483
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
484
+ ; CHECK0-NEXT: movq %rax, %rcx
485
+ ; CHECK0-NEXT: shrq $48, %rcx
486
+ ; CHECK0-NEXT: movw %cx, %dx
487
+ ; CHECK0-NEXT: ## implicit-def: $ecx
488
+ ; CHECK0-NEXT: movw %dx, %cx
489
+ ; CHECK0-NEXT: ## implicit-def: $xmm3
490
+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm3
491
+ ; CHECK0-NEXT: movq %rax, %xmm0
492
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
493
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
494
+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
495
+ ; CHECK0-NEXT: retq
496
+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
497
+ ret <4 x half > %ret
498
+ }
499
+
500
+ ; Test: acquire atomic load of a <4 x bfloat> vector from %x with 8-byte alignment; the loaded vector is returned.
+ ; In both expected sequences below, the only access through (%rdi) is a single 64-bit movq into %rax.
+ ; NOTE(review): CHECK3/CHECK0 presumably map to -O3/-O0 RUN lines that are outside this view; confirm.
+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
501
+ ; CHECK3-LABEL: atomic_vec4_bfloat:
502
+ ; CHECK3: ## %bb.0:
503
+ ; CHECK3-NEXT: movq (%rdi), %rax
504
+ ; CHECK3-NEXT: movq %rax, %xmm0
505
+ ; CHECK3-NEXT: movl %eax, %ecx
506
+ ; CHECK3-NEXT: shrl $16, %ecx
507
+ ; CHECK3-NEXT: movq %rax, %rdx
508
+ ; CHECK3-NEXT: shrq $32, %rdx
509
+ ; CHECK3-NEXT: shrq $48, %rax
510
+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511
+ ; CHECK3-NEXT: pinsrw $0, %edx, %xmm2
512
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
513
+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
514
+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
515
+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
516
+ ; CHECK3-NEXT: retq
517
+ ;
518
+ ; CHECK0-LABEL: atomic_vec4_bfloat:
519
+ ; CHECK0: ## %bb.0:
520
+ ; CHECK0-NEXT: movq (%rdi), %rax
521
+ ; CHECK0-NEXT: movq %rax, %xmm0
522
+ ; CHECK0-NEXT: movl %eax, %ecx
523
+ ; CHECK0-NEXT: shrl $16, %ecx
524
+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
525
+ ; CHECK0-NEXT: movq %rax, %rdx
526
+ ; CHECK0-NEXT: shrq $32, %rdx
527
+ ; CHECK0-NEXT: ## kill: def $dx killed $dx killed $rdx
528
+ ; CHECK0-NEXT: shrq $48, %rax
529
+ ; CHECK0-NEXT: movw %ax, %si
530
+ ; CHECK0-NEXT: ## implicit-def: $eax
531
+ ; CHECK0-NEXT: movw %si, %ax
532
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
533
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
534
+ ; CHECK0-NEXT: ## implicit-def: $eax
535
+ ; CHECK0-NEXT: movw %dx, %ax
536
+ ; CHECK0-NEXT: ## implicit-def: $xmm1
537
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
538
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
539
+ ; CHECK0-NEXT: ## implicit-def: $eax
540
+ ; CHECK0-NEXT: movw %cx, %ax
541
+ ; CHECK0-NEXT: ## implicit-def: $xmm2
542
+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543
+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544
+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545
+ ; CHECK0-NEXT: retq
546
+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
547
+ ret <4 x bfloat> %ret
548
+ }
549
+
379
550
define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
380
551
; CHECK-LABEL: atomic_vec4_float_align:
381
552
; CHECK: ## %bb.0:
0 commit comments