@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204
204
ret <2 x float > %ret
205
205
}
206
206
207
; Atomic acquire load of a <2 x half> vector from %x (4-byte aligned), returned
; by value.
;
; Under both check prefixes the expected code touches memory exactly once, with
; a 32-bit `movl (%rdi), %eax`. The two 16-bit lanes are then separated in
; registers (shrl $16 for the upper lane), inserted into xmm0/xmm1 with pinsrw,
; and interleaved with punpcklwd.
;
; NOTE(review): the CHECK3 and CHECK0 prefixes presumably map to optimized and
; unoptimized RUN lines, which are outside this view -- confirm.
; NOTE(review): the assertions below look autogenerated (llc check-update
; style); regenerate them rather than hand-editing -- confirm against the file
; header.
define <2 x half> @atomic_vec2_half(ptr %x) {
; CHECK3-LABEL: atomic_vec2_half:
; CHECK3:       ## %bb.0:
; CHECK3-NEXT:    movl (%rdi), %eax
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK3-NEXT:    shrl $16, %eax
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK3-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec2_half:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    movl (%rdi), %eax
; CHECK0-NEXT:    movl %eax, %ecx
; CHECK0-NEXT:    shrl $16, %ecx
; CHECK0-NEXT:    movw %cx, %dx
; CHECK0-NEXT:    ## implicit-def: $ecx
; CHECK0-NEXT:    movw %dx, %cx
; CHECK0-NEXT:    ## implicit-def: $xmm1
; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm1
; CHECK0-NEXT:    movw %ax, %cx
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %cx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm0
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK0-NEXT:    retq
  %ret = load atomic <2 x half>, ptr %x acquire, align 4
  ret <2 x half> %ret
}
237
+
238
; Atomic acquire load of a <2 x bfloat> vector from %x (4-byte aligned),
; returned by value.
;
; Under both check prefixes the expected code touches memory exactly once, with
; a 32-bit `movl (%rdi), %eax`. The upper 16-bit lane is obtained with
; shrl $16; each lane is inserted into its own xmm register with pinsrw and the
; two are interleaved into xmm0 with punpcklwd.
;
; NOTE(review): the CHECK3 and CHECK0 prefixes presumably map to optimized and
; unoptimized RUN lines, which are outside this view -- confirm.
; NOTE(review): the assertions below look autogenerated (llc check-update
; style); regenerate them rather than hand-editing -- confirm against the file
; header.
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
; CHECK3-LABEL: atomic_vec2_bfloat:
; CHECK3:       ## %bb.0:
; CHECK3-NEXT:    movl (%rdi), %eax
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK3-NEXT:    shrl $16, %eax
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK3-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec2_bfloat:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    movl (%rdi), %eax
; CHECK0-NEXT:    movl %eax, %ecx
; CHECK0-NEXT:    shrl $16, %ecx
; CHECK0-NEXT:    ## kill: def $cx killed $cx killed $ecx
; CHECK0-NEXT:    movw %ax, %dx
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %dx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm0
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %cx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm1
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK0-NEXT:    retq
  %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
  ret <2 x bfloat> %ret
}
268
+
207
269
define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
208
270
; CHECK3-LABEL: atomic_vec1_ptr:
209
271
; CHECK3: ## %bb.0:
@@ -376,6 +438,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376
438
ret <4 x i16 > %ret
377
439
}
378
440
441
; Atomic acquire load of a <4 x half> vector from %x (8-byte aligned), returned
; by value. The function is marked nounwind.
;
; Under both check prefixes the expected code touches memory exactly once, with
; a 64-bit `movq (%rdi), %rax`. The four 16-bit lanes are then separated in
; general-purpose registers (shrl $16, shrq $32, shrq $48), inserted into
; individual xmm registers with pinsrw, paired with punpcklwd, and the two
; pairs are merged into xmm0 (punpckldq under the CHECK3 prefix, unpcklps under
; the CHECK0 prefix).
;
; NOTE(review): the CHECK3 and CHECK0 prefixes presumably map to optimized and
; unoptimized RUN lines, which are outside this view -- confirm.
; NOTE(review): the assertions below look autogenerated (llc check-update
; style); regenerate them rather than hand-editing -- confirm against the file
; header.
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec4_half:
; CHECK3:       ## %bb.0:
; CHECK3-NEXT:    movq (%rdi), %rax
; CHECK3-NEXT:    movl %eax, %ecx
; CHECK3-NEXT:    shrl $16, %ecx
; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm1
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK3-NEXT:    movq %rax, %rcx
; CHECK3-NEXT:    shrq $32, %rcx
; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm2
; CHECK3-NEXT:    shrq $48, %rax
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm3
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK3-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec4_half:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    movq (%rdi), %rax
; CHECK0-NEXT:    movl %eax, %ecx
; CHECK0-NEXT:    shrl $16, %ecx
; CHECK0-NEXT:    movw %cx, %dx
; CHECK0-NEXT:    ## implicit-def: $ecx
; CHECK0-NEXT:    movw %dx, %cx
; CHECK0-NEXT:    ## implicit-def: $xmm2
; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm2
; CHECK0-NEXT:    movw %ax, %dx
; CHECK0-NEXT:    ## implicit-def: $ecx
; CHECK0-NEXT:    movw %dx, %cx
; CHECK0-NEXT:    ## implicit-def: $xmm0
; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm0
; CHECK0-NEXT:    movq %rax, %rcx
; CHECK0-NEXT:    shrq $32, %rcx
; CHECK0-NEXT:    movw %cx, %dx
; CHECK0-NEXT:    ## implicit-def: $ecx
; CHECK0-NEXT:    movw %dx, %cx
; CHECK0-NEXT:    ## implicit-def: $xmm1
; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm1
; CHECK0-NEXT:    shrq $48, %rax
; CHECK0-NEXT:    movw %ax, %cx
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %cx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm3
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm3
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK0-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK0-NEXT:    retq
  %ret = load atomic <4 x half>, ptr %x acquire, align 8
  ret <4 x half> %ret
}
494
+
495
; Atomic acquire load of a <4 x bfloat> vector from %x (8-byte aligned),
; returned by value. The function is marked nounwind.
;
; Under both check prefixes the expected code touches memory exactly once, with
; a 64-bit `movq (%rdi), %rax`. The four 16-bit lanes are then separated in
; general-purpose registers (shrl $16, shrq $32, shrq $48), inserted into xmm
; registers with pinsrw, paired with punpcklwd, and the two pairs are merged
; into xmm0 (punpckldq under the CHECK3 prefix, unpcklps under the CHECK0
; prefix).
;
; NOTE(review): the CHECK3 and CHECK0 prefixes presumably map to optimized and
; unoptimized RUN lines, which are outside this view -- confirm.
; NOTE(review): the assertions below look autogenerated (llc check-update
; style); regenerate them rather than hand-editing -- confirm against the file
; header.
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec4_bfloat:
; CHECK3:       ## %bb.0:
; CHECK3-NEXT:    movq (%rdi), %rax
; CHECK3-NEXT:    movq %rax, %rcx
; CHECK3-NEXT:    movq %rax, %rdx
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK3-NEXT:    ## kill: def $eax killed $eax killed $rax
; CHECK3-NEXT:    shrl $16, %eax
; CHECK3-NEXT:    shrq $32, %rcx
; CHECK3-NEXT:    shrq $48, %rdx
; CHECK3-NEXT:    pinsrw $0, %edx, %xmm1
; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm2
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK3-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK3-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec4_bfloat:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    movq (%rdi), %rax
; CHECK0-NEXT:    movl %eax, %ecx
; CHECK0-NEXT:    shrl $16, %ecx
; CHECK0-NEXT:    ## kill: def $cx killed $cx killed $ecx
; CHECK0-NEXT:    movw %ax, %dx
; CHECK0-NEXT:    movq %rax, %rsi
; CHECK0-NEXT:    shrq $32, %rsi
; CHECK0-NEXT:    ## kill: def $si killed $si killed $rsi
; CHECK0-NEXT:    shrq $48, %rax
; CHECK0-NEXT:    movw %ax, %di
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %di, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm0
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %si, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm1
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %dx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm0
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK0-NEXT:    ## implicit-def: $eax
; CHECK0-NEXT:    movw %cx, %ax
; CHECK0-NEXT:    ## implicit-def: $xmm2
; CHECK0-NEXT:    pinsrw $0, %eax, %xmm2
; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK0-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK0-NEXT:    retq
  %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
  ret <4 x bfloat> %ret
}
549
+
379
550
define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
380
551
; CHECK-LABEL: atomic_vec4_float_align:
381
552
; CHECK: ## %bb.0:
0 commit comments