@@ -205,71 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205
205
}
206
206
207
207
define <2 x half> @atomic_vec2_half(ptr %x) {
208
- ; CHECK3-LABEL: atomic_vec2_half:
209
- ; CHECK3: ## %bb.0:
210
- ; CHECK3-NEXT: movl (%rdi), %eax
211
- ; CHECK3-NEXT: movd %eax, %xmm1
212
- ; CHECK3-NEXT: shrl $16, %eax
213
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
214
- ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
215
- ; CHECK3-NEXT: pand %xmm0, %xmm1
216
- ; CHECK3-NEXT: pslld $16, %xmm2
217
- ; CHECK3-NEXT: pandn %xmm2, %xmm0
218
- ; CHECK3-NEXT: por %xmm1, %xmm0
219
- ; CHECK3-NEXT: retq
220
- ;
221
- ; CHECK0-LABEL: atomic_vec2_half:
222
- ; CHECK0: ## %bb.0:
223
- ; CHECK0-NEXT: movl (%rdi), %eax
224
- ; CHECK0-NEXT: movl %eax, %ecx
225
- ; CHECK0-NEXT: shrl $16, %ecx
226
- ; CHECK0-NEXT: movw %cx, %dx
227
- ; CHECK0-NEXT: ## implicit-def: $ecx
228
- ; CHECK0-NEXT: movw %dx, %cx
229
- ; CHECK0-NEXT: ## implicit-def: $xmm2
230
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
231
- ; CHECK0-NEXT: movd %eax, %xmm0
232
- ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
233
- ; CHECK0-NEXT: pand %xmm1, %xmm0
234
- ; CHECK0-NEXT: pslld $16, %xmm2
235
- ; CHECK0-NEXT: pandn %xmm2, %xmm1
236
- ; CHECK0-NEXT: por %xmm1, %xmm0
237
- ; CHECK0-NEXT: retq
208
+ ; CHECK-LABEL: atomic_vec2_half:
209
+ ; CHECK: ## %bb.0:
210
+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211
+ ; CHECK-NEXT: retq
238
212
%ret = load atomic <2 x half>, ptr %x acquire, align 4
239
213
ret <2 x half> %ret
240
214
}
241
215
242
216
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
243
- ; CHECK3-LABEL: atomic_vec2_bfloat:
244
- ; CHECK3: ## %bb.0:
245
- ; CHECK3-NEXT: movl (%rdi), %eax
246
- ; CHECK3-NEXT: movd %eax, %xmm1
247
- ; CHECK3-NEXT: shrl $16, %eax
248
- ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
249
- ; CHECK3-NEXT: pand %xmm0, %xmm1
250
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
251
- ; CHECK3-NEXT: pslld $16, %xmm2
252
- ; CHECK3-NEXT: pandn %xmm2, %xmm0
253
- ; CHECK3-NEXT: por %xmm1, %xmm0
254
- ; CHECK3-NEXT: retq
255
- ;
256
- ; CHECK0-LABEL: atomic_vec2_bfloat:
257
- ; CHECK0: ## %bb.0:
258
- ; CHECK0-NEXT: movl (%rdi), %eax
259
- ; CHECK0-NEXT: movl %eax, %ecx
260
- ; CHECK0-NEXT: shrl $16, %ecx
261
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
262
- ; CHECK0-NEXT: movd %eax, %xmm0
263
- ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
264
- ; CHECK0-NEXT: pand %xmm1, %xmm0
265
- ; CHECK0-NEXT: ## implicit-def: $eax
266
- ; CHECK0-NEXT: movw %cx, %ax
267
- ; CHECK0-NEXT: ## implicit-def: $xmm2
268
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
269
- ; CHECK0-NEXT: pslld $16, %xmm2
270
- ; CHECK0-NEXT: pandn %xmm2, %xmm1
271
- ; CHECK0-NEXT: por %xmm1, %xmm0
272
- ; CHECK0-NEXT: retq
217
+ ; CHECK-LABEL: atomic_vec2_bfloat:
218
+ ; CHECK: ## %bb.0:
219
+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220
+ ; CHECK-NEXT: retq
273
221
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
274
222
ret <2 x bfloat> %ret
275
223
}
@@ -447,102 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
447
395
}
448
396
449
397
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
450
- ; CHECK3-LABEL: atomic_vec4_half:
451
- ; CHECK3: ## %bb.0:
452
- ; CHECK3-NEXT: movq (%rdi), %rax
453
- ; CHECK3-NEXT: movl %eax, %ecx
454
- ; CHECK3-NEXT: shrl $16, %ecx
455
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
456
- ; CHECK3-NEXT: movq %rax, %rcx
457
- ; CHECK3-NEXT: shrq $32, %rcx
458
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
459
- ; CHECK3-NEXT: movq %rax, %xmm0
460
- ; CHECK3-NEXT: shrq $48, %rax
461
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
462
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
463
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
464
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
465
- ; CHECK3-NEXT: retq
466
- ;
467
- ; CHECK0-LABEL: atomic_vec4_half:
468
- ; CHECK0: ## %bb.0:
469
- ; CHECK0-NEXT: movq (%rdi), %rax
470
- ; CHECK0-NEXT: movl %eax, %ecx
471
- ; CHECK0-NEXT: shrl $16, %ecx
472
- ; CHECK0-NEXT: movw %cx, %dx
473
- ; CHECK0-NEXT: ## implicit-def: $ecx
474
- ; CHECK0-NEXT: movw %dx, %cx
475
- ; CHECK0-NEXT: ## implicit-def: $xmm2
476
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
477
- ; CHECK0-NEXT: movq %rax, %rcx
478
- ; CHECK0-NEXT: shrq $32, %rcx
479
- ; CHECK0-NEXT: movw %cx, %dx
480
- ; CHECK0-NEXT: ## implicit-def: $ecx
481
- ; CHECK0-NEXT: movw %dx, %cx
482
- ; CHECK0-NEXT: ## implicit-def: $xmm1
483
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
484
- ; CHECK0-NEXT: movq %rax, %rcx
485
- ; CHECK0-NEXT: shrq $48, %rcx
486
- ; CHECK0-NEXT: movw %cx, %dx
487
- ; CHECK0-NEXT: ## implicit-def: $ecx
488
- ; CHECK0-NEXT: movw %dx, %cx
489
- ; CHECK0-NEXT: ## implicit-def: $xmm3
490
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm3
491
- ; CHECK0-NEXT: movq %rax, %xmm0
492
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
493
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
494
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
495
- ; CHECK0-NEXT: retq
398
+ ; CHECK-LABEL: atomic_vec4_half:
399
+ ; CHECK: ## %bb.0:
400
+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
401
+ ; CHECK-NEXT: retq
496
402
%ret = load atomic <4 x half>, ptr %x acquire, align 8
497
403
ret <4 x half> %ret
498
404
}
499
405
500
406
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
501
- ; CHECK3-LABEL: atomic_vec4_bfloat:
502
- ; CHECK3: ## %bb.0:
503
- ; CHECK3-NEXT: movq (%rdi), %rax
504
- ; CHECK3-NEXT: movq %rax, %xmm0
505
- ; CHECK3-NEXT: movl %eax, %ecx
506
- ; CHECK3-NEXT: shrl $16, %ecx
507
- ; CHECK3-NEXT: movq %rax, %rdx
508
- ; CHECK3-NEXT: shrq $32, %rdx
509
- ; CHECK3-NEXT: shrq $48, %rax
510
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511
- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm2
512
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
513
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
514
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
515
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
516
- ; CHECK3-NEXT: retq
517
- ;
518
- ; CHECK0-LABEL: atomic_vec4_bfloat:
519
- ; CHECK0: ## %bb.0:
520
- ; CHECK0-NEXT: movq (%rdi), %rax
521
- ; CHECK0-NEXT: movq %rax, %xmm0
522
- ; CHECK0-NEXT: movl %eax, %ecx
523
- ; CHECK0-NEXT: shrl $16, %ecx
524
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
525
- ; CHECK0-NEXT: movq %rax, %rdx
526
- ; CHECK0-NEXT: shrq $32, %rdx
527
- ; CHECK0-NEXT: ## kill: def $dx killed $dx killed $rdx
528
- ; CHECK0-NEXT: shrq $48, %rax
529
- ; CHECK0-NEXT: movw %ax, %si
530
- ; CHECK0-NEXT: ## implicit-def: $eax
531
- ; CHECK0-NEXT: movw %si, %ax
532
- ; CHECK0-NEXT: ## implicit-def: $xmm2
533
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
534
- ; CHECK0-NEXT: ## implicit-def: $eax
535
- ; CHECK0-NEXT: movw %dx, %ax
536
- ; CHECK0-NEXT: ## implicit-def: $xmm1
537
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
538
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
539
- ; CHECK0-NEXT: ## implicit-def: $eax
540
- ; CHECK0-NEXT: movw %cx, %ax
541
- ; CHECK0-NEXT: ## implicit-def: $xmm2
542
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545
- ; CHECK0-NEXT: retq
407
+ ; CHECK-LABEL: atomic_vec4_bfloat:
408
+ ; CHECK: ## %bb.0:
409
+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
410
+ ; CHECK-NEXT: retq
546
411
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
547
412
ret <4 x bfloat> %ret
548
413
}
0 commit comments