release/19.x: [SystemZ] Fix codegen for _[u]128 intrinsics #111376
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-systemz

Author: None (llvmbot)

Changes: Backport of baf9b7d, requested by @uweigand.

Full diff: https://github.com/llvm/llvm-project/pull/111376.diff

2 Files Affected:
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index 1f51e32c0d136d..609c7cf0b7a6f9 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -8359,7 +8359,9 @@ vec_min(__vector double __a, __vector double __b) {
static inline __ATTRS_ai __vector unsigned char
vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)((__int128)__a + (__int128)__b);
+ return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
+ ((__int128)__a + (__int128)__b);
}
/*-- vec_addc ---------------------------------------------------------------*/
@@ -8389,6 +8391,7 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_ai __vector unsigned char
vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
}
@@ -8398,6 +8401,7 @@ static inline __ATTRS_ai __vector unsigned char
vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8408,6 +8412,7 @@ static inline __ATTRS_ai __vector unsigned char
vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8483,7 +8488,9 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai __vector unsigned char
vec_gfmsum_128(__vector unsigned long long __a,
__vector unsigned long long __b) {
- return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b);
+ return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
+ __builtin_s390_vgfmg(__a, __b);
}
/*-- vec_gfmsum_accum -------------------------------------------------------*/
@@ -8513,6 +8520,7 @@ vec_gfmsum_accum_128(__vector unsigned long long __a,
__vector unsigned long long __b,
__vector unsigned char __c) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
}
@@ -8810,6 +8818,7 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
#define vec_msum_u128(X, Y, Z, W) \
((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
+ (unsigned __int128 __attribute__((__vector_size__(16)))) \
__builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
#endif
@@ -8817,7 +8826,9 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
static inline __ATTRS_ai __vector unsigned char
vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)((__int128)__a - (__int128)__b);
+ return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
+ ((__int128)__a - (__int128)__b);
}
/*-- vec_subc ---------------------------------------------------------------*/
@@ -8847,6 +8858,7 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_ai __vector unsigned char
vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
}
@@ -8856,6 +8868,7 @@ static inline __ATTRS_ai __vector unsigned char
vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8866,6 +8879,7 @@ static inline __ATTRS_ai __vector unsigned char
vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
__builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8886,12 +8900,16 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
- return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b);
+ return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
+ __builtin_s390_vsumqf(__a, __b);
}
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
- return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b);
+ return (__vector unsigned char)
+ (unsigned __int128 __attribute__((__vector_size__(16))))
+ __builtin_s390_vsumqg(__a, __b);
}
/*-- vec_sum4 ---------------------------------------------------------------*/
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
new file mode 100644
index 00000000000000..896cef515743c6
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
@@ -0,0 +1,165 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu z14 -triple s390x-linux-gnu \
+// RUN: -O2 -fzvector -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+
+#include <vecintrin.h>
+
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+
+// CHECK-LABEL: define dso_local void @test(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128
+// CHECK-NEXT: [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]])
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128
+// CHECK-NEXT: [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]])
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128
+// CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP20]] to i128
+// CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128
+// CHECK-NEXT: [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]])
+// CHECK-NEXT: [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128
+// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128
+// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]]
+// CHECK-NEXT: [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP32]] to i128
+// CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128
+// CHECK-NEXT: [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]])
+// CHECK-NEXT: [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128
+// CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128
+// CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128
+// CHECK-NEXT: [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]])
+// CHECK-NEXT: [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128
+// CHECK-NEXT: [[TMP50:%.*]] = bitcast <16 x i8> [[TMP47]] to i128
+// CHECK-NEXT: [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128
+// CHECK-NEXT: [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]])
+// CHECK-NEXT: [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]])
+// CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]])
+// CHECK-NEXT: [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]])
+// CHECK-NEXT: [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128
+// CHECK-NEXT: [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]])
+// CHECK-NEXT: [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128
+// CHECK-NEXT: [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0)
+// CHECK-NEXT: [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128
+// CHECK-NEXT: [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4)
+// CHECK-NEXT: [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128
+// CHECK-NEXT: [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8)
+// CHECK-NEXT: [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128
+// CHECK-NEXT: [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12)
+// CHECK-NEXT: [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8>
+// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]])
+// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT: ret void
+//
+void test(void) {
+ vuc = vec_add_u128(vuc, vuc);
+ vuc = vec_addc_u128(vuc, vuc);
+ vuc = vec_adde_u128(vuc, vuc, vuc);
+ vuc = vec_addec_u128(vuc, vuc, vuc);
+
+ vuc = vec_sub_u128(vuc, vuc);
+ vuc = vec_subc_u128(vuc, vuc);
+ vuc = vec_sube_u128(vuc, vuc, vuc);
+ vuc = vec_subec_u128(vuc, vuc, vuc);
+
+ vuc = vec_sum_u128(vui, vui);
+ vuc = vec_sum_u128(vul, vul);
+
+ vuc = vec_gfmsum_128(vul, vul);
+ vuc = vec_gfmsum_accum_128(vul, vul, vuc);
+
+ vuc = vec_msum_u128(vul, vul, vuc, 0);
+ vuc = vec_msum_u128(vul, vul, vuc, 4);
+ vuc = vec_msum_u128(vul, vul, vuc, 8);
+ vuc = vec_msum_u128(vul, vul, vuc, 12);
+
+ vul = vec_bperm_u128(vuc, vuc);
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.
I think this should be a candidate for backport, as the problem is a miscompilation regression and the fix is straightforward.
PR llvm#74625 introduced a regression in the code generated for the following set of intrinsics: vec_add_u128, vec_addc_u128, vec_adde_u128, vec_addec_u128, vec_sub_u128, vec_subc_u128, vec_sube_u128, vec_subec_u128, vec_sum_u128, vec_msum_u128, vec_gfmsum_128, and vec_gfmsum_accum_128. This is because the new code incorrectly assumed that a cast from "unsigned __int128" to "vector unsigned char" would simply be a bitcast re-interpretation; instead, this cast actually truncates the __int128 to char and splats the result. Fixed by adding an intermediate cast via a single-element 128-bit integer vector.

Fixes: llvm#109113

(cherry picked from commit baf9b7d)
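The difference between the two casts can be sketched as follows. This is an illustration, not part of the patch; it assumes clang with -fzvector targeting s390x-linux-gnu, and the typedef and function names are hypothetical.

typedef unsigned __int128 u128x1 __attribute__((__vector_size__(16)));

/* Scalar-to-vector cast under the zvector extension: the __int128 value
 * is truncated to unsigned char and splat across all 16 lanes. The buggy
 * code used this cast as if it were a bitcast. */
__vector unsigned char cast_splats(unsigned __int128 x) {
  return (__vector unsigned char)x;
}

/* A cast between two vector types of the same size reinterprets the bits,
 * so routing the scalar through a one-element 128-bit vector preserves
 * the full value. This is the intermediate cast the fix inserts. */
__vector unsigned char cast_bitcasts(unsigned __int128 x) {
  return (__vector unsigned char)(u128x1)x;
}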
@uweigand (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
Fix a regression introduced with the LLVM 18 release, which caused incorrect code to be generated by the following SystemZ vector intrinsics: vec_add_u128, vec_addc_u128, vec_adde_u128, vec_addec_u128, vec_sub_u128, vec_subc_u128, vec_sube_u128, vec_subec_u128, vec_sum_u128, vec_msum_u128, vec_gfmsum_128, vec_gfmsum_accum_128.
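As a usage sketch (hypothetical, not from the PR): with clang -fzvector on s390x, vec_add_u128 treats each operand as one unsigned 128-bit integer and returns the full-width sum, so carries propagate across all 16 byte lanes; the regression instead splat a truncated result into every lane.

#include <vecintrin.h>

/* Full 128-bit addition of two byte vectors reinterpreted as a single
 * unsigned __int128 each; not a lane-wise per-byte add. */
__vector unsigned char add128(__vector unsigned char a,
                              __vector unsigned char b) {
  return vec_add_u128(a, b);
}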