-
Notifications
You must be signed in to change notification settings - Fork 13.5k
error : couldn't allocate output register for constraint 'w' #141022
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
According to the documentation at https://llvm.org/docs/LangRef.html?referer=https%3A%2F%2Fcloud.tencent.com%2Fdeveloper%2Fask%2Fsof%2F114457294#supported-constraint-code-list, LLVM supports the constraint 'w', so I'm confused by this error. Is this a compiler issue? |
@llvm/issue-subscribers-backend-x86 Author: None (scoutzeng)
Environment is as follows:
Clang version: 19.1.1
Visual Studio 17 2022
When i compile an arm asm snippet with clang-cl, compiler output that it can't allocate output register for constraint 'w'. __mmask64 _mm512_test_epi8_mask(__m512i a, __m512i b)
{
/*
 * Reproducer from the report. Computes vtstq_u8 on each of the four
 * uint8x16_t chunks of a and b, ANDs the result with a mask vector, and
 * then uses inline asm to sum each 8-byte half into one byte of r.
 *
 * a, b:    project-declared __m512i values, accessed here as four
 *          uint8x16_t chunks through vect_u8[0..3].
 * Returns: the eight bytes of r reloaded as one 64-bit lane.
 */
/* Mask vector loaded from g_mask_epi8, which is defined outside this snippet
   (presumably one distinct bit weight per byte lane - TODO confirm). */
uint8x16_t mask_and = vld1q_u8(g_mask_epi8);
__m512i tmp;
/* Per chunk: vtstq_u8 yields all-ones in every byte where (a & b) is nonzero;
   the AND then keeps only the mask_and bits of those bytes. */
tmp.vect_u8[0] = vandq_u8(vtstq_u8(a.vect_u8[0], b.vect_u8[0]), mask_and);
tmp.vect_u8[1] = vandq_u8(vtstq_u8(a.vect_u8[1], b.vect_u8[1]), mask_and);
tmp.vect_u8[2] = vandq_u8(vtstq_u8(a.vect_u8[2], b.vect_u8[2]), mask_and);
tmp.vect_u8[3] = vandq_u8(vtstq_u8(a.vect_u8[3], b.vect_u8[3]), mask_and);
/* One result byte per 64-bit half: r[2k] receives the low half of chunk k,
   r[2k+1] the high half. */
uint8_t r[8];
__asm__ __volatile__ (
/* Sum the low eight bytes of each chunk into the even-numbered outputs. */
"addv %b[r0], %[t0].8b \n\t"
"addv %b[r2], %[t1].8b \n\t"
"addv %b[r4], %[t2].8b \n\t"
"addv %b[r6], %[t3].8b \n\t"
/* Copy the high 64-bit lane of each chunk over its low lane. */
"ins %[t0].d[0], %[t0].d[1] \n\t"
"ins %[t1].d[0], %[t1].d[1] \n\t"
"ins %[t2].d[0], %[t2].d[1] \n\t"
"ins %[t3].d[0], %[t3].d[1] \n\t"
/* Sum again: these are now the former high halves, stored to the odd-numbered outputs. */
"addv %b[r1], %[t0].8b \n\t"
"addv %b[r3], %[t1].8b \n\t"
"addv %b[r5], %[t2].8b \n\t"
"addv %b[r7], %[t3].8b \n\t"
/* Outputs r0..r7 are uint8_t lvalues bound with the w constraint. A reply in
   this thread suggests this narrow output type is what clang rejects. */
:[r0]"=w"(r[0]), [r1]"=w"(r[1]), [r2]"=w"(r[2]), [r3]"=w"(r[3]), [r4]"=w"(r[4]), [r5]"=w"(r[5]), [r6]"=w"(r[6]),
[r7]"=w"(r[7]),
/* t0..t3 are read-write operands because the ins instructions modify them. */
[t0]"+w"(tmp.vect_u8[0]), [t1]"+w"(tmp.vect_u8[1]), [t2]"+w"(tmp.vect_u8[2]), [t3]"+w"(tmp.vect_u8[3])
);
/* Reload the eight result bytes as a single 64-bit lane and return lane 0. */
uint64x1_t res = vreinterpret_u64_u8(vld1_u8((const uint8_t *)r));
return vget_lane_u64(res, 0);
} |
@llvm/issue-subscribers-backend-aarch64 Author: None (scoutzeng)
Environment is as follows:
Clang version: 19.1.1
Visual Studio 17 2022
When i compile an arm asm snippet with clang-cl, compiler output that it can't allocate output register for constraint 'w'. __mmask64 _mm512_test_epi8_mask(__m512i a, __m512i b)
{
/*
 * Reproducer from the report. Computes vtstq_u8 on each of the four
 * uint8x16_t chunks of a and b, ANDs the result with a mask vector, and
 * then uses inline asm to sum each 8-byte half into one byte of r.
 *
 * a, b:    project-declared __m512i values, accessed here as four
 *          uint8x16_t chunks through vect_u8[0..3].
 * Returns: the eight bytes of r reloaded as one 64-bit lane.
 */
/* Mask vector loaded from g_mask_epi8, which is defined outside this snippet
   (presumably one distinct bit weight per byte lane - TODO confirm). */
uint8x16_t mask_and = vld1q_u8(g_mask_epi8);
__m512i tmp;
/* Per chunk: vtstq_u8 yields all-ones in every byte where (a & b) is nonzero;
   the AND then keeps only the mask_and bits of those bytes. */
tmp.vect_u8[0] = vandq_u8(vtstq_u8(a.vect_u8[0], b.vect_u8[0]), mask_and);
tmp.vect_u8[1] = vandq_u8(vtstq_u8(a.vect_u8[1], b.vect_u8[1]), mask_and);
tmp.vect_u8[2] = vandq_u8(vtstq_u8(a.vect_u8[2], b.vect_u8[2]), mask_and);
tmp.vect_u8[3] = vandq_u8(vtstq_u8(a.vect_u8[3], b.vect_u8[3]), mask_and);
/* One result byte per 64-bit half: r[2k] receives the low half of chunk k,
   r[2k+1] the high half. */
uint8_t r[8];
__asm__ __volatile__ (
/* Sum the low eight bytes of each chunk into the even-numbered outputs. */
"addv %b[r0], %[t0].8b \n\t"
"addv %b[r2], %[t1].8b \n\t"
"addv %b[r4], %[t2].8b \n\t"
"addv %b[r6], %[t3].8b \n\t"
/* Copy the high 64-bit lane of each chunk over its low lane. */
"ins %[t0].d[0], %[t0].d[1] \n\t"
"ins %[t1].d[0], %[t1].d[1] \n\t"
"ins %[t2].d[0], %[t2].d[1] \n\t"
"ins %[t3].d[0], %[t3].d[1] \n\t"
/* Sum again: these are now the former high halves, stored to the odd-numbered outputs. */
"addv %b[r1], %[t0].8b \n\t"
"addv %b[r3], %[t1].8b \n\t"
"addv %b[r5], %[t2].8b \n\t"
"addv %b[r7], %[t3].8b \n\t"
/* Outputs r0..r7 are uint8_t lvalues bound with the w constraint. A reply in
   this thread suggests this narrow output type is what clang rejects. */
:[r0]"=w"(r[0]), [r1]"=w"(r[1]), [r2]"=w"(r[2]), [r3]"=w"(r[3]), [r4]"=w"(r[4]), [r5]"=w"(r[5]), [r6]"=w"(r[6]),
[r7]"=w"(r[7]),
/* t0..t3 are read-write operands because the ins instructions modify them. */
[t0]"+w"(tmp.vect_u8[0]), [t1]"+w"(tmp.vect_u8[1]), [t2]"+w"(tmp.vect_u8[2]), [t3]"+w"(tmp.vect_u8[3])
);
/* Reload the eight result bytes as a single 64-bit lane and return lane 0. */
uint64x1_t res = vreinterpret_u64_u8(vld1_u8((const uint8_t *)r));
return vget_lane_u64(res, 0);
} |
It looks like the compiler is specifically unhappy that the type of the output value is uint8_t, as opposed to something wider (uint32_t etc.). I guess it's a bug, since gcc is happy with it... As a workaround, you can change the output value to a wider integer. Reduced testcase:
|
Out of interest - why do you need inline assembly, and can you just use addp to accumulate the bits? Something like
|
Environment is as follows:
Clang version: 19.1.1
Visual Studio 17 2022
When I compile an Arm inline-asm snippet with clang-cl, the compiler reports that it can't allocate an output register for constraint 'w'.
The text was updated successfully, but these errors were encountered: