Skip to content

Commit 46d218d

Browse files
authored
[clang][bytecode] Implement __builtin_{memchr,strchr,char_memchr} (llvm#130420)
LLVM has recently started to use `__builtin_memchr` at compile time, so implement it. This still needs some work, but the basics are done.
1 parent b01c71b commit 46d218d

File tree

2 files changed

+224
-1
lines changed

2 files changed

+224
-1
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1960,13 +1960,103 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
19601960

19611961
// However, if we read all the available bytes but were instructed to read
19621962
// even more, diagnose this as a "read of dereferenced one-past-the-end
1963-
// pointer". This is what would happen if we called CheckRead() on every array
1963+
// pointer". This is what would happen if we called CheckLoad() on every array
19641964
// element.
19651965
S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_past_end)
19661966
<< AK_Read << S.Current->getRange(OpPC);
19671967
return false;
19681968
}
19691969

1970+
/// Constant-evaluates memchr, strchr, __builtin_memchr, __builtin_strchr and
/// __builtin_char_memchr.
///
/// The call's arguments are read from the interpreter state: the pointer is
/// parameter 0 of \p Frame, the searched-for value and (for the three-argument
/// forms) the maximum element count are peeked from the stack. On success a
/// Pointer is pushed onto the stack: either the matching element or a null
/// pointer if nothing matched.
///
/// \returns true if a result was pushed; false if evaluation failed (dummy or
/// null pointer, unsupported element type, or a failed CheckLoad on an
/// element).
static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const Function *Func, const CallExpr *Call) {
  unsigned ID = Func->getBuiltinID();
  // The library spellings (as opposed to the __builtin_ ones) are reported
  // through diagnoseNonConstexprBuiltin.
  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  const Pointer &Ptr = getParam<Pointer>(Frame, 0);
  APSInt Desired;
  std::optional<APSInt> MaxLength;
  if (Call->getNumArgs() == 3) {
    // Three-argument form: the length is on top of the stack, the desired
    // value sits below it, so its peek offset covers both aligned sizes.
    MaxLength =
        peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)), 0);
    Desired = peekToAPSInt(
        S.Stk, *S.getContext().classify(Call->getArg(1)),
        align(primSize(*S.getContext().classify(Call->getArg(2)))) +
            align(primSize(*S.getContext().classify(Call->getArg(1)))));
  } else {
    Desired = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(1)));
  }

  // A zero-length search never reads anything and yields a null pointer,
  // even for pointers that would otherwise be rejected below.
  if (MaxLength && MaxLength->isZero()) {
    S.Stk.push<Pointer>();
    return true;
  }

  if (Ptr.isDummy())
    return false;

  // Null is only okay if the given size is 0.
  if (Ptr.isZero()) {
    S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null)
        << AK_Read;
    return false;
  }

  QualType ElemTy = Ptr.getFieldDesc()->isArray()
                        ? Ptr.getFieldDesc()->getElemQualType()
                        : Ptr.getFieldDesc()->getType();
  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;

  // Give up on byte-oriented matching against multibyte elements.
  if (IsRawByte && !isOneByteCharacterType(ElemTy)) {
    S.FFDiag(S.Current->getSource(OpPC),
             diag::note_constexpr_memchr_unsupported)
        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
    return false;
  }

  if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
    // strchr compares directly to the passed integer, and therefore
    // always fails if given an int that is not a char.
    if (Desired !=
        Desired.trunc(S.getASTContext().getCharWidth()).getSExtValue()) {
      S.Stk.push<Pointer>();
      return true;
    }
  }

  uint64_t DesiredVal =
      Desired.trunc(S.getASTContext().getCharWidth()).getZExtValue();
  bool StopAtZero =
      (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr);

  // Index is where the incoming pointer already points inside its array;
  // Step counts the elements examined by this call. MaxLength bounds the
  // number of elements examined, so it must be compared against Step and not
  // against the absolute array index.
  size_t Index = Ptr.getIndex();
  size_t Step = 0;
  for (;;) {
    const Pointer &ElemPtr =
        (Index + Step) > 0 ? Ptr.atIndex(Index + Step) : Ptr;

    // Diagnoses reads of one-past-the-end or otherwise unreadable elements.
    if (!CheckLoad(S, OpPC, ElemPtr))
      return false;

    unsigned char V = static_cast<unsigned char>(ElemPtr.deref<char>());
    if (V == DesiredVal) {
      S.Stk.push<Pointer>(ElemPtr);
      return true;
    }

    // strchr stops at the terminating NUL (after having compared it above,
    // so searching for '\0' still succeeds).
    if (StopAtZero && V == 0)
      break;

    ++Step;
    if (MaxLength && Step == MaxLength->getZExtValue())
      break;
  }

  // No match: the result is a null pointer.
  S.Stk.push<Pointer>();
  return true;
}
2059+
19702060
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
19712061
const CallExpr *Call, uint32_t BuiltinID) {
19722062
const InterpFrame *Frame = S.Current;
@@ -2445,6 +2535,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
24452535
return false;
24462536
break;
24472537

2538+
case Builtin::BImemchr:
2539+
case Builtin::BI__builtin_memchr:
2540+
case Builtin::BIstrchr:
2541+
case Builtin::BI__builtin_strchr:
2542+
#if 0
2543+
case Builtin::BIwcschr:
2544+
case Builtin::BI__builtin_wcschr:
2545+
case Builtin::BImemchr:
2546+
case Builtin::BI__builtin_wmemchr:
2547+
#endif
2548+
case Builtin::BI__builtin_char_memchr:
2549+
if (!interp__builtin_memchr(S, OpPC, Frame, F, Call))
2550+
return false;
2551+
break;
2552+
24482553
default:
24492554
S.FFDiag(S.Current->getLocation(OpPC),
24502555
diag::note_invalid_subexpr_in_const_expr)

clang/test/AST/ByteCode/builtin-functions.cpp

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
extern "C" {
1919
typedef decltype(sizeof(int)) size_t;
2020
extern size_t wcslen(const wchar_t *p);
21+
extern void *memchr(const void *s, int c, size_t n);
22+
extern char *strchr(const char *s, int c);
2123
}
2224

2325
namespace strcmp {
@@ -1351,3 +1353,119 @@ namespace Memcmp {
13511353
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1);
13521354
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0);
13531355
}
1356+
1357+
// Constant-evaluation tests for __builtin_memchr and __builtin_char_memchr,
// plus a check that calling the library function memchr is rejected in a
// constant expression.
namespace Memchr {
1358+
// kStr: string literal with '\xff' at index 4 and an embedded NUL at index 5.
// kFoo: three chars with no NUL terminator.
constexpr const char *kStr = "abca\xff\0d";
1359+
constexpr char kFoo[] = {'f', 'o', 'o'};
1360+
1361+
static_assert(__builtin_memchr(kStr, 'a', 0) == nullptr);
1362+
static_assert(__builtin_memchr(kStr, 'a', 1) == kStr);
1363+
static_assert(__builtin_memchr(kStr, '\0', 5) == nullptr);
1364+
static_assert(__builtin_memchr(kStr, '\0', 6) == kStr + 5);
1365+
static_assert(__builtin_memchr(kStr, '\xff', 8) == kStr + 4);
1366+
static_assert(__builtin_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
1367+
static_assert(__builtin_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
1368+
static_assert(__builtin_memchr(kFoo, 'x', 3) == nullptr);
1369+
// Searching 4 bytes of the 3-byte kFoo must be diagnosed as a read past the end.
static_assert(__builtin_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
1370+
// both-note {{dereferenced one-past-the-end}}
1371+
static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
1372+
// both-note {{dereferenced null}}
1373+
static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr);
1374+
1375+
1376+
// The same checks on char8_t data; only compiled when CHAR8_T is defined.
#if defined(CHAR8_T)
1377+
constexpr const char8_t *kU8Str = u8"abca\xff\0d";
1378+
constexpr char8_t kU8Foo[] = {u8'f', u8'o', u8'o'};
1379+
static_assert(__builtin_memchr(kU8Str, u8'a', 0) == nullptr);
1380+
static_assert(__builtin_memchr(kU8Str, u8'a', 1) == kU8Str);
1381+
static_assert(__builtin_memchr(kU8Str, u8'\0', 5) == nullptr);
1382+
static_assert(__builtin_memchr(kU8Str, u8'\0', 6) == kU8Str + 5);
1383+
static_assert(__builtin_memchr(kU8Str, u8'\xff', 8) == kU8Str + 4);
1384+
static_assert(__builtin_memchr(kU8Str, u8'\xff' + 256, 8) == kU8Str + 4);
1385+
static_assert(__builtin_memchr(kU8Str, u8'\xff' - 256, 8) == kU8Str + 4);
1386+
static_assert(__builtin_memchr(kU8Foo, u8'x', 3) == nullptr);
1387+
static_assert(__builtin_memchr(kU8Foo, u8'x', 4) == nullptr); // both-error {{not an integral constant}} \
1388+
// both-note {{dereferenced one-past-the-end}}
1389+
static_assert(__builtin_memchr(nullptr, u8'x', 3) == nullptr); // both-error {{not an integral constant}} \
1390+
// both-note {{dereferenced null}}
1391+
static_assert(__builtin_memchr(nullptr, u8'x', 0) == nullptr);
1392+
#endif
1393+
1394+
// A zero count succeeds even on an object of incomplete type; a non-zero
// count must fail.
extern struct Incomplete incomplete;
1395+
static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr);
1396+
static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // both-error {{not an integral constant}} \
1397+
// ref-note {{read of incomplete type 'struct Incomplete'}}
1398+
1399+
// The searched-for value is compared after conversion to a byte, for both
// unsigned char and signed char storage.
const unsigned char &u1 = 0xf0;
1400+
auto &&i1 = (const signed char []){-128};
1401+
static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1);
1402+
static_assert(__builtin_memchr(i1, 0x80, 1) == i1);
1403+
1404+
// Elements that are not character types (a scoped enum, bool) must produce a
// "not supported" note.
enum class E : unsigned char {};
1405+
struct EPair { E e, f; };
1406+
constexpr EPair ee{E{240}};
1407+
static_assert(__builtin_memchr(&ee.e, 240, 1) == &ee.e); // both-error {{constant}} \
1408+
// both-note {{not supported}}
1409+
1410+
constexpr bool kBool[] = {false, true, false};
1411+
constexpr const bool *const kBoolPastTheEndPtr = kBool + 3;
1412+
static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); // both-error {{constant}} \
1413+
// both-note {{not supported}}
1414+
static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); // both-error {{constant}} \
1415+
// both-note {{not supported}}
1416+
static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); // both-error {{constant}} \
1417+
// both-note {{not supported}}
1418+
static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // both-error {{constant}} \
1419+
// both-note {{not supported}}
1420+
1421+
// __builtin_char_memchr: the same value, length and null-pointer checks as
// for __builtin_memchr on kStr and kFoo.
static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr);
1422+
static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr);
1423+
static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr);
1424+
static_assert(__builtin_char_memchr(kStr, '\0', 6) == kStr + 5);
1425+
static_assert(__builtin_char_memchr(kStr, '\xff', 8) == kStr + 4);
1426+
static_assert(__builtin_char_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
1427+
static_assert(__builtin_char_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
1428+
static_assert(__builtin_char_memchr(kFoo, 'x', 3) == nullptr);
1429+
static_assert(__builtin_char_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
1430+
// both-note {{dereferenced one-past-the-end}}
1431+
static_assert(__builtin_char_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
1432+
// both-note {{dereferenced null}}
1433+
static_assert(__builtin_char_memchr(nullptr, 'x', 0) == nullptr);
1434+
1435+
// Here the result is dereferenced: read in the static_assert, written
// through inside the constexpr function.
static_assert(*__builtin_char_memchr(kStr, '\xff', 8) == '\xff');
1436+
constexpr bool char_memchr_mutable() {
1437+
char buffer[] = "mutable";
1438+
*__builtin_char_memchr(buffer, 't', 8) = 'r';
1439+
*__builtin_char_memchr(buffer, 'm', 8) = 'd';
1440+
return __builtin_strcmp(buffer, "durable") == 0;
1441+
}
1442+
static_assert(char_memchr_mutable());
1443+
1444+
// The library function memchr (not the builtin) must be rejected in a
// constant expression.
constexpr bool b = !memchr("hello", 'h', 3); // both-error {{constant expression}} \
1445+
// both-note {{non-constexpr function 'memchr' cannot be used in a constant expression}}
1446+
1447+
}
1448+
1449+
// Constant-evaluation tests for __builtin_strchr, plus a check that calling
// the library function strchr is rejected in a constant expression.
namespace Strchr {
1450+
// kStr: string literal with '\xff' at index 4 and an embedded NUL at index 5.
// kFoo: three chars with no NUL terminator.
constexpr const char *kStr = "abca\xff\0d";
1451+
constexpr char kFoo[] = {'f', 'o', 'o'};
1452+
static_assert(__builtin_strchr(kStr, 'a') == kStr);
1453+
static_assert(__builtin_strchr(kStr, 'b') == kStr + 1);
1454+
static_assert(__builtin_strchr(kStr, 'c') == kStr + 2);
1455+
// 'd' only occurs after the embedded NUL, so the search must not find it.
static_assert(__builtin_strchr(kStr, 'd') == nullptr);
1456+
static_assert(__builtin_strchr(kStr, 'e') == nullptr);
1457+
// Searching for the terminator itself yields a pointer to it.
static_assert(__builtin_strchr(kStr, '\0') == kStr + 5);
1458+
// Values shifted by +/-256 must not match, unlike in the Memchr tests.
static_assert(__builtin_strchr(kStr, 'a' + 256) == nullptr);
1459+
static_assert(__builtin_strchr(kStr, 'a' - 256) == nullptr);
1460+
static_assert(__builtin_strchr(kStr, '\xff') == kStr + 4);
1461+
static_assert(__builtin_strchr(kStr, '\xff' + 256) == nullptr);
1462+
static_assert(__builtin_strchr(kStr, '\xff' - 256) == nullptr);
1463+
static_assert(__builtin_strchr(kFoo, 'o') == kFoo + 1);
1464+
// kFoo has no terminator, so a failing search must be diagnosed as a read
// past the end.
static_assert(__builtin_strchr(kFoo, 'x') == nullptr); // both-error {{not an integral constant}} \
1465+
// both-note {{dereferenced one-past-the-end}}
1466+
static_assert(__builtin_strchr(nullptr, 'x') == nullptr); // both-error {{not an integral constant}} \
1467+
// both-note {{dereferenced null}}
1468+
1469+
// The library function strchr (not the builtin) must be rejected in a
// constant expression.
constexpr bool a = !strchr("hello", 'h'); // both-error {{constant expression}} \
1470+
// both-note {{non-constexpr function 'strchr' cannot be used in a constant expression}}
1471+
}

0 commit comments

Comments
 (0)