Skip to content

Commit 8d55836

Browse files
[SYCL] Avoid infinite loop when kernel fails to compile with memory error (#18888)
The runtime retries building a kernel when compilation fails. However, when UR returned a memory error (e.g. UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY), the attempt counter was not compared against the maximum number of attempts, so the compiler was invoked again and again without bound, and the loop counter would eventually have overflowed.
1 parent a95c342 commit 8d55836

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -818,7 +818,9 @@ class KernelProgramCache {
818818
BuildResult->Error.Code == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
819819
reset();
820820
BuildResult->updateAndNotify(BuildState::BS_Initial);
821-
continue;
821+
if (AttemptCounter + 1 < MaxAttempts) {
822+
continue;
823+
}
822824
}
823825

824826
BuildResult->updateAndNotify(BuildState::BS_Failed);

sycl/unittests/kernel-and-program/OutOfResources.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,31 @@ TEST_P(OutOfResourcesTestSuite, urProgramCreate) {
109109
}
110110
}
111111

112+
static ur_result_t ProgramCreateWithILAlwaysFail(void *) { return ErrorCode; }
113+
114+
TEST_P(OutOfResourcesTestSuite, urProgramCreateAlwaysFail) {
115+
sycl::unittest::UrMock<> Mock;
116+
ErrorCode = GetParam();
117+
mock::getCallbacks().set_before_callback("urProgramCreateWithIL",
118+
&ProgramCreateWithILAlwaysFail);
119+
120+
sycl::platform Plt{sycl::platform()};
121+
sycl::context Ctx{Plt};
122+
auto CtxImpl = detail::getSyclObjImpl(Ctx);
123+
queue Q(Ctx, default_selector_v);
124+
125+
bool ThrewException = false;
126+
127+
try {
128+
Q.single_task<class OutOfResourcesKernel1>([] {});
129+
} catch (exception &Ex) {
130+
auto Code = detail::get_ur_error(Ex);
131+
EXPECT_EQ(Code, ErrorCode);
132+
ThrewException = true;
133+
}
134+
EXPECT_TRUE(ThrewException);
135+
}
136+
112137
static int nProgramLink = 0;
113138

114139
static ur_result_t redefinedProgramLink(void *) {

0 commit comments

Comments
 (0)