Interpreter GC info stage 3: Report locals to the GC #114469
@@ -8,6 +8,7 @@
#endif // debug_instrumented_return

#include "interpreter.h"
#include "stackmap.h"

#include <inttypes.h>

@@ -832,7 +833,10 @@ void InterpCompiler::EmitCode()
        for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext)
        {
            if (InterpOpIsEmitNop(ins->opcode))
            {
                ins->nativeOffset = (int32_t)(ip - m_pMethodCode);
                continue;
            }

            ip = EmitCodeIns(ip, ins, &relocs);
        }
@@ -862,25 +866,158 @@ void InterpCompiler::EmitCode()
    m_compHnd->setBoundaries(m_methodInfo->ftn, m_ILToNativeMapSize, m_pILToNativeMap);
}

#ifdef FEATURE_INTERPRETER
class InterpGcSlotAllocator
{
    InterpCompiler *m_compiler;
    InterpreterGcInfoEncoder *m_encoder;
    // [pObjects, pByrefs]
    GcSlotId *m_slotTables[2];
    unsigned m_slotTableSize;

#ifdef DEBUG
    bool m_verbose;
#endif

    GcSlotId* LocateGcSlotTableEntry(uint32_t offsetBytes, GcSlotFlags flags)
    {
        GcSlotId *slotTable = m_slotTables[(flags & GC_SLOT_INTERIOR) == GC_SLOT_INTERIOR];
        uint32_t slotIndex = offsetBytes / sizeof(void *);
        assert(slotIndex < m_slotTableSize);
        return &slotTable[slotIndex];
    }

public:
    InterpGcSlotAllocator(InterpCompiler *compiler, InterpreterGcInfoEncoder *encoder)
        : m_compiler(compiler)
        , m_encoder(encoder)
        , m_slotTableSize(compiler->m_totalVarsStackSize / sizeof(void *))
#ifdef DEBUG
        , m_verbose(compiler->m_verbose)
#endif
    {
        for (int i = 0; i < 2; i++)
        {
            m_slotTables[i] = new (compiler) GcSlotId[m_slotTableSize];
            // 0 is a valid slot id so default-initialize all the slots to 0xFFFFFFFF
            memset(m_slotTables[i], 0xFF, sizeof(GcSlotId) * m_slotTableSize);
        }
    }

    void AllocateOrReuseGcSlot(uint32_t offsetBytes, GcSlotFlags flags)
    {
        GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags);
        bool allocateNewSlot = *pSlot == ((GcSlotId)-1);

        if (allocateNewSlot)
        {
            // Important to pass GC_FRAMEREG_REL, the default is broken due to GET_CALLER_SP being unimplemented
            *pSlot = m_encoder->GetStackSlotId(offsetBytes, flags, GC_FRAMEREG_REL);
        }
        else
        {
            assert((flags & GC_SLOT_UNTRACKED) == 0);
        }

        INTERP_DUMP(
            "%s %s%sgcslot %u at %u\n",
            allocateNewSlot ? "Allocated" : "Reused",
            (flags & GC_SLOT_UNTRACKED) ? "global " : "",
            (flags & GC_SLOT_INTERIOR) ? "interior " : "",
            *pSlot,
            offsetBytes
        );
    }

    void ReportLiveRange(uint32_t offsetBytes, GcSlotFlags flags, int varIndex)
    {
        GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags);
        assert(varIndex < m_compiler->m_varsSize);

        InterpVar *pVar = &m_compiler->m_pVars[varIndex];
        if (pVar->global)
            return;

        GcSlotId slot = *pSlot;
        assert(slot != ((GcSlotId)-1));
        assert(pVar->liveStart);
        assert(pVar->liveEnd);
        uint32_t startOffset = m_compiler->ConvertOffset(m_compiler->GetLiveStartOffset(varIndex)),
            endOffset = m_compiler->ConvertOffset(m_compiler->GetLiveEndOffset(varIndex));
        INTERP_DUMP(
            "Slot %u (%s var #%d offset %u) live [IR_%04x - IR_%04x] [%u - %u]\n",
            slot, pVar->global ? "global" : "local",
            varIndex, pVar->offset,
            m_compiler->GetLiveStartOffset(varIndex), m_compiler->GetLiveEndOffset(varIndex),
            startOffset, endOffset
        );
        m_encoder->SetSlotState(startOffset, slot, GC_SLOT_LIVE);
        m_encoder->SetSlotState(endOffset, slot, GC_SLOT_DEAD);
    }
};
#endif

void InterpCompiler::BuildGCInfo(InterpMethod *pInterpMethod)
{
#ifdef FEATURE_INTERPRETER
    InterpIAllocator* pAllocator = new (this) InterpIAllocator(this);
    InterpreterGcInfoEncoder* gcInfoEncoder = new (this) InterpreterGcInfoEncoder(m_compHnd, m_methodInfo, pAllocator, Interp_NOMEM);
    assert(gcInfoEncoder);
    InterpGcSlotAllocator slotAllocator (this, gcInfoEncoder);

    gcInfoEncoder->SetCodeLength(ConvertOffset(m_methodCodeSize));

    gcInfoEncoder->SetCodeLength(m_methodCodeSize);
    INTERP_DUMP("Allocating gcinfo slots for %u vars\n", m_varsSize);

    for (int pass = 0; pass < 2; pass++)
    {
        for (int i = 0; i < m_varsSize; i++)
        {
            InterpVar *pVar = &m_pVars[i];
            GcSlotFlags flags = pVar->global
                ? (GcSlotFlags)GC_SLOT_UNTRACKED
                : (GcSlotFlags)0;

            // TODO: Request slot IDs for all our locals before finalizing
            switch (pVar->interpType) {
                case InterpTypeO:
                    break;
                case InterpTypeByRef:
                    flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
                    break;
                case InterpTypeVT:
                {
                    InterpreterStackMap *stackMap = GetInterpreterStackMap(m_compHnd, pVar->clsHnd);
                    for (unsigned j = 0; j < stackMap->m_slotCount; j++)
                    {
                        InterpreterStackMapSlot slotInfo = stackMap->m_slots[j];
                        unsigned fieldOffset = pVar->offset + slotInfo.m_offsetBytes;
                        GcSlotFlags fieldFlags = (GcSlotFlags)(flags | slotInfo.m_gcSlotFlags);
                        if (pass == 0)
                            slotAllocator.AllocateOrReuseGcSlot(fieldOffset, fieldFlags);
                        else
                            slotAllocator.ReportLiveRange(fieldOffset, fieldFlags, i);
                    }

                    gcInfoEncoder->FinalizeSlotIds();
                    // Don't perform the regular allocateGcSlot call
                    continue;
                }
                default:
                    // Neither an object, interior pointer, or vt, so no slot needed
                    continue;
            }

            // TODO: Use finalized slot IDs to declare live ranges
            if (pass == 0)
                slotAllocator.AllocateOrReuseGcSlot(pVar->offset, flags);
            else
                slotAllocator.ReportLiveRange(pVar->offset, flags, i);
        }

        gcInfoEncoder->Build();
        if (pass == 0)
            gcInfoEncoder->FinalizeSlotIds();
        else
            gcInfoEncoder->Build();
    }

    // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
    // let's save the values anyway for debugging purposes
    gcInfoEncoder->Emit();
#endif
}
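For readers new to the GC info encoder, the two-pass loop in BuildGCInfo above follows the encoder's required ordering: every stack slot must be requested and finalized before any live range is reported. The sketch below is a simplified illustration of that ordering, not code from this PR; it uses only the encoder calls that appear in the diff, and the offsets, flags, and code length are made-up values.

    // Illustrative only: the call ordering BuildGCInfo relies on, with invented offsets.
    void SketchGcInfoOrdering(InterpreterGcInfoEncoder *encoder)
    {
        encoder->SetCodeLength(128); // total native code size, in converted offsets

        // Pass 0: request slot ids for everything that can hold a GC pointer...
        GcSlotId objSlot   = encoder->GetStackSlotId(8,  (GcSlotFlags)0,                GC_FRAMEREG_REL);
        GcSlotId byrefSlot = encoder->GetStackSlotId(16, (GcSlotFlags)GC_SLOT_INTERIOR, GC_FRAMEREG_REL);
        // ...then freeze the slot table before any liveness is reported.
        encoder->FinalizeSlotIds();

        // Pass 1: report live ranges for tracked slots (untracked slots are treated as
        // live for the whole method, which is why ReportLiveRange skips global vars).
        encoder->SetSlotState(4,  objSlot,   GC_SLOT_LIVE);
        encoder->SetSlotState(60, objSlot,   GC_SLOT_DEAD);
        encoder->SetSlotState(12, byrefSlot, GC_SLOT_LIVE);
        encoder->SetSlotState(40, byrefSlot, GC_SLOT_DEAD);
        encoder->Build();

        // Emit() stores the blob via ICorJitInfo::allocGCInfo, as the comment in BuildGCInfo notes.
        encoder->Emit();
    }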
@@ -908,6 +1045,8 @@ int32_t* InterpCompiler::GetCode(int32_t *pCodeSize)

InterpCompiler::InterpCompiler(COMP_HANDLE compHnd,
                               CORINFO_METHOD_INFO* methodInfo)
    : m_pInitLocalsIns(nullptr)
    , m_globalVarsStackTop(0)
{
    // Fill in the thread-local used for assertions
    t_InterpJitInfoTls = compHnd;

@@ -948,6 +1087,7 @@ InterpMethod* InterpCompiler::CompileMethod()
#endif

    AllocOffsets();
    PatchInitLocals(m_methodInfo);

    EmitCode();

@@ -965,6 +1105,34 @@ InterpMethod* InterpCompiler::CompileMethod()
    return CreateInterpMethod();
}

void InterpCompiler::PatchInitLocals(CORINFO_METHOD_INFO* methodInfo)
{
    // We may have global vars containing managed pointers or interior pointers, so we need
    // to zero the region of the stack containing global vars, not just IL locals.
    // data[0] is either the start of IL locals or the end of IL locals depending on whether local
    // zeroing was enabled for this method. We want to preserve that so we don't unnecessarily
    // zero the IL locals if the method's author didn't want them zeroed
    Review comment: This comment is outdated now
    int32_t startOffset = m_pInitLocalsIns->data[0];
    int32_t totalSize = m_globalVarsStackTop - startOffset;
    if (totalSize > m_pInitLocalsIns->data[1])
    {
        INTERP_DUMP(
            "Expanding initlocals from [%d-%d] to [%d-%d]\n",
            startOffset, startOffset + m_pInitLocalsIns->data[1],
            startOffset, startOffset + totalSize
        );
        m_pInitLocalsIns->data[1] = totalSize;
    }
    else
    {
        INTERP_DUMP(
            "Not expanding initlocals from [%d-%d] for global vars stack top of %d\n",
            startOffset, startOffset + m_pInitLocalsIns->data[1],
            m_globalVarsStackTop
        );
    }
}

// Adds a conversion instruction for the value pointed to by sp, also updating the stack information
void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp)
{

@@ -1916,12 +2084,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
    AddIns(INTOP_BREAKPOINT);
#endif

    if ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) && m_ILLocalsSize > 0)
    {
        AddIns(INTOP_INITLOCALS);
        m_pLastNewIns->data[0] = m_ILLocalsOffset;
        m_pLastNewIns->data[1] = m_ILLocalsSize;
    }
    // We need to always generate this opcode because even if we have no IL locals, we may have
    // global vars which contain managed pointers or interior pointers
    m_pInitLocalsIns = AddIns(INTOP_INITLOCALS);
    // if (methodInfo->options & CORINFO_OPT_INIT_LOCALS)
    // FIXME: We can't currently skip zeroing locals because we don't have accurate liveness for global refs and byrefs
    m_pInitLocalsIns->data[0] = m_ILLocalsOffset;
    m_pInitLocalsIns->data[1] = m_ILLocalsSize;

    codeEnd = m_ip + m_ILCodeSize;
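To make the interaction between the GenerateCode change above and PatchInitLocals concrete, here is a small worked example. All of the offsets are invented for illustration; none of these numbers come from the PR.

    // Hypothetical numbers showing how PatchInitLocals widens the INTOP_INITLOCALS range.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        int32_t ilLocalsOffset     = 32; // m_pInitLocalsIns->data[0]: start of the IL locals region
        int32_t ilLocalsSize       = 16; // m_pInitLocalsIns->data[1]: size stored by GenerateCode
        int32_t globalVarsStackTop = 72; // highest end offset of any global var that may hold a GC pointer

        // Same check PatchInitLocals performs: widen the zeroed region if ref-holding
        // global vars extend past the IL locals.
        int32_t totalSize = globalVarsStackTop - ilLocalsOffset; // 72 - 32 = 40
        if (totalSize > ilLocalsSize)
            ilLocalsSize = totalSize;

        // INTOP_INITLOCALS now zeroes [32, 72) instead of just [32, 48).
        printf("zero [%d, %d)\n", ilLocalsOffset, ilLocalsOffset + ilLocalsSize);
        return 0;
    }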
@@ -103,19 +103,19 @@ void InterpCompiler::InitializeGlobalVars()
// the max offset of all call offsets on which the call depends. Stack ensures that all call offsets
// on which the call depends are calculated before the call in question, by deferring calls from the
// last to the first one.
//
//
// This method allocates offsets of resolved calls following a constraint where the base offset
// of a call must be greater than the offset of any argument of other active call args. It first
// removes the call from an array of active calls. If a match is found, the call is removed from
// the array by moving the last entry into its place. Otherwise, it is a call without arguments.
//
//
// If there are active calls, the call in question is pushed onto the stack as a deferred call.
// The call contains a list of other active calls on which it depends. Those calls need to be
// resolved first in order to determine optimal base offset for the call in question. Otherwise,
// if there are no active calls, we resolve the call in question and deferred calls from the stack.
//
//
// For better understanding, consider a simple example:
//  a <- _
//  a <- _
//  b <- _
//  call1 c <- b
//  d <- _

@@ -145,7 +145,7 @@ void InterpCompiler::EndActiveCall(InterpInst *call)
        for (int i = 0; i < m_pActiveCalls->GetSize(); i++)
            callDeps = TSList<InterpInst*>::Push(callDeps, m_pActiveCalls->Get(i));
        call->info.pCallInfo->callDeps = callDeps;

        m_pDeferredCalls = TSList<InterpInst*>::Push(m_pDeferredCalls, call);
    }
    else

@@ -226,7 +226,8 @@ void InterpCompiler::AllocOffsets()

    INTERP_DUMP("\nAllocating var offsets\n");

    int finalVarsStackSize = m_totalVarsStackSize;
    int finalVarsStackSize = m_totalVarsStackSize,
        globalVarsStackTop = m_totalVarsStackSize;

    // We now have the top of stack offset. All local regs are allocated after this offset, with each basic block
    for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB)

@@ -396,14 +397,34 @@ void InterpCompiler::AllocOffsets()
    m_paramAreaOffset = finalVarsStackSize;
    for (int32_t i = 0; i < m_varsSize; i++)
    {
        InterpVar *pVar = &m_pVars[i];
        // These are allocated separately at the end of the stack
        if (m_pVars[i].callArgs)
        if (pVar->callArgs)
        {
            m_pVars[i].offset += m_paramAreaOffset;
            int32_t topOffset = m_pVars[i].offset + m_pVars[i].size;
            pVar->offset += m_paramAreaOffset;
            int32_t topOffset = pVar->offset + pVar->size;
            if (finalVarsStackSize < topOffset)
                finalVarsStackSize = topOffset;
        }

        // For any global vars that might contain managed pointers we need to maintain a 'global stack top'
        // which specifies what stack region we need to zero at method entry in order to avoid reporting
        // garbage pointers to the GC when it does a stackwalk
        // Non-global vars have accurate liveness ranges we report to the GC, so we don't care about them
        if (
            pVar->global && (
                (pVar->interpType == InterpTypeO) ||
                (pVar->interpType == InterpTypeByRef) ||
                (pVar->interpType == InterpTypeVT)
            )
        )
        Review comment: If you want you could improve the check here for valuetypes with
        {
            int32_t endOfVar = pVar->offset + pVar->size;
            if (endOfVar > globalVarsStackTop)
                globalVarsStackTop = endOfVar;
            Review comment: This name is a bit misleading, since it is the top of the global vars that might contain refs, and not the end of all global vars. Maybe a more explicit name, like
        }
    }

    m_globalVarsStackTop = globalVarsStackTop;
    m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT);
}
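The truncated review comment on the valuetype case above appears to suggest tightening the check so that only structs that actually contain GC pointers extend the global stack top. Purely as a sketch of that idea, not code from this PR, and assuming GetInterpreterStackMap reports zero slots for pointer-free structs:

    // Illustrative only: a variant of the global-var check that skips valuetypes without GC fields.
    #include "interpreter.h"
    #include "stackmap.h"

    static bool GlobalVarMayHoldGcPointers(COMP_HANDLE compHnd, const InterpVar *pVar)
    {
        if (!pVar->global)
            return false; // non-global vars get accurate liveness reporting instead

        switch (pVar->interpType)
        {
            case InterpTypeO:
            case InterpTypeByRef:
                return true;
            case InterpTypeVT:
                // Only structs that actually contain object refs or byrefs need zero-initialization;
                // assumes the stack map enumerates exactly those slots, as BuildGCInfo's VT case does.
                return GetInterpreterStackMap(compHnd, pVar->clsHnd)->m_slotCount > 0;
            default:
                return false;
        }
    }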
@@ -16,8 +16,6 @@
#include <malloc.h>
#endif

#include <algorithm>

#include "corhdr.h"
#include "corjit.h"
Review comment: As discussed in offline conversation, it would be better to start reusing code from the regular JIT. There appears to be quite a bit of code that can be shared between the regular JIT and the interpreter JIT.

Reply: I'll create a 'jitshared' and move JitHashTable into it and use that, then.

Reply: Moving JitHashTable is looking very time intensive, could I do it in a follow-up PR?