Skip to content

Interpreter GC info stage 3: Report locals to the GC #114469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/coreclr/interpreter/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
set(CMAKE_INCLUDE_CURRENT_DIR ON)

# So simdhash will build correctly without a dn-config.h
add_compile_definitions(NO_CONFIG_H)

set(INTERPRETER_SOURCES
compiler.cpp
compileropt.cpp
intops.cpp
interpconfig.cpp
eeinterp.cpp)
eeinterp.cpp
stackmap.cpp
../../native/containers/dn-simdhash.c
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed in offline conversation, it would be better to start reusing code from the regular JIT. There appears to be a quite a bit of code that can be shared between regular JIT and the interpreter JIT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll create a 'jitshared' and move JitHashTable into it and use that, then.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving JitHashTable is looking very time intensive, could I do it in a follow-up PR?

../../native/containers/dn-simdhash-ptr-ptr.c)

set(INTERPRETER_LINK_LIBRARIES
gcinfo
Expand Down
195 changes: 182 additions & 13 deletions src/coreclr/interpreter/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#endif // debug_instrumented_return

#include "interpreter.h"
#include "stackmap.h"

#include <inttypes.h>

Expand Down Expand Up @@ -832,7 +833,10 @@ void InterpCompiler::EmitCode()
for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext)
{
if (InterpOpIsEmitNop(ins->opcode))
{
ins->nativeOffset = (int32_t)(ip - m_pMethodCode);
continue;
}

ip = EmitCodeIns(ip, ins, &relocs);
}
Expand Down Expand Up @@ -862,25 +866,158 @@ void InterpCompiler::EmitCode()
m_compHnd->setBoundaries(m_methodInfo->ftn, m_ILToNativeMapSize, m_pILToNativeMap);
}

#ifdef FEATURE_INTERPRETER
class InterpGcSlotAllocator
{
InterpCompiler *m_compiler;
InterpreterGcInfoEncoder *m_encoder;
// [pObjects, pByrefs]
GcSlotId *m_slotTables[2];
unsigned m_slotTableSize;

#ifdef DEBUG
bool m_verbose;
#endif

GcSlotId* LocateGcSlotTableEntry(uint32_t offsetBytes, GcSlotFlags flags)
{
GcSlotId *slotTable = m_slotTables[(flags & GC_SLOT_INTERIOR) == GC_SLOT_INTERIOR];
uint32_t slotIndex = offsetBytes / sizeof(void *);
assert(slotIndex < m_slotTableSize);
return &slotTable[slotIndex];
}

public:
InterpGcSlotAllocator(InterpCompiler *compiler, InterpreterGcInfoEncoder *encoder)
: m_compiler(compiler)
, m_encoder(encoder)
, m_slotTableSize(compiler->m_totalVarsStackSize / sizeof(void *))
#ifdef DEBUG
, m_verbose(compiler->m_verbose)
#endif
{
for (int i = 0; i < 2; i++)
{
m_slotTables[i] = new (compiler) GcSlotId[m_slotTableSize];
// 0 is a valid slot id so default-initialize all the slots to 0xFFFFFFFF
memset(m_slotTables[i], 0xFF, sizeof(GcSlotId) * m_slotTableSize);
}
}

void AllocateOrReuseGcSlot(uint32_t offsetBytes, GcSlotFlags flags)
{
GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags);
bool allocateNewSlot = *pSlot == ((GcSlotId)-1);

if (allocateNewSlot)
{
// Important to pass GC_FRAMEREG_REL, the default is broken due to GET_CALLER_SP being unimplemented
*pSlot = m_encoder->GetStackSlotId(offsetBytes, flags, GC_FRAMEREG_REL);
}
else
{
assert((flags & GC_SLOT_UNTRACKED) == 0);
}

INTERP_DUMP(
"%s %s%sgcslot %u at %u\n",
allocateNewSlot ? "Allocated" : "Reused",
(flags & GC_SLOT_UNTRACKED) ? "global " : "",
(flags & GC_SLOT_INTERIOR) ? "interior " : "",
*pSlot,
offsetBytes
);
}

void ReportLiveRange(uint32_t offsetBytes, GcSlotFlags flags, int varIndex)
{
GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags);
assert(varIndex < m_compiler->m_varsSize);

InterpVar *pVar = &m_compiler->m_pVars[varIndex];
if (pVar->global)
return;

GcSlotId slot = *pSlot;
assert(slot != ((GcSlotId)-1));
assert(pVar->liveStart);
assert(pVar->liveEnd);
uint32_t startOffset = m_compiler->ConvertOffset(m_compiler->GetLiveStartOffset(varIndex)),
endOffset = m_compiler->ConvertOffset(m_compiler->GetLiveEndOffset(varIndex));
INTERP_DUMP(
"Slot %u (%s var #%d offset %u) live [IR_%04x - IR_%04x] [%u - %u]\n",
slot, pVar->global ? "global" : "local",
varIndex, pVar->offset,
m_compiler->GetLiveStartOffset(varIndex), m_compiler->GetLiveEndOffset(varIndex),
startOffset, endOffset
);
m_encoder->SetSlotState(startOffset, slot, GC_SLOT_LIVE);
m_encoder->SetSlotState(endOffset, slot, GC_SLOT_DEAD);
}
};
#endif

void InterpCompiler::BuildGCInfo(InterpMethod *pInterpMethod)
{
#ifdef FEATURE_INTERPRETER
InterpIAllocator* pAllocator = new (this) InterpIAllocator(this);
InterpreterGcInfoEncoder* gcInfoEncoder = new (this) InterpreterGcInfoEncoder(m_compHnd, m_methodInfo, pAllocator, Interp_NOMEM);
assert(gcInfoEncoder);
InterpGcSlotAllocator slotAllocator (this, gcInfoEncoder);

gcInfoEncoder->SetCodeLength(ConvertOffset(m_methodCodeSize));

gcInfoEncoder->SetCodeLength(m_methodCodeSize);
INTERP_DUMP("Allocating gcinfo slots for %u vars\n", m_varsSize);

for (int pass = 0; pass < 2; pass++)
{
for (int i = 0; i < m_varsSize; i++)
{
InterpVar *pVar = &m_pVars[i];
GcSlotFlags flags = pVar->global
? (GcSlotFlags)GC_SLOT_UNTRACKED
: (GcSlotFlags)0;

// TODO: Request slot IDs for all our locals before finalizing
switch (pVar->interpType) {
case InterpTypeO:
break;
case InterpTypeByRef:
flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
break;
case InterpTypeVT:
{
InterpreterStackMap *stackMap = GetInterpreterStackMap(m_compHnd, pVar->clsHnd);
for (unsigned j = 0; j < stackMap->m_slotCount; j++)
{
InterpreterStackMapSlot slotInfo = stackMap->m_slots[j];
unsigned fieldOffset = pVar->offset + slotInfo.m_offsetBytes;
GcSlotFlags fieldFlags = (GcSlotFlags)(flags | slotInfo.m_gcSlotFlags);
if (pass == 0)
slotAllocator.AllocateOrReuseGcSlot(fieldOffset, fieldFlags);
else
slotAllocator.ReportLiveRange(fieldOffset, fieldFlags, i);
}

gcInfoEncoder->FinalizeSlotIds();
// Don't perform the regular allocateGcSlot call
continue;
}
default:
// Neither an object, interior pointer, or vt, so no slot needed
continue;
}

// TODO: Use finalized slot IDs to declare live ranges
if (pass == 0)
slotAllocator.AllocateOrReuseGcSlot(pVar->offset, flags);
else
slotAllocator.ReportLiveRange(pVar->offset, flags, i);
}

gcInfoEncoder->Build();
if (pass == 0)
gcInfoEncoder->FinalizeSlotIds();
else
gcInfoEncoder->Build();
}

// GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
// let's save the values anyway for debugging purposes
gcInfoEncoder->Emit();
#endif
}
Expand Down Expand Up @@ -908,6 +1045,8 @@ int32_t* InterpCompiler::GetCode(int32_t *pCodeSize)

InterpCompiler::InterpCompiler(COMP_HANDLE compHnd,
CORINFO_METHOD_INFO* methodInfo)
: m_pInitLocalsIns(nullptr)
, m_globalVarsStackTop(0)
{
// Fill in the thread-local used for assertions
t_InterpJitInfoTls = compHnd;
Expand Down Expand Up @@ -948,6 +1087,7 @@ InterpMethod* InterpCompiler::CompileMethod()
#endif

AllocOffsets();
PatchInitLocals(m_methodInfo);

EmitCode();

Expand All @@ -965,6 +1105,34 @@ InterpMethod* InterpCompiler::CompileMethod()
return CreateInterpMethod();
}

void InterpCompiler::PatchInitLocals(CORINFO_METHOD_INFO* methodInfo)
{
// We may have global vars containing managed pointers or interior pointers, so we need
// to zero the region of the stack containing global vars, not just IL locals.
// data[0] is either the start of IL locals or the end of IL locals depending on whether local
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is outdated now

// zeroing was enabled for this method. We want to preserve that so we don't unnecessarily
// zero the IL locals if the method's author didn't want them zeroed
int32_t startOffset = m_pInitLocalsIns->data[0];
int32_t totalSize = m_globalVarsStackTop - startOffset;
if (totalSize > m_pInitLocalsIns->data[1])
{
INTERP_DUMP(
"Expanding initlocals from [%d-%d] to [%d-%d]\n",
startOffset, startOffset + m_pInitLocalsIns->data[1],
startOffset, startOffset + totalSize
);
m_pInitLocalsIns->data[1] = totalSize;
}
else
{
INTERP_DUMP(
"Not expanding initlocals from [%d-%d] for global vars stack top of %d\n",
startOffset, startOffset + m_pInitLocalsIns->data[1],
m_globalVarsStackTop
);
}
}

// Adds a conversion instruction for the value pointed to by sp, also updating the stack information
void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp)
{
Expand Down Expand Up @@ -1916,12 +2084,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
AddIns(INTOP_BREAKPOINT);
#endif

if ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) && m_ILLocalsSize > 0)
{
AddIns(INTOP_INITLOCALS);
m_pLastNewIns->data[0] = m_ILLocalsOffset;
m_pLastNewIns->data[1] = m_ILLocalsSize;
}
// We need to always generate this opcode because even if we have no IL locals, we may have
// global vars which contain managed pointers or interior pointers
m_pInitLocalsIns = AddIns(INTOP_INITLOCALS);
// if (methodInfo->options & CORINFO_OPT_INIT_LOCALS)
// FIXME: We can't currently skip zeroing locals because we don't have accurate liveness for global refs and byrefs
m_pInitLocalsIns->data[0] = m_ILLocalsOffset;
m_pInitLocalsIns->data[1] = m_ILLocalsSize;

codeEnd = m_ip + m_ILCodeSize;

Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/interpreter/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ class InterpIAllocator;
class InterpCompiler
{
friend class InterpIAllocator;
friend class InterpGcSlotAllocator;

private:
CORINFO_METHOD_HANDLE m_methodHnd;
Expand All @@ -281,6 +282,7 @@ class InterpCompiler
uint8_t* m_pILCode;
int32_t m_ILCodeSize;
int32_t m_currentILOffset;
InterpInst* m_pInitLocalsIns;

// This represents a mapping from indexes to pointer sized data. During compilation, an
// instruction can request an index for some data (like a MethodDesc pointer), that it
Expand All @@ -292,6 +294,7 @@ class InterpCompiler
int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle);

int GenerateCode(CORINFO_METHOD_INFO* methodInfo);
void PatchInitLocals(CORINFO_METHOD_INFO* methodInfo);

void ResolveToken(uint32_t token, CorInfoTokenKind tokenKind, CORINFO_RESOLVED_TOKEN *pResolvedToken);
CORINFO_METHOD_HANDLE ResolveMethodToken(uint32_t token);
Expand Down Expand Up @@ -356,7 +359,7 @@ class InterpCompiler

int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size);

int32_t m_totalVarsStackSize;
int32_t m_totalVarsStackSize, m_globalVarsStackTop;
int32_t m_paramAreaOffset = 0;
int32_t m_ILLocalsOffset, m_ILLocalsSize;
void AllocVarOffsetCB(int *pVar, void *pData);
Expand Down
39 changes: 30 additions & 9 deletions src/coreclr/interpreter/compileropt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,19 @@ void InterpCompiler::InitializeGlobalVars()
// the max offset of all call offsets on which the call depends. Stack ensures that all call offsets
// on which the call depends are calculated before the call in question, by deferring calls from the
// last to the first one.
//
//
// This method allocates offsets of resolved calls following a constraint where the base offset
// of a call must be greater than the offset of any argument of other active call args. It first
// removes the call from an array of active calls. If a match is found, the call is removed from
// the array by moving the last entry into its place. Otherwise, it is a call without arguments.
//
//
// If there are active calls, the call in question is pushed onto the stack as a deferred call.
// The call contains a list of other active calls on which it depends. Those calls need to be
// resolved first in order to determine optimal base offset for the call in question. Otherwise,
// if there are no active calls, we resolve the call in question and deferred calls from the stack.
//
//
// For better understanding, consider a simple example:
// a <- _
// a <- _
// b <- _
// call1 c <- b
// d <- _
Expand Down Expand Up @@ -145,7 +145,7 @@ void InterpCompiler::EndActiveCall(InterpInst *call)
for (int i = 0; i < m_pActiveCalls->GetSize(); i++)
callDeps = TSList<InterpInst*>::Push(callDeps, m_pActiveCalls->Get(i));
call->info.pCallInfo->callDeps = callDeps;

m_pDeferredCalls = TSList<InterpInst*>::Push(m_pDeferredCalls, call);
}
else
Expand Down Expand Up @@ -226,7 +226,8 @@ void InterpCompiler::AllocOffsets()

INTERP_DUMP("\nAllocating var offsets\n");

int finalVarsStackSize = m_totalVarsStackSize;
int finalVarsStackSize = m_totalVarsStackSize,
globalVarsStackTop = m_totalVarsStackSize;

// We now have the top of stack offset. All local regs are allocated after this offset, with each basic block
for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB)
Expand Down Expand Up @@ -396,14 +397,34 @@ void InterpCompiler::AllocOffsets()
m_paramAreaOffset = finalVarsStackSize;
for (int32_t i = 0; i < m_varsSize; i++)
{
InterpVar *pVar = &m_pVars[i];
// These are allocated separately at the end of the stack
if (m_pVars[i].callArgs)
if (pVar->callArgs)
{
m_pVars[i].offset += m_paramAreaOffset;
int32_t topOffset = m_pVars[i].offset + m_pVars[i].size;
pVar->offset += m_paramAreaOffset;
int32_t topOffset = pVar->offset + pVar->size;
if (finalVarsStackSize < topOffset)
finalVarsStackSize = topOffset;
}

// For any global vars that might contain managed pointers we need to maintain a 'global stack top'
// which specifies what stack region we need to zero at method entry in order to avoid reporting
// garbage pointers to the GC when it does a stackwalk
// Non-global vars have accurate liveness ranges we report to the GC, so we don't care about them
if (
pVar->global && (
(pVar->interpType == InterpTypeO) ||
(pVar->interpType == InterpTypeByRef) ||
(pVar->interpType == InterpTypeVT)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want you could improve the check here for valuetypes with m_compHnd->getClassAttribs(pVar->clsHnd) & CORINFO_FLG_CONTAINS_GC_PTR

)
)
{
int32_t endOfVar = pVar->offset + pVar->size;
if (endOfVar > globalVarsStackTop)
globalVarsStackTop = endOfVar;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name is a bit misleading, since it is the top of the global vars that might contain refs, and not the end of all global vars. Maybe a more explicit name, like globalVarsWithRefsStackTop or some other name that you think is good.

}
}

m_globalVarsStackTop = globalVarsStackTop;
m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT);
}
2 changes: 0 additions & 2 deletions src/coreclr/interpreter/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <malloc.h>
#endif

#include <algorithm>

#include "corhdr.h"
#include "corjit.h"

Expand Down
Loading
Loading