diff --git a/src/coreclr/interpreter/CMakeLists.txt b/src/coreclr/interpreter/CMakeLists.txt index 8962c612a71cde..43c644f09027cc 100644 --- a/src/coreclr/interpreter/CMakeLists.txt +++ b/src/coreclr/interpreter/CMakeLists.txt @@ -1,10 +1,17 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# So simdhash will build correctly without a dn-config.h +add_compile_definitions(NO_CONFIG_H) + set(INTERPRETER_SOURCES compiler.cpp compileropt.cpp intops.cpp interpconfig.cpp - eeinterp.cpp) + eeinterp.cpp + stackmap.cpp + ../../native/containers/dn-simdhash.c + ../../native/containers/dn-simdhash-ptr-ptr.c) set(INTERPRETER_LINK_LIBRARIES gcinfo diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 272d6efb346d1e..5b970bb5535756 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -8,6 +8,7 @@ #endif // debug_instrumented_return #include "interpreter.h" +#include "stackmap.h" #include @@ -832,7 +833,10 @@ void InterpCompiler::EmitCode() for (InterpInst *ins = bb->pFirstIns; ins != NULL; ins = ins->pNext) { if (InterpOpIsEmitNop(ins->opcode)) + { + ins->nativeOffset = (int32_t)(ip - m_pMethodCode); continue; + } ip = EmitCodeIns(ip, ins, &relocs); } @@ -862,25 +866,158 @@ void InterpCompiler::EmitCode() m_compHnd->setBoundaries(m_methodInfo->ftn, m_ILToNativeMapSize, m_pILToNativeMap); } +#ifdef FEATURE_INTERPRETER +class InterpGcSlotAllocator +{ + InterpCompiler *m_compiler; + InterpreterGcInfoEncoder *m_encoder; + // [pObjects, pByrefs] + GcSlotId *m_slotTables[2]; + unsigned m_slotTableSize; + +#ifdef DEBUG + bool m_verbose; +#endif + + GcSlotId* LocateGcSlotTableEntry(uint32_t offsetBytes, GcSlotFlags flags) + { + GcSlotId *slotTable = m_slotTables[(flags & GC_SLOT_INTERIOR) == GC_SLOT_INTERIOR]; + uint32_t slotIndex = offsetBytes / sizeof(void *); + assert(slotIndex < m_slotTableSize); + return &slotTable[slotIndex]; + } + +public: + InterpGcSlotAllocator(InterpCompiler *compiler, InterpreterGcInfoEncoder *encoder) + : m_compiler(compiler) + , m_encoder(encoder) + , m_slotTableSize(compiler->m_totalVarsStackSize / sizeof(void *)) +#ifdef DEBUG + , m_verbose(compiler->m_verbose) +#endif + { + for (int i = 0; i < 2; i++) + { + m_slotTables[i] = new (compiler) GcSlotId[m_slotTableSize]; + // 0 is a valid slot id so default-initialize all the slots to 0xFFFFFFFF + memset(m_slotTables[i], 0xFF, sizeof(GcSlotId) * m_slotTableSize); + } + } + + void AllocateOrReuseGcSlot(uint32_t offsetBytes, GcSlotFlags flags) + { + GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags); + bool allocateNewSlot = *pSlot == ((GcSlotId)-1); + + if (allocateNewSlot) + { + // Important to pass GC_FRAMEREG_REL, the default is broken due to GET_CALLER_SP being unimplemented + *pSlot = m_encoder->GetStackSlotId(offsetBytes, flags, GC_FRAMEREG_REL); + } + else + { + assert((flags & GC_SLOT_UNTRACKED) == 0); + } + + INTERP_DUMP( + "%s %s%sgcslot %u at %u\n", + allocateNewSlot ? "Allocated" : "Reused", + (flags & GC_SLOT_UNTRACKED) ? "global " : "", + (flags & GC_SLOT_INTERIOR) ? "interior " : "", + *pSlot, + offsetBytes + ); + } + + void ReportLiveRange(uint32_t offsetBytes, GcSlotFlags flags, int varIndex) + { + GcSlotId *pSlot = LocateGcSlotTableEntry(offsetBytes, flags); + assert(varIndex < m_compiler->m_varsSize); + + InterpVar *pVar = &m_compiler->m_pVars[varIndex]; + if (pVar->global) + return; + + GcSlotId slot = *pSlot; + assert(slot != ((GcSlotId)-1)); + assert(pVar->liveStart); + assert(pVar->liveEnd); + uint32_t startOffset = m_compiler->ConvertOffset(m_compiler->GetLiveStartOffset(varIndex)), + endOffset = m_compiler->ConvertOffset(m_compiler->GetLiveEndOffset(varIndex)); + INTERP_DUMP( + "Slot %u (%s var #%d offset %u) live [IR_%04x - IR_%04x] [%u - %u]\n", + slot, pVar->global ? "global" : "local", + varIndex, pVar->offset, + m_compiler->GetLiveStartOffset(varIndex), m_compiler->GetLiveEndOffset(varIndex), + startOffset, endOffset + ); + m_encoder->SetSlotState(startOffset, slot, GC_SLOT_LIVE); + m_encoder->SetSlotState(endOffset, slot, GC_SLOT_DEAD); + } +}; +#endif + void InterpCompiler::BuildGCInfo(InterpMethod *pInterpMethod) { #ifdef FEATURE_INTERPRETER InterpIAllocator* pAllocator = new (this) InterpIAllocator(this); InterpreterGcInfoEncoder* gcInfoEncoder = new (this) InterpreterGcInfoEncoder(m_compHnd, m_methodInfo, pAllocator, Interp_NOMEM); - assert(gcInfoEncoder); + InterpGcSlotAllocator slotAllocator (this, gcInfoEncoder); + + gcInfoEncoder->SetCodeLength(ConvertOffset(m_methodCodeSize)); - gcInfoEncoder->SetCodeLength(m_methodCodeSize); + INTERP_DUMP("Allocating gcinfo slots for %u vars\n", m_varsSize); + + for (int pass = 0; pass < 2; pass++) + { + for (int i = 0; i < m_varsSize; i++) + { + InterpVar *pVar = &m_pVars[i]; + GcSlotFlags flags = pVar->global + ? (GcSlotFlags)GC_SLOT_UNTRACKED + : (GcSlotFlags)0; - // TODO: Request slot IDs for all our locals before finalizing + switch (pVar->interpType) { + case InterpTypeO: + break; + case InterpTypeByRef: + flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); + break; + case InterpTypeVT: + { + InterpreterStackMap *stackMap = GetInterpreterStackMap(m_compHnd, pVar->clsHnd); + for (unsigned j = 0; j < stackMap->m_slotCount; j++) + { + InterpreterStackMapSlot slotInfo = stackMap->m_slots[j]; + unsigned fieldOffset = pVar->offset + slotInfo.m_offsetBytes; + GcSlotFlags fieldFlags = (GcSlotFlags)(flags | slotInfo.m_gcSlotFlags); + if (pass == 0) + slotAllocator.AllocateOrReuseGcSlot(fieldOffset, fieldFlags); + else + slotAllocator.ReportLiveRange(fieldOffset, fieldFlags, i); + } - gcInfoEncoder->FinalizeSlotIds(); + // Don't perform the regular allocateGcSlot call + continue; + } + default: + // Neither an object, interior pointer, or vt, so no slot needed + continue; + } - // TODO: Use finalized slot IDs to declare live ranges + if (pass == 0) + slotAllocator.AllocateOrReuseGcSlot(pVar->offset, flags); + else + slotAllocator.ReportLiveRange(pVar->offset, flags, i); + } - gcInfoEncoder->Build(); + if (pass == 0) + gcInfoEncoder->FinalizeSlotIds(); + else + gcInfoEncoder->Build(); + } // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) - // let's save the values anyway for debugging purposes gcInfoEncoder->Emit(); #endif } @@ -908,6 +1045,8 @@ int32_t* InterpCompiler::GetCode(int32_t *pCodeSize) InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo) + : m_pInitLocalsIns(nullptr) + , m_globalVarsStackTop(0) { // Fill in the thread-local used for assertions t_InterpJitInfoTls = compHnd; @@ -948,6 +1087,7 @@ InterpMethod* InterpCompiler::CompileMethod() #endif AllocOffsets(); + PatchInitLocals(m_methodInfo); EmitCode(); @@ -965,6 +1105,34 @@ InterpMethod* InterpCompiler::CompileMethod() return CreateInterpMethod(); } +void InterpCompiler::PatchInitLocals(CORINFO_METHOD_INFO* methodInfo) +{ + // We may have global vars containing managed pointers or interior pointers, so we need + // to zero the region of the stack containing global vars, not just IL locals. + // data[0] is either the start of IL locals or the end of IL locals depending on whether local + // zeroing was enabled for this method. We want to preserve that so we don't unnecessarily + // zero the IL locals if the method's author didn't want them zeroed + int32_t startOffset = m_pInitLocalsIns->data[0]; + int32_t totalSize = m_globalVarsStackTop - startOffset; + if (totalSize > m_pInitLocalsIns->data[1]) + { + INTERP_DUMP( + "Expanding initlocals from [%d-%d] to [%d-%d]\n", + startOffset, startOffset + m_pInitLocalsIns->data[1], + startOffset, startOffset + totalSize + ); + m_pInitLocalsIns->data[1] = totalSize; + } + else + { + INTERP_DUMP( + "Not expanding initlocals from [%d-%d] for global vars stack top of %d\n", + startOffset, startOffset + m_pInitLocalsIns->data[1], + m_globalVarsStackTop + ); + } +} + // Adds a conversion instruction for the value pointed to by sp, also updating the stack information void InterpCompiler::EmitConv(StackInfo *sp, InterpInst *prevIns, StackType type, InterpOpcode convOp) { @@ -1916,12 +2084,13 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) AddIns(INTOP_BREAKPOINT); #endif - if ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) && m_ILLocalsSize > 0) - { - AddIns(INTOP_INITLOCALS); - m_pLastNewIns->data[0] = m_ILLocalsOffset; - m_pLastNewIns->data[1] = m_ILLocalsSize; - } + // We need to always generate this opcode because even if we have no IL locals, we may have + // global vars which contain managed pointers or interior pointers + m_pInitLocalsIns = AddIns(INTOP_INITLOCALS); + // if (methodInfo->options & CORINFO_OPT_INIT_LOCALS) + // FIXME: We can't currently skip zeroing locals because we don't have accurate liveness for global refs and byrefs + m_pInitLocalsIns->data[0] = m_ILLocalsOffset; + m_pInitLocalsIns->data[1] = m_ILLocalsSize; codeEnd = m_ip + m_ILCodeSize; diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index de1884b763f4b0..f0eafcd28b4bd6 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -264,6 +264,7 @@ class InterpIAllocator; class InterpCompiler { friend class InterpIAllocator; + friend class InterpGcSlotAllocator; private: CORINFO_METHOD_HANDLE m_methodHnd; @@ -281,6 +282,7 @@ class InterpCompiler uint8_t* m_pILCode; int32_t m_ILCodeSize; int32_t m_currentILOffset; + InterpInst* m_pInitLocalsIns; // This represents a mapping from indexes to pointer sized data. During compilation, an // instruction can request an index for some data (like a MethodDesc pointer), that it @@ -292,6 +294,7 @@ class InterpCompiler int32_t GetMethodDataItemIndex(CORINFO_METHOD_HANDLE mHandle); int GenerateCode(CORINFO_METHOD_INFO* methodInfo); + void PatchInitLocals(CORINFO_METHOD_INFO* methodInfo); void ResolveToken(uint32_t token, CorInfoTokenKind tokenKind, CORINFO_RESOLVED_TOKEN *pResolvedToken); CORINFO_METHOD_HANDLE ResolveMethodToken(uint32_t token); @@ -356,7 +359,7 @@ class InterpCompiler int32_t CreateVarExplicit(InterpType interpType, CORINFO_CLASS_HANDLE clsHnd, int size); - int32_t m_totalVarsStackSize; + int32_t m_totalVarsStackSize, m_globalVarsStackTop; int32_t m_paramAreaOffset = 0; int32_t m_ILLocalsOffset, m_ILLocalsSize; void AllocVarOffsetCB(int *pVar, void *pData); diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index 0d586a71968cd4..a3e489f555964c 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -103,19 +103,19 @@ void InterpCompiler::InitializeGlobalVars() // the max offset of all call offsets on which the call depends. Stack ensures that all call offsets // on which the call depends are calculated before the call in question, by deferring calls from the // last to the first one. -// +// // This method allocates offsets of resolved calls following a constraint where the base offset // of a call must be greater than the offset of any argument of other active call args. It first // removes the call from an array of active calls. If a match is found, the call is removed from // the array by moving the last entry into its place. Otherwise, it is a call without arguments. -// +// // If there are active calls, the call in question is pushed onto the stack as a deferred call. // The call contains a list of other active calls on which it depends. Those calls need to be // resolved first in order to determine optimal base offset for the call in question. Otherwise, // if there are no active calls, we resolve the call in question and deferred calls from the stack. -// +// // For better understanding, consider a simple example: -// a <- _ +// a <- _ // b <- _ // call1 c <- b // d <- _ @@ -145,7 +145,7 @@ void InterpCompiler::EndActiveCall(InterpInst *call) for (int i = 0; i < m_pActiveCalls->GetSize(); i++) callDeps = TSList::Push(callDeps, m_pActiveCalls->Get(i)); call->info.pCallInfo->callDeps = callDeps; - + m_pDeferredCalls = TSList::Push(m_pDeferredCalls, call); } else @@ -226,7 +226,8 @@ void InterpCompiler::AllocOffsets() INTERP_DUMP("\nAllocating var offsets\n"); - int finalVarsStackSize = m_totalVarsStackSize; + int finalVarsStackSize = m_totalVarsStackSize, + globalVarsStackTop = m_totalVarsStackSize; // We now have the top of stack offset. All local regs are allocated after this offset, with each basic block for (pBB = m_pEntryBB; pBB != NULL; pBB = pBB->pNextBB) @@ -396,14 +397,34 @@ void InterpCompiler::AllocOffsets() m_paramAreaOffset = finalVarsStackSize; for (int32_t i = 0; i < m_varsSize; i++) { + InterpVar *pVar = &m_pVars[i]; // These are allocated separately at the end of the stack - if (m_pVars[i].callArgs) + if (pVar->callArgs) { - m_pVars[i].offset += m_paramAreaOffset; - int32_t topOffset = m_pVars[i].offset + m_pVars[i].size; + pVar->offset += m_paramAreaOffset; + int32_t topOffset = pVar->offset + pVar->size; if (finalVarsStackSize < topOffset) finalVarsStackSize = topOffset; } + + // For any global vars that might contain managed pointers we need to maintain a 'global stack top' + // which specifies what stack region we need to zero at method entry in order to avoid reporting + // garbage pointers to the GC when it does a stackwalk + // Non-global vars have accurate liveness ranges we report to the GC, so we don't care about them + if ( + pVar->global && ( + (pVar->interpType == InterpTypeO) || + (pVar->interpType == InterpTypeByRef) || + (pVar->interpType == InterpTypeVT) + ) + ) + { + int32_t endOfVar = pVar->offset + pVar->size; + if (endOfVar > globalVarsStackTop) + globalVarsStackTop = endOfVar; + } } + + m_globalVarsStackTop = globalVarsStackTop; m_totalVarsStackSize = ALIGN_UP_TO(finalVarsStackSize, INTERP_STACK_ALIGNMENT); } diff --git a/src/coreclr/interpreter/interpreter.h b/src/coreclr/interpreter/interpreter.h index f0a9fe898a878e..a06dfefbab5e05 100644 --- a/src/coreclr/interpreter/interpreter.h +++ b/src/coreclr/interpreter/interpreter.h @@ -16,8 +16,6 @@ #include #endif -#include - #include "corhdr.h" #include "corjit.h" diff --git a/src/coreclr/interpreter/stackmap.cpp b/src/coreclr/interpreter/stackmap.cpp new file mode 100644 index 00000000000000..36242c5526c1bb --- /dev/null +++ b/src/coreclr/interpreter/stackmap.cpp @@ -0,0 +1,86 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "gcinfoencoder.h" // for GcSlotFlags + +// HACK: debugreturn.h (included by gcinfoencoder.h) breaks constexpr +#if defined(debug_instrumented_return) || defined(_DEBUGRETURN_H_) +#undef return +#endif // debug_instrumented_return + +#include "interpreter.h" +#include "stackmap.h" + +extern "C" { + #include "../../native/containers/dn-simdhash.h" + #include "../../native/containers/dn-simdhash-specializations.h" + + void assertAbort(const char* why, const char* file, unsigned line); + + void + dn_simdhash_assert_fail (const char* file, int line, const char* condition); + + void + dn_simdhash_assert_fail (const char* file, int line, const char* condition) { + assertAbort(condition, file, line); + } +} + +thread_local dn_simdhash_ptr_ptr_t *t_sharedStackMapLookup = nullptr; + +InterpreterStackMap* GetInterpreterStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) +{ + InterpreterStackMap* result = nullptr; + if (!t_sharedStackMapLookup) + t_sharedStackMapLookup = dn_simdhash_ptr_ptr_new(0, nullptr); + if (!dn_simdhash_ptr_ptr_try_get_value(t_sharedStackMapLookup, classHandle, (void **)&result)) + { + result = new InterpreterStackMap(jitInfo, classHandle); + dn_simdhash_ptr_ptr_try_add(t_sharedStackMapLookup, classHandle, result); + } + return result; +} + +void InterpreterStackMap::PopulateStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) +{ + unsigned size = jitInfo->getClassSize(classHandle); + // getClassGClayout assumes it's given a buffer of exactly this size + unsigned maxGcPtrs = (size + sizeof(void *) - 1) / sizeof(void *); + if (maxGcPtrs < 1) + return; + + uint8_t *gcPtrs = (uint8_t *)alloca(maxGcPtrs); + unsigned numGcPtrs = jitInfo->getClassGClayout(classHandle, gcPtrs), + newCapacity = m_slotCount + numGcPtrs; + + // Allocate enough space in case all the offsets in the buffer are GC pointers + m_slots = (InterpreterStackMapSlot *)malloc(sizeof(InterpreterStackMapSlot) * newCapacity); + + for (unsigned i = 0; i < numGcPtrs; i++) { + GcSlotFlags flags; + + switch (gcPtrs[i]) { + case TYPE_GC_NONE: + case TYPE_GC_OTHER: + continue; + case TYPE_GC_BYREF: + flags = GC_SLOT_INTERIOR; + break; + case TYPE_GC_REF: + flags = GC_SLOT_BASE; + break; + default: + assert(false); + continue; + } + + unsigned slotOffset = (sizeof(void *) * i); + m_slots[m_slotCount++] = { slotOffset, (unsigned)flags }; + } + + // Shrink our allocation based on the number of slots we actually recorded + unsigned finalSize = sizeof(InterpreterStackMapSlot) * m_slotCount; + if (finalSize == 0) + finalSize = sizeof(InterpreterStackMapSlot); + m_slots = (InterpreterStackMapSlot *)realloc(m_slots, finalSize); +} diff --git a/src/coreclr/interpreter/stackmap.h b/src/coreclr/interpreter/stackmap.h new file mode 100644 index 00000000000000..fb8b6ab4398812 --- /dev/null +++ b/src/coreclr/interpreter/stackmap.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct InterpreterStackMapSlot +{ + unsigned m_offsetBytes; + unsigned m_gcSlotFlags; +}; + +class InterpreterStackMap +{ + void PopulateStackMap (ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle); + +public: + unsigned m_slotCount; + InterpreterStackMapSlot* m_slots; + + InterpreterStackMap (ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle) + : m_slotCount(0) + , m_slots(nullptr) + { + PopulateStackMap(jitInfo, classHandle); + } +}; + +InterpreterStackMap* GetInterpreterStackMap(ICorJitInfo* jitInfo, CORINFO_CLASS_HANDLE classHandle); diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index b08f1ebd5f0836..f5e06efff64ef5 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1264,6 +1264,8 @@ template void GcSlotDecoder::DecodeSlo { // We have untracked stack slots left and more room to predecode + // Interpreter-TODO: Add a configurable way to skip encoding/decoding the base for the interpreter, because + // all interpreter locals are at positive offsets relative to FP. GcStackSlotBase spBase = (GcStackSlotBase) reader.Read(2); UINT32 normSpOffset = (INT32) reader.DecodeVarLengthSigned(GcInfoEncoding::STACK_SLOT_ENCBASE); INT32 spOffset = GcInfoEncoding::DENORMALIZE_STACK_SLOT(normSpOffset); @@ -2183,6 +2185,37 @@ template void TGcInfoDecoder::ReportRe #endif // Unknown platform +#ifdef FEATURE_INTERPRETER +template <> OBJECTREF* TGcInfoDecoder::GetStackSlot( + INT32 spOffset, + GcStackSlotBase spBase, + PREGDISPLAY pRD + ) +{ + OBJECTREF* pObjRef = NULL; + + if( GC_SP_REL == spBase ) + { + _ASSERTE(!"GC_SP_REL is invalid for interpreter frames"); + } + else if( GC_CALLER_SP_REL == spBase ) + { + _ASSERTE(!"GC_CALLER_SP_REL is invalid for interpreter frames"); + } + else + { + // Interpreter-TODO: Enhance GcInfoEncoder/Decoder to allow omitting the stack slot base register for interpreted + // methods, since only one base (fp) is ever used for interpreter locals. See Interpreter-TODO in DecodeSlotTable. + _ASSERTE( GC_FRAMEREG_REL == spBase ); + uint8_t* fp = (uint8_t *)GetFP(pRD->pCurrentContext); + _ASSERTE(fp); + pObjRef = (OBJECTREF*)(fp + spOffset); + } + + return pObjRef; +} +#endif + template OBJECTREF* TGcInfoDecoder::GetStackSlot( INT32 spOffset, diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index c01008f89eee39..883d12e336e22c 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -1179,7 +1179,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr { pInterpreterFrame->SetTopInterpMethodContextFrame(pFrame); GCX_COOP(); - GCHeapUtilities::GetGCHeap()->GarbageCollect(-1, false, 0x00000002); + GCHeapUtilities::GetGCHeap()->GarbageCollect(-1, false, collection_blocking | collection_aggressive); } ip++; break; diff --git a/src/native/containers/dn-simdhash-ptr-ptr.c b/src/native/containers/dn-simdhash-ptr-ptr.c index e4009373637bcd..63933fdeccc00e 100644 --- a/src/native/containers/dn-simdhash-ptr-ptr.c +++ b/src/native/containers/dn-simdhash-ptr-ptr.c @@ -14,10 +14,10 @@ #define DN_SIMDHASH_KEY_HASHER(data, key) (MurmurHash3_32_ptr(key, 0)) #define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) (lhs == rhs) // perfect cache alignment. 128-byte buckets for 64-bit pointers, 64-byte buckets for 32-bit pointers -#if SIZEOF_VOID_P == 8 -#define DN_SIMDHASH_BUCKET_CAPACITY 14 -#else +#if SIZEOF_VOID_P == 4 #define DN_SIMDHASH_BUCKET_CAPACITY 12 +#else +#define DN_SIMDHASH_BUCKET_CAPACITY 11 #endif #include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash.c b/src/native/containers/dn-simdhash.c index 9ea5fd7a1ec986..24ea919f8ae365 100644 --- a/src/native/containers/dn-simdhash.c +++ b/src/native/containers/dn-simdhash.c @@ -167,6 +167,7 @@ dn_simdhash_new_internal (dn_simdhash_meta_t *meta, dn_simdhash_vtable_t vtable, { const size_t size = sizeof(dn_simdhash_t) + meta->data_size; dn_simdhash_t *result = (dn_simdhash_t *)dn_allocator_alloc(allocator, size); + dn_simdhash_assert(result); memset(result, 0, size); dn_simdhash_assert(meta); @@ -248,7 +249,13 @@ dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity) size_t buckets_size_bytes = (bucket_count * hash->meta->bucket_size_bytes) + DN_SIMDHASH_VECTOR_WIDTH, values_size_bytes = value_count * hash->meta->value_size; - hash->buffers.buckets = dn_allocator_alloc(hash->buffers.allocator, buckets_size_bytes); + void *new_buckets = dn_allocator_alloc(hash->buffers.allocator, buckets_size_bytes), + *new_values = dn_allocator_alloc(hash->buffers.allocator, values_size_bytes); + + dn_simdhash_assert(new_buckets); + dn_simdhash_assert(new_values); + + hash->buffers.buckets = new_buckets; memset(hash->buffers.buckets, 0, buckets_size_bytes); // Calculate necessary bias for alignment @@ -257,7 +264,7 @@ dn_simdhash_ensure_capacity_internal (dn_simdhash_t *hash, uint32_t capacity) hash->buffers.buckets = (void *)(((uint8_t *)hash->buffers.buckets) + hash->buffers.buckets_bias); // No need to go out of our way to align values - hash->buffers.values = dn_allocator_alloc(hash->buffers.allocator, values_size_bytes); + hash->buffers.values = new_values; // Skip this for performance; memset is especially slow in wasm // memset(hash->buffers.values, 0, values_size_bytes); diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs index daec2582cc8fe5..2ccfcc3b9f4c0e 100644 --- a/src/tests/JIT/interpreter/Interpreter.cs +++ b/src/tests/JIT/interpreter/Interpreter.cs @@ -65,6 +65,17 @@ public MyStruct2(int val) } } +public struct StructWithRefs +{ + public MyObj o1, o2; + + public StructWithRefs(int val1, int val2) + { + o1 = new MyObj(val1); + o2 = new MyObj(val2); + } +} + public class InterpreterTest { static int Main(string[] args) @@ -91,12 +102,15 @@ public static void RunInterpreterTests() if (!TestJitFields()) Environment.FailFast(null); - // Disable below tests because they are potentially unstable since they do allocation - // and we currently don't have GC support. They should pass locally though. -// if (!TestFields()) -// Environment.FailFast(null); -// if (!TestSpecialFields()) -// Environment.FailFast(null); + if (!TestFields()) + Environment.FailFast(null); + if (!TestStructRefFields()) + Environment.FailFast(null); + // FIXME: Calling TestSpecialFields causes the following System.GC.Collect to fail. + /* + if (!TestSpecialFields()) + Environment.FailFast(null); + */ if (!TestFloat()) Environment.FailFast(null); @@ -106,7 +120,6 @@ public static void RunInterpreterTests() // if (!TestVirtual()) // Environment.FailFast(null); - // For stackwalking validation System.GC.Collect(); } @@ -193,19 +206,46 @@ public static bool TestFields() if (sum != 33) return false; + ref int str_a = ref str.str.a; + + System.GC.Collect(); + staticObj = obj; staticStr = str; + System.GC.Collect(); + sum = staticObj.str.a + staticStr.str.a + staticObj.ct + staticStr.ct; if (sum != 33) return false; - WriteInt(ref str.str.a, 11); + WriteInt(ref str_a, 11); WriteInt(ref staticObj.str.a, 22); - sum = ReadInt(ref str.str.a) + ReadInt(ref staticObj.str.a); + sum = ReadInt(ref str_a) + ReadInt(ref staticObj.str.a); if (sum != 33) return false; + if (str_a != str.str.a) + return false; + + return true; + } + + public static bool TestStructRefFields() + { + StructWithRefs s = new StructWithRefs(3, 42); + if (s.o1.str.a != 3) + return false; + if (s.o2.str.a != 42) + return false; + + System.GC.Collect(); + + if (s.o1.str.a != 3) + return false; + if (s.o2.str.a != 42) + return false; + return true; } @@ -219,6 +259,9 @@ public static bool TestSpecialFields() threadStaticObj = new MyObj(1); threadStaticStr = new MyStruct2(2); + // FIXME: This crashes in the GC. We don't report any live GC locals and inspection of the stack shows no GC objects on it + // System.GC.Collect(); + int sum = threadStaticObj.str.a + threadStaticStr.str.a + threadStaticObj.ct + threadStaticStr.ct; if (sum != 33) return false;