diff --git a/runtime/compiler/runtime/Runtime.cpp b/runtime/compiler/runtime/Runtime.cpp index ad5b517523c..299e8885e60 100644 --- a/runtime/compiler/runtime/Runtime.cpp +++ b/runtime/compiler/runtime/Runtime.cpp @@ -291,8 +291,6 @@ JIT_HELPER(prefetchTLH); JIT_HELPER(newPrefetchTLH); JIT_HELPER(outlinedNewObject); JIT_HELPER(outlinedNewArray); -JIT_HELPER(outlinedNewObjectNoZeroInit); -JIT_HELPER(outlinedNewArrayNoZeroInit); JIT_HELPER(_arrayTranslateTRTO); JIT_HELPER(_arrayTranslateTROTNoBreak); @@ -1113,10 +1111,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP) SET(TR_X86interpreterUnresolvedFieldGlue, (void *)interpreterUnresolvedFieldGlue, TR_Helper); SET(TR_X86interpreterUnresolvedFieldSetterGlue, (void *)interpreterUnresolvedFieldSetterGlue, TR_Helper); - SET(TR_X86OutlinedNew, (void *)outlinedNewObject, TR_Helper); - SET(TR_X86OutlinedNewArray, (void *)outlinedNewArray, TR_Helper); - SET(TR_X86OutlinedNewNoZeroInit, (void *)outlinedNewObjectNoZeroInit, TR_Helper); - SET(TR_X86OutlinedNewArrayNoZeroInit, (void *)outlinedNewArrayNoZeroInit, TR_Helper); SET(TR_X86prefetchTLH, (void *)prefetchTLH, TR_Helper); SET(TR_X86newPrefetchTLH, (void *)newPrefetchTLH, TR_Helper); SET(TR_X86CodeCachePrefetchHelper, (void *)prefetchTLH, TR_Helper); // needs to be set while compiling diff --git a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp index a2660ca4c4d..cb68e053844 100644 --- a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp @@ -9288,7 +9288,6 @@ J9::X86::TreeEvaluator::VMnewEvaluator( bool realTimeGC = comp->getOptions()->realTimeGC(); bool generateArraylets = comp->generateArraylets(); - bool outlineNew = false; TR::Register *segmentReg = NULL; TR::Register *tempReg = NULL; @@ -9394,43 +9393,6 @@ J9::X86::TreeEvaluator::VMnewEvaluator( } } - bool disableOutlinedNew = comp->getOption(TR_DisableOutlinedNew); - - if (generateArraylets) - { - if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "OUTLINED NEW: Disable for %s %p because outlined allocation can't deal with arraylets\n", node->getOpCode().getName(), node); - disableOutlinedNew = true; - cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "cg.new/refusedToOutline/arraylets/%s", node->getOpCode().getName()), 1, TR::DebugCounter::Undetermined); - } - else if (comp->getMethodHotness() > warm) - { - if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "OUTLINED NEW: Disable for %p because opt level is %s\n", node, comp->getHotnessName()); - disableOutlinedNew = true; - cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "cg.new/refusedToOutline/optlevel/%s", comp->getHotnessName()), 1, TR::DebugCounter::Undetermined); - } - else if (objectSize !=0 && objectSize <=0x40) - { - if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "OUTLINED NEW: Disable for %p because allocation size is small %d\n", node, objectSize); - - disableOutlinedNew = true; - } - else if (comp->getDebug() && - comp->getOptions()->getPackedTestRegex() && - TR::SimpleRegex::match(comp->getOptions()->getPackedTestRegex(), "disablePackedOutlineNew")) - { - disableOutlinedNew = true; - } - else if(!comp->getOption(TR_EnableOutlinedNew) && cg->getX86ProcessorInfo().isGenuineIntel() && !cg->getX86ProcessorInfo().isIntelOldMachine()) - { - disableOutlinedNew = true; //disable outlinednew for new machines - } - - if (!disableOutlinedNew && performTransformation(comp, "O^O OUTLINED NEW: outlining %s %p, size %d\n", node->getOpCode().getName(), node, allocationSize)) - outlineNew = true; - TR::LabelSymbol *startLabel = generateLabelSymbol(cg); TR::LabelSymbol *fallThru = generateLabelSymbol(cg); startLabel->setStartInternalControlFlow(); @@ -9523,104 +9485,15 @@ J9::X86::TreeEvaluator::VMnewEvaluator( if (skipOutlineZeroInit && !performTransformation(comp, "O^O OUTLINED NEW: skip outlined zero init on %s %p\n", cg->getDebug()->getName(node), node)) skipOutlineZeroInit = false; - if (skipOutlineZeroInit) - { - // The NoZeroInit outlined allocation causes sanity failures. If we - // can't do outlined allocation without zero init, just do it inline instead. - // TODO: Figure this out and fix it. - // - outlineNew = false; - if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "OUTLINED NEW: Disable for %p because we can't do it without zero-init\n", node); - disableOutlinedNew = true; - cg->generateDebugCounter("cg.new/refusedToOutline/noZeroInitBug", 1, TR::DebugCounter::Undetermined); - } - - if (outlineNew) + // Faster inlined sequence. It does not understand arraylet shapes yet. + // + if (canUseFastInlineAllocation) { - // Detect overflow in array size and let jitNewArray JIT helper handle overflow - // - if (sizeReg && !node->getFirstChild()->isNonNegative()) - { - TR::Register *elementCountReg = sizeReg; - uintptrj_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow(); - uintptrj_t maxObjectSizeInElements = maxObjectSize / elementSize; - // 64 bit and max array size is larger than 0x7fffffff - if (TR::Compiler->target.is64Bit() && !(maxObjectSizeInElements > 0 && maxObjectSizeInElements <= (uintptrj_t)INT_MAX)) - { - generateRegImm64Instruction(MOV8RegImm64, node, tempReg, maxObjectSizeInElements, cg); - generateRegRegInstruction(CMP8RegReg, node, elementCountReg, tempReg, cg); - } - else - { - generateRegImmInstruction(CMPRegImm4(), node, elementCountReg, (int32_t)maxObjectSizeInElements, cg); - } - generateLabelInstruction(JAE4, node, failLabel, cg); - } - - if (sizeReg) - { - int32_t round = (elementSize < fej9->getObjectAlignmentInBytes()) ? - fej9->getObjectAlignmentInBytes() : 0; - int32_t disp = round ? (round-1) : 0; - - // Now compute size of object in bytes - // - generateRegMemInstruction(LEARegMem(), - node, - segmentReg, - generateX86MemoryReference(NULL, - sizeReg, - TR::MemoryReference::convertMultiplierToStride(elementSize), - allocationSize + disp, cg), cg); - if (round) - { - generateRegImmInstruction(ANDRegImms(), node, segmentReg, -round, cg); - } - } - else - { - // make sure the allocationSize is aligned - // - allocationSize = (allocationSize + fej9->getObjectAlignmentInBytes()-1) & (-fej9->getObjectAlignmentInBytes()); - generateRegImmInstruction(MOV4RegImm4, node, segmentReg, allocationSize, cg); - } - - if (skipOutlineZeroInit) - { - TR_RuntimeHelper helper; - if (isArrayNew && sizeReg) - helper = TR_X86OutlinedNewArrayNoZeroInit; - else - helper = TR_X86OutlinedNewNoZeroInit; - cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "cg.new/outlined/noZeroInit/%s", node->getOpCode().getName()), 1, TR::DebugCounter::Undetermined); - generateHelperCallInstruction(node, helper, NULL, cg)->setAdjustsFramePointerBy(0); - } - else - { - TR_RuntimeHelper helper; - if (isArrayNew && sizeReg) - helper = TR_X86OutlinedNewArray; - else - helper = TR_X86OutlinedNew; - cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "cg.new/outlined/zeroInit/%s", node->getOpCode().getName()), 1, TR::DebugCounter::Undetermined); - generateHelperCallInstruction(node, helper, NULL, cg)->setAdjustsFramePointerBy(0); - } - generateRegRegInstruction(TESTRegReg(), node, targetReg, targetReg, cg); - generateLabelInstruction(JE4, node, failLabel, cg); + genHeapAlloc2(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg); } else { - // Faster inlined sequence. It does not understand arraylet shapes yet. - // - if (canUseFastInlineAllocation) - { - genHeapAlloc2(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg); - } - else - { - genHeapAlloc(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg); - } + genHeapAlloc(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg); } // -------------------------------------------------------------------------------- @@ -9647,7 +9520,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator( maxZeroInitWordsPerIteration = MAX_ZERO_INIT_WORDS_PER_ITERATION; // Use default value } - if (initInfo && initInfo->zeroInitSlots && !outlineNew) + if (initInfo && initInfo->zeroInitSlots) { // If there are too many words to be individually initialized, initialize // them all @@ -9742,7 +9615,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator( monitorSlotIsInitialized = true; } else if ((!initInfo || initInfo->numZeroInitSlots > 0) && - !node->canSkipZeroInitialization() && !outlineNew) + !node->canSkipZeroInitialization()) { // Initialize all slots // @@ -9773,7 +9646,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator( // for non-native 64-bit targets where the discontiguous length slot is already initialized // via the contiguous length slot. // - if (node->getOpCodeValue() != TR::New && !outlineNew && + if (node->getOpCodeValue() != TR::New && (TR::Compiler->target.is32Bit() || comp->useCompressedPointers())) { generateMemImmInstruction(SMemImm4(), node, @@ -9878,11 +9751,6 @@ J9::X86::TreeEvaluator::VMnewEvaluator( numDeps += 2; } - if (outlineNew) - { - numDeps++; - } - // Create dependencies for the allocation registers here. // The size and class registers, if they exist, must be the first // dependencies since the heap allocation snippet needs to find them to grab @@ -9898,7 +9766,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator( deps->addPostCondition(targetReg, TR::RealRegister::eax, cg); deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg); - if (useRepInstruction || outlineNew) + if (useRepInstruction) { deps->addPostCondition(tempReg, TR::RealRegister::ecx, cg); deps->addPostCondition(segmentReg, TR::RealRegister::edi, cg); @@ -9916,13 +9784,6 @@ J9::X86::TreeEvaluator::VMnewEvaluator( cg->stopUsingRegister(scratchReg); } - if (outlineNew) - { - TR::Register *dummyReg = cg->allocateRegister(); - deps->addPostCondition(dummyReg, TR::RealRegister::esi, cg); - cg->stopUsingRegister(dummyReg); - } - if (outlinedHelperCall) { TR::Node *callNode = outlinedHelperCall->getCallNode(); diff --git a/runtime/compiler/x/runtime/X86Codert.asm b/runtime/compiler/x/runtime/X86Codert.asm index 285b3891096..ca6726e0081 100644 --- a/runtime/compiler/x/runtime/X86Codert.asm +++ b/runtime/compiler/x/runtime/X86Codert.asm @@ -898,318 +898,6 @@ eq_J9VMThread_PrefetchCursor equ J9TR_VMThread_tlhPrefetchFTA public prefetchTLH public newPrefetchTLH - public outlinedNewObject - public outlinedNewArray - public outlinedNewObjectNoZeroInit - public outlinedNewArrayNoZeroInit - -; outlinedNewArray -; -; Allocates new object out of line. Callable by the JIT generated code. -; Assumes EBP = vmThread, EAX is the return value. -; If we are out of TLH space, it returns 0. -; IT DOES NOT INITIALIZE THE HEADER -; -; Parameters: -; -; class -; instance size -; -; Returns: -; -; eax - allocated object address or 0 if we are out of TLH space -; - align 16 -ifdef TR_HOST_64BIT -outlinedNewArray proc -else -outlinedNewArray proc near -endif - -ifndef J9TR_VMThread_heapAlloc - int 3 ; outlined allocation not supported without heapAlloc field -else - cmp _rdi, 8h - jle cannotAllocateNZI - mov _rax, 10h - cmp _rdi, 10h - cmovb _rdi, _rax - ;jmp startNew - - mov _rax, [_rbp + eq_J9VMThread_heapAlloc] ; load heap alloc - mov _rcx, [_rbp + eq_J9VMThread_heapTop] ; check if it's larger than heapTop - sub _rcx, _rax ; compute how much space is available - cmp _rcx, _rdi ; see if it's enough for our object - jb cannotAllocateNewArray - lea _rcx, [_rax + _rdi] ; compute new heapAlloc - prefetchnta [_rcx + 0c0h] - mov [_rbp + eq_J9VMThread_heapAlloc], _rcx ; store new heapAlloc - prefetchnta [_rcx + 0100h] - prefetchnta [_rcx + 0140h] - prefetchnta [_rcx + 0180h] - xor _rcx, _rcx -ifdef TR_HOST_64BIT -ifdef ASM_J9VM_INTERP_COMPRESSED_OBJECT_HEADER - mov dword ptr [_rax + 8], ecx -else - mov dword ptr [_rax + 12], ecx -endif -else - mov dword ptr [_rax + 8], ecx -endif - -donePrefetch: - - cmp _rdi, 40 - jb doArrayLoopCopy - -ifdef TR_HOST_64BIT - lea _rcx, [_rdi + 7] - shr _rcx, 3 -else - lea _rcx, [_rdi + 3] - shr _rcx, 2 -endif - mov _rdi, _rax - mov _rsi, _rax - xor _rax, _rax - -ifdef TR_HOST_64BIT - rep stosq -else - rep stosd -endif - mov _rax, _rsi - ret - -doArrayLoopCopy: - ; clear the memory - xor _rcx, _rcx - sub _rdi, eq_gpr_size ; added right subtract in init loop -zeroInitLoopNewArray: - mov [_rax + _rdi], _rcx ; store zero - sub _rdi, eq_gpr_size - jge zeroInitLoopNewArray - ret - -cannotAllocateNewArray: - xor _rax, _rax - ret - -endif ; ifndef J9TR_VMThread_heapAlloc - -outlinedNewArray endp - - align 16 -ifdef TR_HOST_64BIT -outlinedNewArrayNoZeroInit proc -else -outlinedNewArrayNoZeroInit proc near -endif - -ifndef J9TR_VMThread_heapAlloc - int 3 ; outlined allocation not supported without heapAlloc field -else - cmp _rdi, 8h - jle cannotAllocateNZI - mov _rax, 10h - cmp _rdi, 10h - cmovb _rdi, _rax - ;jmp startNewNZI - - mov _rax, [_rbp + eq_J9VMThread_heapAlloc] ; load heap alloc - mov _rcx, [_rbp + eq_J9VMThread_heapTop] ; check if it's larger than heapTop - sub _rcx, _rax ; compute how much space is available - cmp _rcx, _rdi ; see if it's enough for our object - jb cannotAllocateNZI - lea _rcx, [_rax + _rdi] ; compute new heapAlloc - prefetchnta [_rcx + 0c0h] - mov [_rbp + eq_J9VMThread_heapAlloc], _rcx ; store new heapAlloc - prefetchnta [_rcx + 0100h] - prefetchnta [_rcx + 0140h] - prefetchnta [_rcx + 0180h] - xor _rcx, _rcx -ifdef TR_HOST_64BIT -ifdef ASM_J9VM_INTERP_COMPRESSED_OBJECT_HEADER - mov dword ptr [_rax + 8], ecx -else - mov dword ptr [_rax + 12], ecx -endif -else - mov dword ptr [_rax + 8], ecx -endif - -doneNZI: - ret - -cannotAllocateNZI: - xor _rax, _rax - jmp doneNZI - -endif ; ifndef J9TR_VMThread_heapAlloc - -outlinedNewArrayNoZeroInit endp - -; outlinedNewObject -; -; Allocates new object out of line. Callable by the JIT generated code. -; Assumes EBP = vmThread, EAX is the return value. -; If we are out of TLH space, it returns 0. -; IT DOES NOT INITIALIZE THE HEADER -; -; Parameters: -; -; class -; instance size -; -; Returns: -; -; eax - allocated object address or 0 if we are out of TLH space -; - align 16 -ifdef TR_HOST_64BIT -outlinedNewObject proc -else -outlinedNewObject proc near -endif -startNew: - -ifndef J9TR_VMThread_heapAlloc - int 3 ; outlined allocation not supported without heapAlloc field -else - - mov _rax, [_rbp + eq_J9VMThread_heapAlloc] ; load heap alloc - mov _rcx, [_rbp + eq_J9VMThread_heapTop] ; check if it's larger than heapTop - sub _rcx, _rax ; compute how much space is available - cmp _rcx, _rdi ; see if it's enough for our object - jb cannotAllocateNew - lea _rcx, [_rax + _rdi] ; compute new heapAlloc - prefetchnta [_rcx + 0c0h] - mov [_rbp + eq_J9VMThread_heapAlloc], _rcx ; store new heapAlloc - prefetchnta [_rcx + 0100h] - prefetchnta [_rcx + 0140h] - prefetchnta [_rcx + 0180h] - -ifdef ASM_J9VM_GC_TLH_PREFETCH_FTA_DISABLE - sub dword ptr [_rbp + J9TR_VMThread_tlhPrefetchFTA], edi - jle prefetchHeap ; do prefetch of the next TLH chunk -donePrefetch: -endif - - cmp _rdi, 40 - jb doObjectLoopCopy - -ifdef TR_HOST_64BIT - lea _rcx, [_rdi + 7] - shr _rcx, 3 -else - lea _rcx, [_rdi + 3] - shr _rcx, 2 -endif - mov _rdi, _rax - mov _rsi, _rax - xor _rax, _rax - -ifdef TR_HOST_64BIT - rep stosq -else - rep stosd -endif - mov _rax, _rsi - ret - -doObjectLoopCopy: - ; clear the memory - xor _rcx, _rcx - sub _rdi, eq_gpr_size ; added right subtract in init loop -zeroInitLoop: - mov [_rax + _rdi], _rcx ; store zero - sub _rdi, eq_gpr_size - jge zeroInitLoop - ret - -cannotAllocateNew: - xor _rax, _rax - ret - -ifdef ASM_J9VM_GC_TLH_PREFETCH_FTA_DISABLE -prefetchHeap: - call prefetchTLH; - jmp donePrefetch -endif - -endif ; ifndef J9TR_VMThread_heapAlloc - -outlinedNewObject endp - - align 16 -ifdef TR_HOST_64BIT -outlinedNewObjectNoZeroInit proc -else -outlinedNewObjectNoZeroInit proc near -endif -startNewNZI: - -ifndef J9TR_VMThread_heapAlloc - int 3 ; outlined allocation not supported without heapAlloc field -else - - mov _rax, [_rbp + eq_J9VMThread_heapAlloc] ; load heap alloc - mov _rcx, [_rbp + eq_J9VMThread_heapTop] ; check if it's larger than heapTop - sub _rcx, _rax ; compute how much space is available - cmp _rcx, _rdi ; see if it's enough for our object - jb cannotAllocateNI - lea _rcx, [_rax + _rdi] ; compute new heapAlloc - prefetchnta [_rcx + 0c0h] - mov [_rbp + eq_J9VMThread_heapAlloc], _rcx ; store new heapAlloc - prefetchnta [_rcx + 0100h] - prefetchnta [_rcx + 0140h] - prefetchnta [_rcx + 0180h] - -ifdef ASM_J9VM_GC_TLH_PREFETCH_FTA_DISABLE - sub dword ptr [_rbp + J9TR_VMThread_tlhPrefetchFTA], edi - jle prefetchHeapNZI ; do prefetch of the next TLH chunk -donePrefetchNZI: -endif - ; clear the memory -ifdef DO_CLASS_POINTER_INIT - xor _rcx, _rcx -endif - -ifdef ENABLE_THIS_CODE_FOR_LOCK_WORD_INIT -ifdef TR_HOST_64BIT - int3 ; reminder so that someone can add this code -else - mov _rdi, dword ptr [_rax + J9TR_J9Object_class] ; receiver class - and _rdi, eq_ObjectClassMask - mov _rdi, dword ptr [_rdi + J9TR_J9Class_lockOffset] ; offset of lock word in receiver class - cmp _rdi, 0 - jle skipLockWordInit - mov dword ptr [_rax + _rdi], _rcx -endif -skipLockWordInit: -endif - -ifdef DO_CLASS_POINTER_INIT - mov [_rax ], _rcx ; 8-byte store is harmless even with compressed object headers -endif -doneNI: - ret - -cannotAllocateNI: - xor _rax, _rax - jmp doneNI - -ifdef ASM_J9VM_GC_TLH_PREFETCH_FTA_DISABLE -prefetchHeapNZI: - call prefetchTLH; - jmp donePrefetchNZI -endif - -endif ; ifndef J9TR_VMThread_heapAlloc - -outlinedNewObjectNoZeroInit endp - ifdef ASM_J9VM_GC_TLH_PREFETCH_FTA