Revert "Switch reverse PInvoke to the NativeCallable plan (#34251)" #34306

Merged
merged 1 commit into from
Mar 31, 2020
3 changes: 2 additions & 1 deletion src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs
@@ -2921,7 +2921,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
if (this.MethodBeingCompiled.IsNativeCallable)
{
#if READYTORUN
-                if (targetArchitecture == TargetArchitecture.X86)
+                if (targetArchitecture == TargetArchitecture.X86
+                    && _compilation.TypeSystemContext.Target.OperatingSystem == TargetOS.Windows)
{
throw new RequiresRuntimeJitException("ReadyToRun: Methods with NativeCallableAttribute not implemented");
}
@@ -1661,7 +1661,9 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
pResult->methodFlags = FilterNamedIntrinsicMethodAttribs(pResult->methodFlags, methodToCall);

var targetDetails = _compilation.TypeSystemContext.Target;
-            if (targetDetails.Architecture == TargetArchitecture.X86 && targetMethod.IsNativeCallable)
+            if (targetDetails.Architecture == TargetArchitecture.X86
+                && targetDetails.OperatingSystem == TargetOS.Windows
+                && targetMethod.IsNativeCallable)
{
throw new RequiresRuntimeJitException("ReadyToRun: References to methods with NativeCallableAttribute not implemented");
}
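Both hunks above narrow the same ReadyToRun bail-out: crossgen previously fell back to the runtime JIT for methods marked with NativeCallableAttribute on every x86 target, and after this change does so only on Windows x86. For orientation, here is a minimal sketch of the kind of method these checks gate. NativeCallableAttribute was not public API at the time (it later shipped as UnmanagedCallersOnlyAttribute), so the attribute declaration below is a hypothetical stand-in of the sort the runtime tests declared locally, and the method body is illustrative.

    using System;
    using System.Runtime.InteropServices;

    // Hypothetical local stand-in for the runtime's internal attribute.
    [AttributeUsage(AttributeTargets.Method)]
    public sealed class NativeCallableAttribute : Attribute
    {
        public CallingConvention CallingConvention;
        public string EntryPoint;
    }

    public static class Exports
    {
        // Callable directly from native code; the JIT wraps the body in the
        // reverse-P/Invoke transition instead of going through a delegate thunk.
        [NativeCallable(CallingConvention = CallingConvention.StdCall)]
        public static int Add(int a, int b) => a + b;
    }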
241 changes: 241 additions & 0 deletions src/coreclr/src/vm/amd64/UMThunkStub.asm
@@ -11,8 +11,13 @@
include <AsmMacros.inc>
include AsmConstants.inc

extern CreateThreadBlockThrow:proc
extern TheUMEntryPrestubWorker:proc
extern UMEntryPrestubUnwindFrameChainHandler:proc
extern UMThunkStubUnwindFrameChainHandler:proc
extern g_TrapReturningThreads:dword
extern UMThunkStubRareDisableWorker:proc
extern ReversePInvokeBadTransition:proc

;
; METHODDESC_REGISTER: UMEntryThunk*
@@ -73,4 +78,240 @@ endif

NESTED_END TheUMEntryPrestub, _TEXT


;
; METHODDESC_REGISTER: UMEntryThunk*
;
NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler

UMThunkStubAMD64_STACK_FRAME_SIZE = 0

; number of integer registers saved in prologue
UMThunkStubAMD64_NUM_REG_PUSHES = 2
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8)

; rare path spill area
UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE



; HOST_NOTIFY_FLAG
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE

; XMM save area
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL

; Ensure that the offset of the XMM save area will be 16-byte aligned.
if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0 ; +8 for caller-pushed return address
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
endif

UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE

; Add in the callee scratch area size.
UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE

; Now we have the full size of the stack frame. The offsets have been computed relative to the
; top, so negate them to make them relative to the post-prologue rsp.
UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET
UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET
UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET
UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET ; +8 for return address
UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
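; Worked example of the computation above (a sketch, assuming
; SIZEOF_MAX_FP_ARG_SPILL = 40h, i.e. xmm0-xmm3, and
; SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES = 20h; the authoritative values live in
; the included constants files):
;   frame size: 10h (reg pushes) + 10h (rare path spill) + 8 (HOST_NOTIFY_FLAG)
;               + 40h (xmm save) = 68h; (68h + 8) MOD 16 = 0, so no padding;
;               + 20h (callee scratch) = 88h total
;   UMThunkStubAMD64_FRAME_OFFSET                = 20h
;   UMThunkStubAMD64_XMM_SAVE_OFFSET             = 88h - 20h - 68h = 0
;   UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET     = 88h - 20h - 28h = 40h (matches the .errnz below)
;   UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET      = 88h - 20h - 20h = 48h
;   UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = 88h + 8 - 20h   = 70h
;   UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE      = 88h - 10h       = 78h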

.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET


;
; [ callee scratch ] <-- new RSP
; [ callee scratch ]
; [ callee scratch ]
; [ callee scratch ]
; {optional stack args passed to callee}
; xmm0 <-- RBP
; xmm1
; xmm2
; xmm3
; {optional padding to align xmm regs}
; HOST_NOTIFY_FLAG (records whether a ReverseLeaveRuntime call needs to be made)
; [rare path spill area]
; [rare path spill area]
; rbp save
; r12 save
; return address <-- entry RSP
; [rcx home]
; [rdx home]
; [r8 home]
; [r9 home]
; stack arg 0
; stack arg 1
; ...

push_nonvol_reg r12
push_nonvol_reg rbp ; stack_args
alloc_stack UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE
set_frame rbp, UMThunkStubAMD64_FRAME_OFFSET ; stack_args
mov byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0 ; hosted
END_PROLOGUE

;
; Call GetThread()
;
INLINE_GETTHREAD r12 ; will not trash r10
test r12, r12
jz DoThreadSetup
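; rare path taken on the first reverse-P/Invoke call on a native thread:
; DoThreadSetup below saves the argument registers, creates the Thread
; object, and jumps back to HaveThread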

HaveThread:

; FailFast if a native callable method is invoked via ldftn and calli.
cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
jz InvalidTransition

;
; disable preemptive GC
;
mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1

;
; catch returning thread here if a GC is in progress
;
cmp [g_TrapReturningThreads], 0
jnz DoTrapReturningThreadsTHROW

InCooperativeMode:

mov r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
mov eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize] ; stack_args
test rax, rax ; stack_args
jnz CopyStackArgs ; stack_args

ArgumentsSetup:

mov rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub] ; rax <- Stub*
call rax
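; (rax holds the IL stub entry point; the IL stub marshals the arguments
; and invokes the target managed method, then returns here)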

PostCall:
;
; enable preemptive GC
;
mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0

; epilog
lea rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE]
pop rbp ; stack_args
pop r12
ret


DoThreadSetup:
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
; initial measurements indicate that this could be worth about a 5% savings in reverse
; pinvoke overhead.
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
call CreateThreadBlockThrow
mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]

mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]

; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]

mov r12, rax

jmp HaveThread

InvalidTransition:
; ReversePInvokeBadTransition will failfast
call ReversePInvokeBadTransition

DoTrapReturningThreadsTHROW:

mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9

; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
; initial measurements indicate that this could be worth about a 5% savings in reverse
; pinvoke overhead.
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3

mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
mov rcx, r12 ; Thread* pThread
mov rdx, METHODDESC_REGISTER ; UMEntryThunk* pUMEntry
call UMThunkStubRareDisableWorker
mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]

mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]

; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]

jmp InCooperativeMode

CopyStackArgs:
; rax = cbStackArgs (with 20h for register args subtracted out already)

sub rsp, rax
and rsp, -16

mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx
mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8

; rax = number of bytes

lea rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
lea rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE]

CopyLoop:
; rax = number of bytes remaining
; rcx = src
; rdx = dest
; r8 = scratch

add rax, -8
mov r8, [rcx + rax]
mov [rdx + rax], r8
jnz CopyLoop ; tests the flags set by the add above (mov does not affect flags)

mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]

jmp ArgumentsSetup

NESTED_END UMThunkStub, _TEXT

end
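For context on where UMThunkStub sits: it is the generic transition stub for reverse P/Invoke, entered when native code calls a function pointer produced for a managed delegate (first calls go through TheUMEntryPrestub above). A minimal sketch of producing such a pointer with the public marshalling API; the delegate type and body are illustrative:

    using System;
    using System.Runtime.InteropServices;

    public static class CallbackDemo
    {
        [UnmanagedFunctionPointer(CallingConvention.StdCall)]
        public delegate int NativeCallback(int value);

        public static IntPtr CreateCallbackPointer(out NativeCallback keepAlive)
        {
            // Root the delegate for as long as native code may invoke the
            // pointer; collecting it would leave a dangling thunk.
            keepAlive = value => value * 2;

            // Native code calling this pointer enters the thunk that leads
            // to UMThunkStub, which performs the GC transition seen above.
            return Marshal.GetFunctionPointerForDelegate(keepAlive);
        }
    }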

17 changes: 17 additions & 0 deletions src/coreclr/src/vm/amd64/asmconstants.h
@@ -98,6 +98,21 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__ComPrestubMethodFrame
#define SIZEOF__ComMethodFrame 0x20
ASMCONSTANTS_C_ASSERT(SIZEOF__ComMethodFrame
== sizeof(ComMethodFrame));
#endif // FEATURE_COMINTEROP

#define OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo 0x18
ASMCONSTANTS_C_ASSERT(OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo
== offsetof(UMEntryThunk, m_pUMThunkMarshInfo));

#define OFFSETOF__UMThunkMarshInfo__m_pILStub 0x00
ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_pILStub
== offsetof(UMThunkMarshInfo, m_pILStub));

#define OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize 0x08
ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize
== offsetof(UMThunkMarshInfo, m_cbActualArgSize));
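
// For context: each ASMCONSTANTS_C_ASSERT pins a hand-maintained hex offset to
// the real C++ field layout at compile time, so the asm cannot silently drift.
// A minimal sketch of such a macro (an assumption for illustration; the actual
// definition in asmconstants.h may differ):
//
//   #define ASMCONSTANTS_C_ASSERT(cond) \
//       static_assert((cond), "asm constant mismatch: " #cond)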

#ifdef FEATURE_COMINTEROP

#define OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo DBG_FRE(0x30, 0x08)
ASMCONSTANTS_C_ASSERT(OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo
@@ -482,6 +497,8 @@ ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, Th

#define THROWSTUB_ESTABLISHER_OFFSET_FaultingExceptionFrame 0x30

#define UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET (0x40) // xmm save size

#define Thread__ObjectRefFlush ?ObjectRefFlush@Thread@@SAXPEAV1@@Z

