diff --git a/common/include/Utilities/PageFaultSource.h b/common/include/Utilities/PageFaultSource.h index 823ef804a8bf0..a4d971b0109a2 100644 --- a/common/include/Utilities/PageFaultSource.h +++ b/common/include/Utilities/PageFaultSource.h @@ -342,8 +342,8 @@ class SpatialArrayReserve : public BaseVmReserveListener struct _EXCEPTION_POINTERS; extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps); -# define PCSX2_PAGEFAULT_PROTECT __try -# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {} +# define PCSX2_PAGEFAULT_PROTECT __try +# define PCSX2_PAGEFAULT_EXCEPT __except(SysPageFaultExceptionFilter(GetExceptionInformation())) {} #else # error PCSX2 - Unsupported operating system platform. @@ -352,5 +352,7 @@ extern int SysPageFaultExceptionFilter(struct _EXCEPTION_POINTERS* eps); extern void pxInstallSignalHandler(); extern void _platform_InstallSignalHandler(); +#include "Threading.h" extern SrcType_PageFault* Source_PageFault; +extern Threading::Mutex PageFault_Mutex; diff --git a/common/include/Utilities/Threading.h b/common/include/Utilities/Threading.h index 85f0134117f98..6e6edb1e9dbe4 100644 --- a/common/include/Utilities/Threading.h +++ b/common/include/Utilities/Threading.h @@ -179,17 +179,20 @@ namespace Threading // from these little beasties! (these are all implemented internally using cross-platform // implementations of _InterlockedExchange and such) + extern u32 AtomicRead( volatile u32& Target ); + extern s32 AtomicRead( volatile s32& Target ); extern u32 AtomicExchange( volatile u32& Target, u32 value ); - extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value ); - extern u32 AtomicIncrement( volatile u32& Target ); - extern u32 AtomicDecrement( volatile u32& Target ); extern s32 AtomicExchange( volatile s32& Target, s32 value ); + extern u32 AtomicExchangeAdd( volatile u32& Target, u32 value ); extern s32 AtomicExchangeAdd( volatile s32& Target, s32 value ); extern s32 AtomicExchangeSub( volatile s32& Target, s32 value ); + extern u32 AtomicIncrement( volatile u32& Target ); extern s32 AtomicIncrement( volatile s32& Target ); + extern u32 AtomicDecrement( volatile u32& Target ); extern s32 AtomicDecrement( volatile s32& Target ); extern bool AtomicBitTestAndReset( volatile u32& bitset, u8 bit ); + extern bool AtomicBitTestAndReset( volatile s32& bitset, u8 bit ); extern void* _AtomicExchangePointer( volatile uptr& target, uptr value ); extern void* _AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand ); @@ -393,5 +396,34 @@ namespace Threading bool Failed() const { return !m_IsLocked; } }; + +// -------------------------------------------------------------------------------------- +// ScopedLockBool +// -------------------------------------------------------------------------------------- +// A ScopedLock in which you specify an external bool to get updated on locks/unlocks. +// Note that the isLockedBool should only be used as an indicator for the locked status, +// and not actually depended on for thread synchronization... 
+ + struct ScopedLockBool { + ScopedLock m_lock; + volatile __aligned(4) bool& m_bool; + + ScopedLockBool(Mutex& mutexToLock, volatile __aligned(4) bool& isLockedBool) + : m_lock(mutexToLock), + m_bool(isLockedBool) { + m_bool = m_lock.IsLocked(); + } + virtual ~ScopedLockBool() throw() { + m_bool = false; + } + void Acquire() { + m_lock.Acquire(); + m_bool = m_lock.IsLocked(); + } + void Release() { + m_bool = false; + m_lock.Release(); + } + }; } diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index c862ee25c00b4..9c5ead8fc72dd 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -35,10 +35,12 @@ enum XMMSSEType // as a project option. The multithreaded emitter relies on native compiler support for // TLS -- Macs are crap out of luck there (for now). +#include "Utilities/Threading.h" + #ifndef x86EMIT_MULTITHREADED -# define x86EMIT_MULTITHREADED 0 -#else -# if !PCSX2_THREAD_LOCAL +# if PCSX2_THREAD_LOCAL +# define x86EMIT_MULTITHREADED 1 +# else // No TLS support? Force-clear the MT flag: # pragma message("x86emitter: TLS not available, multithreaded emitter disabled.") # undef x86EMIT_MULTITHREADED diff --git a/common/src/Utilities/Linux/LnxHostSys.cpp b/common/src/Utilities/Linux/LnxHostSys.cpp index ffacbc92321c5..c2804862baa9a 100644 --- a/common/src/Utilities/Linux/LnxHostSys.cpp +++ b/common/src/Utilities/Linux/LnxHostSys.cpp @@ -46,6 +46,12 @@ static void SysPageFaultSignalFilter( int signal, siginfo_t *siginfo, void * ) // Note: Use of stdio functions isn't safe here. Avoid console logs, // assertions, file logs, or just about anything else useful. + + // Note: This signal can be accessed by the EE or MTVU thread + // Source_PageFault is a global variable with its own state information + // so for now we lock this exception code unless someone can fix this better... + Threading::ScopedLock lock(PageFault_Mutex); + Source_PageFault->Dispatch( PageFaultInfo( (uptr)siginfo->si_addr & ~m_pagemask ) ); // resumes execution right where we left off (re-executes instruction that diff --git a/common/src/Utilities/ThreadTools.cpp b/common/src/Utilities/ThreadTools.cpp index 10ef9ea0b4deb..01ea10f1b1399 100644 --- a/common/src/Utilities/ThreadTools.cpp +++ b/common/src/Utilities/ThreadTools.cpp @@ -786,72 +786,70 @@ void Threading::WaitEvent::Wait() // InterlockedExchanges / AtomicExchanges (PCSX2's Helper versions) // -------------------------------------------------------------------------------------- // define some overloads for InterlockedExchanges for commonly used types, like u32 and s32. +// Note: For all of these atomic operations below to be atomic, the variables need to be 4-byte +// aligned. 
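The two Threading.h additions above are meant to work together. Here is a hypothetical usage sketch (not part of the patch; names are illustration-only): AtomicRead polls a 4-byte-aligned counter lock-free, while ScopedLockBool mirrors a mutex's held state into a flag that other threads may treat as a hint only.

#include "Utilities/Threading.h"

using namespace Threading;

static Mutex s_mtxWork;
static volatile __aligned(4) bool s_busy = false; // mirrored by ScopedLockBool
static volatile u32 s_itemsDone = 0;              // 4-byte aligned, so plain reads are atomic

void WorkerDoOneItem()
{
    ScopedLockBool busy(s_mtxWork, s_busy); // sets s_busy while the mutex is held
    // ... process one unit of work under the lock ...
    AtomicIncrement(s_itemsDone);
} // ~ScopedLockBool() clears s_busy on scope exit

u32 PollProgressFromAnyThread()
{
    // s_busy is only an indicator; real synchronization still goes through s_mtxWork
    return AtomicRead(s_itemsDone);
}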
Read: http://msdn.microsoft.com/en-us/library/ms684122%28v=vs.85%29.aspx -__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) -{ - return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0; +__fi u32 Threading::AtomicRead(volatile u32& Target) { + return Target; // Properly-aligned 32-bit reads are atomic } - -__fi u32 Threading::AtomicExchange( volatile u32& Target, u32 value ) -{ - return _InterlockedExchange( (volatile long*)&Target, value ); +__fi s32 Threading::AtomicRead(volatile s32& Target) { + return Target; // Properly-aligned 32-bit reads are atomic } -__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) -{ - return _InterlockedExchangeAdd( (volatile long*)&Target, value ); +__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) { + return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0; } - -__fi u32 Threading::AtomicIncrement( volatile u32& Target ) -{ - return _InterlockedExchangeAdd( (volatile long*)&Target, 1 ); +__fi bool Threading::AtomicBitTestAndReset( volatile s32& bitset, u8 bit ) { + return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0; } -__fi u32 Threading::AtomicDecrement( volatile u32& Target ) -{ - return _InterlockedExchangeAdd( (volatile long*)&Target, -1 ); +__fi u32 Threading::AtomicExchange(volatile u32& Target, u32 value ) { + return _InterlockedExchange( (volatile long*)&Target, value ); } - -__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) -{ +__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) { return _InterlockedExchange( (volatile long*)&Target, value ); } -__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) -{ +__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) { + return _InterlockedExchangeAdd( (volatile long*)&Target, value ); +} +__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) { return _InterlockedExchangeAdd( (volatile long*)&Target, value ); } -__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) -{ +__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) { return _InterlockedExchangeAdd( (volatile long*)&Target, -value ); } -__fi s32 Threading::AtomicIncrement( volatile s32& Target ) -{ +__fi u32 Threading::AtomicIncrement( volatile u32& Target ) { + return _InterlockedExchangeAdd( (volatile long*)&Target, 1 ); +} +__fi s32 Threading::AtomicIncrement( volatile s32& Target) { return _InterlockedExchangeAdd( (volatile long*)&Target, 1 ); } -__fi s32 Threading::AtomicDecrement( volatile s32& Target ) -{ +__fi u32 Threading::AtomicDecrement( volatile u32& Target ) { return _InterlockedExchangeAdd( (volatile long*)&Target, -1 ); } +__fi s32 Threading::AtomicDecrement(volatile s32& Target) { + return _InterlockedExchangeAdd((volatile long*)&Target, -1); +} -__fi void* Threading::_AtomicExchangePointer( volatile uptr& target, uptr value ) +__fi void* Threading::_AtomicExchangePointer(volatile uptr& target, uptr value) { #ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place. 
- return (void*)_InterlockedExchange64( &(volatile s64&)target, value ); + return (void*)_InterlockedExchange64(&(volatile s64&)target, value); #else - return (void*)_InterlockedExchange( (volatile long*)&target, value ); + return (void*)_InterlockedExchange((volatile long*)&target, value); #endif } -__fi void* Threading::_AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand ) +__fi void* Threading::_AtomicCompareExchangePointer(volatile uptr& target, uptr value, uptr comparand) { #ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place. - return (void*)_InterlockedCompareExchange64( &(volatile s64&)target, value ); + return (void*)_InterlockedCompareExchange64(&(volatile s64&)target, value); #else - return (void*)_InterlockedCompareExchange( &(volatile long&)target, value, comparand ); + return (void*)_InterlockedCompareExchange(&(volatile long&)target, value, comparand); #endif } diff --git a/common/src/Utilities/VirtualMemory.cpp b/common/src/Utilities/VirtualMemory.cpp index 02521f4a5cca4..2e8742354e8b4 100644 --- a/common/src/Utilities/VirtualMemory.cpp +++ b/common/src/Utilities/VirtualMemory.cpp @@ -26,11 +26,11 @@ template class EventSource< IEventListener_PageFault >; SrcType_PageFault* Source_PageFault = NULL; +Threading::Mutex PageFault_Mutex; void pxInstallSignalHandler() { - if (!Source_PageFault) - { + if(!Source_PageFault) { Source_PageFault = new SrcType_PageFault(); } diff --git a/common/src/Utilities/Windows/WinHostSys.cpp b/common/src/Utilities/Windows/WinHostSys.cpp index f010aeec9307b..965e6ae287c8d 100644 --- a/common/src/Utilities/Windows/WinHostSys.cpp +++ b/common/src/Utilities/Windows/WinHostSys.cpp @@ -25,6 +25,10 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps ) if( eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION ) return EXCEPTION_CONTINUE_SEARCH; + // Note: This exception can be accessed by the EE or MTVU thread + // Source_PageFault is a global variable with its own state information + // so for now we lock this exception code unless someone can fix this better... + Threading::ScopedLock lock(PageFault_Mutex); Source_PageFault->Dispatch( PageFaultInfo( (uptr)eps->ExceptionRecord->ExceptionInformation[1] ) ); return Source_PageFault->WasHandled() ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH; } diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 1d56d24060909..36194e2a910ce 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -377,7 +377,8 @@ struct Pcsx2Config IntcStat :1, // tells Pcsx2 to fast-forward through intc_stat waits. 
WaitLoop :1, // enables constant loop detection and fast-forwarding vuFlagHack :1, // microVU specific flag hack - vuBlockHack :1; // microVU specific block flag no-propagation hack + vuBlockHack :1, // microVU specific block flag no-propagation hack + vuThread :1; // Enable Threaded VU1 BITFIELD_END u8 EECycleRate; // EE cycle rate selector (1.0, 1.5, 2.0) @@ -471,6 +472,7 @@ TraceLogFilters& SetTraceConfig(); // ------------ CPU / Recompiler Options --------------- +#define THREAD_VU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1 && EmuConfig.Speedhacks.vuThread) #define CHECK_MICROVU0 (EmuConfig.Cpu.Recompiler.UseMicroVU0) #define CHECK_MICROVU1 (EmuConfig.Cpu.Recompiler.UseMicroVU1) #define CHECK_EEREC (EmuConfig.Cpu.Recompiler.EnableEE && GetCpuProviders().IsRecAvailable_EE()) diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index 0eb816b7beaa7..27ae9eead58e1 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -17,9 +17,8 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "Gif.h" -#include "Gif_Unit.h" #include "GS.h" +#include "Gif_Unit.h" #include "Vif.h" #include "Vif_Dma.h" #include "IPU/IPU.h" diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 779f10a8e6739..b00e8b65f385f 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -19,7 +19,6 @@ #include #include "GS.h" -#include "Gif.h" #include "Gif_Unit.h" #include "Counters.h" diff --git a/pcsx2/GS.h b/pcsx2/GS.h index e3fac7b2c8ffa..a9631824b3824 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -245,6 +245,7 @@ enum MTGS_RingCommand , GS_RINGTYPE_MODECHANGE // for issued mode changes. , GS_RINGTYPE_CRC , GS_RINGTYPE_GSPACKET +, GS_RINGTYPE_MTVU_GSPACKET }; @@ -263,8 +264,8 @@ class SysMtgsThread : public SysThreadBase public: // note: when m_ReadPos == m_WritePos, the fifo is empty - uint m_ReadPos; // cur pos gs is reading from - uint m_WritePos; // cur pos ee thread is writing to + __aligned(4) uint m_ReadPos; // cur pos gs is reading from + __aligned(4) uint m_WritePos; // cur pos ee thread is writing to volatile bool m_RingBufferIsBusy; volatile u32 m_SignalRingEnable; @@ -273,7 +274,9 @@ class SysMtgsThread : public SysThreadBase volatile s32 m_QueuedFrameCount; volatile u32 m_VsyncSignalListener; - Mutex m_mtx_RingBufferBusy; + Mutex m_mtx_RingBufferBusy; // Is obtained while processing ring-buffer data + Mutex m_mtx_RingBufferBusy2; // This one gets released on semaXGkick waiting... + Mutex m_mtx_WaitGS; Semaphore m_sem_OnRingReset; Semaphore m_sem_Vsync; @@ -304,8 +307,7 @@ class SysMtgsThread : public SysThreadBase virtual ~SysMtgsThread() throw(); // Waits for the GS to empty out the entire ring buffer contents. - // Used primarily for plugin startup/shutdown. 
- void WaitGS(); + void WaitGS(bool syncRegs=true, bool weakWait=false, bool isMTVU=false); void ResetGS(); void PrepDataPacket( MTGS_RingCommand cmd, u32 size ); diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index 642b682a54531..0a085b87d419b 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -17,7 +17,6 @@ #include "Common.h" #include "GS.h" -#include "Gif.h" #include "Gif_Unit.h" #include "Vif_Dma.h" @@ -87,6 +86,7 @@ __fi void gifInterrupt() } static u32 WRITERING_DMA(u32 *pMem, u32 qwc) { + //qwc = min(qwc, 1024u); uint size = gifUnit.TransferGSPacketData(GIF_TRANS_DMA, (u8*)pMem, qwc*16) / 16; incGifChAddr(size); return size; diff --git a/pcsx2/Gif.h b/pcsx2/Gif.h index 81238a269e562..bdbe46fe4a6bc 100644 --- a/pcsx2/Gif.h +++ b/pcsx2/Gif.h @@ -35,15 +35,17 @@ enum GIF_PATH { enum GIF_TRANSFER_TYPE { GIF_TRANS_INVALID = 0x000, // Invalid GIF_TRANS_XGKICK = 0x100, // Path 1 - GIF_TRANS_DIRECT = 0x201, // Path 2 - GIF_TRANS_DIRECTHL = 0x301, // Path 2 - GIF_TRANS_DMA = 0x402, // Path 3 - GIF_TRANS_FIFO = 0x502 // Path 3 + GIF_TRANS_MTVU = 0x200, // Path 1 + GIF_TRANS_DIRECT = 0x301, // Path 2 + GIF_TRANS_DIRECTHL = 0x401, // Path 2 + GIF_TRANS_DMA = 0x502, // Path 3 + GIF_TRANS_FIFO = 0x602 // Path 3 }; -static const char Gif_TransferStr[6][32] = { +static const char Gif_TransferStr[7][32] = { "Invalid Transfer Type", "GIF_TRANS_XGKICK", + "GIF_TRANS_MTVU", "GIF_TRANS_DIRECT", "GIF_TRANS_DIRECTHL", "GIF_TRANS_DMA", diff --git a/pcsx2/Gif_Logger.cpp b/pcsx2/Gif_Logger.cpp index bf28de58764e2..7811c3cc1e8d0 100644 --- a/pcsx2/Gif_Logger.cpp +++ b/pcsx2/Gif_Logger.cpp @@ -15,7 +15,6 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "Gif.h" #include "Gif_Unit.h" #define GIF_PARSE DevCon.WriteLn diff --git a/pcsx2/Gif_Unit.cpp b/pcsx2/Gif_Unit.cpp index 206043a0ffc08..eefd76cf07c41 100644 --- a/pcsx2/Gif_Unit.cpp +++ b/pcsx2/Gif_Unit.cpp @@ -19,6 +19,7 @@ #include "GS.h" #include "Gif_Unit.h" #include "Vif_Dma.h" +#include "MTVU.h" Gif_Unit gifUnit; @@ -76,12 +77,32 @@ bool Gif_HandlerAD(u8* pMem) { return false; } +// Returns true if pcsx2 needed to process the packet... +bool Gif_HandlerAD_Debug(u8* pMem) { + u32 reg = pMem[8]; + if (reg == 0x50) { Console.Error("GIF Handler Debug - BITBLTBUF"); return 1; } + elif (reg == 0x52) { Console.Error("GIF Handler Debug - TRXREG"); return 1; } + elif (reg == 0x53) { Console.Error("GIF Handler Debug - TRXDIR"); return 1; } + elif (reg == 0x60) { Console.Error("GIF Handler Debug - SIGNAL"); return 1; } + elif (reg == 0x61) { Console.Error("GIF Handler Debug - FINISH"); return 1; } + elif (reg == 0x62) { Console.Error("GIF Handler Debug - LABEL"); return 1; } + elif (reg >= 0x63 && reg != 0x7f) { + DevCon.Warning("GIF Handler Debug - Write to unknown register! [reg=%x]", reg); + } + return 0; +} + void Gif_FinishIRQ() { if (CSRreg.FINISH && !(GSIMR&0x200)) { gsIrq(); } } +// Used in MTVU mode... 
MTVU will later complete a real packet
+void Gif_AddGSPacketMTVU(GS_Packet& gsPack, GIF_PATH path) {
+	GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_MTVU_GSPACKET, 0, 0, path);
+}
+
 void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) {
 	//DevCon.WriteLn("Adding Completed Gif Packet [size=%x]", gsPack.size);
 	if (COPY_GS_PACKET_TO_MTGS) {
@@ -91,6 +112,7 @@ void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) {
 		GetMTGS().SendDataPacket();
 	}
 	else {
+		pxAssertDev(!gsPack.readAmount, "Gif Unit - gsPack.readAmount only valid for MTVU path 1!");
 		AtomicExchangeAdd(gifUnit.gifPath[path].readAmount, gsPack.size);
 		GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_GSPACKET, gsPack.offset, gsPack.size, path);
 	}
@@ -102,35 +124,47 @@ void Gif_AddBlankGSPacket(u32 size, GIF_PATH path) {
 	GetMTGS().SendSimpleGSPacket(GS_RINGTYPE_GSPACKET, ~0u, size, path);
 }
 
-void Gif_MTGS_Wait() {
-	GetMTGS().WaitGS();
-}
-
-void Gif_Execute() {
-	gifUnit.Execute();
+void Gif_MTGS_Wait(bool isMTVU) {
+	GetMTGS().WaitGS(false, true, isMTVU);
 }
 
 void SaveStateBase::gifPathFreeze(u32 path) {
 	Gif_Path& gifPath = gifUnit.gifPath[path];
-	pxAssertDev(gifPath.readAmount==0, "Gif Path readAmount should be 0!");
+	pxAssertDev(!gifPath.readAmount, "Gif Path readAmount should be 0!");
+	pxAssertDev(!gifPath.gsPack.readAmount, "GS Pack readAmount should be 0!");
+	pxAssertDev(!gifPath.GetPendingGSPackets(), "MTVU GS Pack Queue should be 0!");
+
 	if (IsSaving()) { // Move all the buffered data to the start of buffer
 		gifPath.RealignPacket(); // May add readAmount which we need to clear on load
 	}
 	u8* bufferPtr = gifPath.buffer; // Backup current buffer ptr
-	Freeze(gifPath);
+	Freeze(gifPath.mtvu.fakePackets);
+	FreezeMem(&gifPath, sizeof(gifPath) - sizeof(gifPath.mtvu));
 	FreezeMem(bufferPtr, gifPath.curSize);
 	gifPath.buffer = bufferPtr;
-	if (!IsSaving()) gifPath.readAmount = 0;
+	if(!IsSaving()) {
+		gifPath.readAmount = 0;
+		gifPath.gsPack.readAmount = 0;
+	}
 }
 
 void SaveStateBase::gifFreeze() {
-	Gif_MTGS_Wait();
+	bool mtvuMode = THREAD_VU1;
+	pxAssert(vu1Thread.IsDone());
+	GetMTGS().WaitGS();
 	FreezeTag("Gif Unit");
+	Freeze(mtvuMode);
 	Freeze(gifUnit.stat);
 	Freeze(gifUnit.gsSIGNAL);
 	Freeze(gifUnit.lastTranType);
 	gifPathFreeze(GIF_PATH_1);
 	gifPathFreeze(GIF_PATH_2);
 	gifPathFreeze(GIF_PATH_3);
+	if (!IsSaving()) {
+		if (mtvuMode != THREAD_VU1) {
+			DevCon.Warning("gifUnit: MTVU Mode has switched between save/load state");
+			// ToDo: gifUnit.SwitchMTVU(mtvuMode);
+		}
+	}
 }
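Before the Gif_Unit.h changes that follow, a distilled model of the handoff these functions implement may help. This is a hypothetical sketch, not patch code (the real implementation is Gif_Path_MTVU and the MTVU methods of Gif_Path below): the EE thread only counts placeholder packets, the MTVU thread publishes finished XGKICK packets to a mutex-guarded queue, and the MTGS thread pops them.

#include <deque>
#include <mutex>

struct MiniPath1Handoff {          // stand-in names, illustration only
    unsigned fakePackets;          // EE thread: placeholders still pending
    std::mutex mtx;                // guards 'finished' below
    std::deque<int> finished;      // MTVU thread -> MTGS thread

    MiniPath1Handoff() : fakePackets(0) {}

    void EE_QueuePlaceholder() { fakePackets++; } // EE thread only
    void VU_PublishPacket(int id) {               // MTVU thread, after XGKICK
        std::lock_guard<std::mutex> lock(mtx);
        finished.push_back(id);
    }
    bool MTGS_PopPacket(int& id) {                // MTGS thread
        std::lock_guard<std::mutex> lock(mtx);
        if (finished.empty()) return false;
        id = finished.front();
        finished.pop_front();
        return true;
    }
};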
diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h
index 9649bf1f9e797..dc792c3dd85eb 100644
--- a/pcsx2/Gif_Unit.h
+++ b/pcsx2/Gif_Unit.h
@@ -14,11 +14,16 @@
  */
 
 #pragma once
+#include <deque>
 #include "System/SysThreads.h"
+#include "Gif.h"
 
 struct GS_Packet;
-extern void Gif_MTGS_Wait();
+extern void Gif_MTGS_Wait(bool isMTVU);
 extern void Gif_FinishIRQ();
 extern bool Gif_HandlerAD(u8* pMem);
+extern bool Gif_HandlerAD_Debug(u8* pMem);
+extern void Gif_AddBlankGSPacket(u32 size, GIF_PATH path);
+extern void Gif_AddGSPacketMTVU (GS_Packet& gsPack, GIF_PATH path);
 extern void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path);
 extern void Gif_ParsePacket(u8* data, u32 size, GIF_PATH path);
 extern void Gif_ParsePacket(GS_Packet& gsPack, GIF_PATH path);
@@ -105,10 +110,11 @@ struct Gif_Tag {
 };
 
 struct GS_Packet {
-	u32  offset; // Path buffer offset for start of packet
-	u32  size;   // Full size of GS-Packet
-	s32  cycles; // EE Cycles taken to process this GS packet
-	bool done;   // 0 = Incomplete, 1 = Complete
+	u32  offset;     // Path buffer offset for start of packet
+	u32  size;       // Full size of GS-Packet
+	s32  cycles;     // EE Cycles taken to process this GS packet
+	s32  readAmount; // Dummy read-amount data needed for proper buffer calculations
+	bool done;       // 0 = Incomplete, 1 = Complete
 	GS_Packet() { Reset(); }
 	void Reset() { memzero(*this); }
 };
@@ -124,8 +130,16 @@ static __fi void incTag(u32& offset, u32& size, u32 incAmount) {
 	offset += incAmount;
 }
 
+struct Gif_Path_MTVU {
+	u32 fakePackets;   // Fake packets pending to be sent to MTGS
+	Mutex gsPackMutex; // Used for atomic access to gsPackQueue
+	std::deque<GS_Packet> gsPackQueue; // VU1 programs' XGkick(s)
+	Gif_Path_MTVU() { Reset(); }
+	void Reset() { fakePackets = 0; gsPackQueue.clear(); }
+};
+
 struct Gif_Path {
-	volatile s32 __aligned(4) readAmount; // Amount of data MTGS still needs to read
+	__aligned(4) volatile s32 readAmount; // Amount of data MTGS still needs to read
 	u8* buffer;    // Path packet buffer
 	u32 buffSize;  // Full size of buffer
 	u32 buffLimit; // Cut off limit to wrap around
@@ -135,6 +149,7 @@ struct Gif_Path {
 	GS_Packet gsPack; // Current GS Packet info
 	GIF_PATH idx;     // Gif Path Index
 	GIF_PATH_STATE state; // Path State
+	Gif_Path_MTVU mtvu;   // Must be last for saved states
 
 	Gif_Path() {}
 	~Gif_Path() { _aligned_free(buffer); }
@@ -156,6 +171,7 @@
 			//curOffset = curSize;
 			return;
 		}
+		mtvu.Reset();
 		curSize   = 0;
 		curOffset = 0;
 		readAmount = 0;
@@ -163,32 +179,38 @@
 		gsPack.Reset();
 	}
 
+	bool isMTVU() { return !idx && THREAD_VU1; }
+	s32  getReadAmount() { return AtomicRead(readAmount) + gsPack.readAmount; }
 	bool hasDataRemaining() { return curOffset < curSize; }
-	bool isDone() { return !hasDataRemaining() && state == GIF_PATH_IDLE; }
+	bool isDone() { return isMTVU() ? !mtvu.fakePackets
+	                                : (!hasDataRemaining() && state == GIF_PATH_IDLE); }
 
 	// Waits on the MTGS to process gs packets
 	void mtgsReadWait() {
-		//pxAssertDev(AtomicExchangeAdd(readAmount, 0) != 0, "Gif Path Buffer Overflow!");
-		DevCon.WriteLn(Color_Red, "Gif Path[%d] - MTGS Wait! [r=0x%x]",
-			idx+1, AtomicExchangeAdd(readAmount, 0));
-		Gif_MTGS_Wait();
+		if (IsDevBuild) {
+			DevCon.WriteLn(Color_Red, "Gif Path[%d] - MTGS Wait! [r=0x%x]", idx+1, getReadAmount());
+			Gif_MTGS_Wait(isMTVU());
+			DevCon.WriteLn(Color_Green, "Gif Path[%d] - MTGS Wait! 
[r=0x%x]", idx+1, getReadAmount()); + return; + } + Gif_MTGS_Wait(isMTVU()); } // Moves packet data to start of buffer void RealignPacket() { - extern void Gif_AddBlankGSPacket(u32 size, GIF_PATH path); GUNIT_LOG("Path Buffer: Realigning packet!"); s32 offset = curOffset - gsPack.size; s32 sizeToAdd = curSize - offset; s32 intersect = sizeToAdd - offset; if (intersect < 0) intersect = 0; for(;;) { - s32 frontFree = offset - AtomicExchangeAdd(readAmount, 0); + s32 frontFree = offset - getReadAmount(); if (frontFree >= sizeToAdd - intersect) break; mtgsReadWait(); } if (offset < (s32)buffLimit) { // Needed for correct readAmount values - Gif_AddBlankGSPacket(buffLimit - offset, idx); + if (isMTVU()) gsPack.readAmount += buffLimit - offset; + else Gif_AddBlankGSPacket(buffLimit - offset, idx); } //DevCon.WriteLn("Realign Packet [%d]", curSize - offset); if (intersect) memmove(buffer, &buffer[offset], curSize - offset); @@ -200,12 +222,12 @@ struct Gif_Path { void CopyGSPacketData(u8* pMem, u32 size, bool aligned = false) { if (curSize + size > buffSize) { // Move gsPack to front of buffer - DevCon.Warning("CopyGSPacketData: Realigning packet!"); + GUNIT_LOG("CopyGSPacketData: Realigning packet!"); RealignPacket(); } for(;;) { s32 offset = curOffset - gsPack.size; - s32 readPos = offset - AtomicExchangeAdd(readAmount, 0); + s32 readPos = offset - getReadAmount(); if (readPos >= 0) break; // MTGS is reading in back of curOffset if ((s32)buffLimit + readPos > (s32)curSize + (s32)size) break; // Enough free front space mtgsReadWait(); // Let MTGS run to free up buffer space @@ -217,12 +239,21 @@ struct Gif_Path { } // If completed a GS packet (with EOP) then returned GS_Packet.done = 1 + // MTVU: This function only should be called called on EE thread GS_Packet ExecuteGSPacket() { + if (mtvu.fakePackets) { // For MTVU mode... + mtvu.fakePackets--; + GS_Packet fakePack; + fakePack.done = 1; // Fake packets don't get processed by pcsx2 + fakePack.size =~0u; // Used to indicate that its a fake packet + return fakePack; + } + pxAssert(!isMTVU()); for(;;) { if (!gifTag.isValid) { // Need new Gif Tag // We don't have enough data for a Gif Tag if (curOffset + 16 > curSize) { - GUNIT_LOG("Path Buffer: Not enough data for gif tag! [%d]", curSize-curOffset); + //GUNIT_LOG("Path Buffer: Not enough data for gif tag! [%d]", curSize-curOffset); return gsPack; } @@ -249,7 +280,7 @@ struct Gif_Path { while(gifTag.nLoop && !dblSIGNAL) { if (curOffset + 16 > curSize) return gsPack; // Exit Early if (gifTag.curReg() == GIF_REG_A_D) { - dblSIGNAL = Gif_HandlerAD(&buffer[curOffset]); + if (!isMTVU()) dblSIGNAL = Gif_HandlerAD(&buffer[curOffset]); } incTag(curOffset, gsPack.size, 16); // 1 QWC gifTag.packedStep(); @@ -271,6 +302,84 @@ struct Gif_Path { } } } + + // MTVU: Gets called on VU XGkicks on MTVU thread + void ExecuteGSPacketMTVU() { + // Move packet to start of buffer + if (curOffset > buffLimit) { + RealignPacket(); + } + if (IsDevBuild) { // We check the packet to see if it actually + for(;;) { // needed to be processed by pcsx2... 
+ if (curOffset + 16 > curSize) break; + gifTag.setTag(&buffer[curOffset], 1); + + if(!gifTag.hasAD && curOffset + 16 + gifTag.len > curSize) break; + incTag(curOffset, gsPack.size, 16); // Tag Size + + if (gifTag.hasAD) { // Only can be true if GIF_FLG_PACKED + while(gifTag.nLoop) { + if (curOffset + 16 > curSize) break; // Exit Early + if (gifTag.curReg() == GIF_REG_A_D) { + pxAssert(!Gif_HandlerAD_Debug(&buffer[curOffset])); + } + incTag(curOffset, gsPack.size, 16); // 1 QWC + gifTag.packedStep(); + } + } + else incTag(curOffset, gsPack.size, gifTag.len); // Data length + if (curOffset >= curSize) break; + if (gifTag.tag.EOP) break; + } + pxAssert(curOffset == curSize); + gifTag.isValid = false; + } + else { + // We assume every packet is a full GS Packet + // And we don't process anything on pcsx2 side + gsPack.size += curSize - curOffset; + curOffset = curSize; + } + } + + // MTVU: Gets called after VU1 execution on MTVU thread + void FinishGSPacketMTVU() { + if (1) { + ScopedLock lock(mtvu.gsPackMutex); + AtomicExchangeAdd(readAmount, gsPack.size + gsPack.readAmount); + mtvu.gsPackQueue.push_back(gsPack); + } + gsPack.Reset(); + gsPack.offset = curOffset; + } + + // MTVU: Gets called by MTGS thread + GS_Packet GetGSPacketMTVU() { + ScopedLock lock(mtvu.gsPackMutex); + if (mtvu.gsPackQueue.size()) { + GS_Packet t = mtvu.gsPackQueue[0]; + return t; // XGkick GS packet(s) + } + Console.Error("MTVU: Expected gsPackQueue to have elements!"); + pxAssert(0); + return GS_Packet(); // gsPack.size will be 0 + } + + // MTVU: Gets called by MTGS thread + void PopGSPacketMTVU() { + ScopedLock lock(mtvu.gsPackMutex); + if (mtvu.gsPackQueue.size()) { + mtvu.gsPackQueue.pop_front(); + } + } + + // MTVU: Returns the amount of pending + // GS Packets that MTGS hasn't yet processed + u32 GetPendingGSPackets() { + ScopedLock lock(mtvu.gsPackMutex); + u32 t = mtvu.gsPackQueue.size(); + return t; + } }; struct Gif_Unit { @@ -280,8 +389,8 @@ struct Gif_Unit { GIF_TRANSFER_TYPE lastTranType; // Last Transfer Type Gif_Unit() : stat(gifRegs.stat) { - gifPath[0].Init(GIF_PATH_1, _1mb*8, _16kb + _1kb); - gifPath[1].Init(GIF_PATH_2, _1mb*8, _1mb + _1kb); + gifPath[0].Init(GIF_PATH_1, _1mb*9, _1mb + _1kb); + gifPath[1].Init(GIF_PATH_2, _1mb*9, _1mb + _1kb); gifPath[2].Init(GIF_PATH_3, _1mb*9, _1mb + _1kb); } @@ -307,24 +416,24 @@ struct Gif_Unit { // Adds a finished GS Packet to the MTGS ring buffer __fi void AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) { - Gif_AddCompletedGSPacket(gsPack, path); + if (gsPack.size==~0u) Gif_AddGSPacketMTVU (gsPack, path); + else Gif_AddCompletedGSPacket(gsPack, path); if (PRINT_GIF_PACKET) Gif_ParsePacket(gsPack, path); } // Returns GS Packet Size in bytes - u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0) { - u32 memMask = pathIdx ? 0xffffffffu : 0x3fffu; - u32 size = 0; + u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0, u32 size = ~0u) { + u32 memMask = pathIdx ? ~0u : 0x3fffu; + u32 curSize = 0; for(;;) { Gif_Tag gifTag(&pMem[offset & memMask]); - incTag(offset, size, 16 + gifTag.len); // Tag + Data length - if (pathIdx == GIF_PATH_1 && size >= 0x4000) { + incTag(offset, curSize, 16 + gifTag.len); // Tag + Data length + if (pathIdx == GIF_PATH_1 && curSize >= 0x4000) { Console.Warning("Gif Unit - GS packet size exceeded VU memory size!"); return 0; // Bios does this... 
(Fixed if you delay vu1's xgkick by 103 vu cycles) } - if (gifTag.tag.EOP) { - return size; - } + if (curSize >= size) return size; + if (gifTag.tag.EOP) return curSize; } } @@ -332,8 +441,22 @@ struct Gif_Unit { // The return value is the amount of data (in bytes) that was processed // If transfer cannot take place at this moment the return value is 0 u32 TransferGSPacketData(GIF_TRANSFER_TYPE tranType, u8* pMem, u32 size, bool aligned=false) { - - GIF_LOG("%s - [path=%d][size=%d]", Gif_TransferStr[(tranType>>8)&0xf], (tranType&3)+1, size); + + if (THREAD_VU1) { + Gif_Path& path1 = gifPath[GIF_PATH_1]; + if (tranType == GIF_TRANS_XGKICK) { // This is on the MTVU thread + path1.CopyGSPacketData(pMem, size, aligned); + path1.ExecuteGSPacketMTVU(); + return size; + } + if (tranType == GIF_TRANS_MTVU) { // This is on the EE thread + path1.mtvu.fakePackets++; + if (CanDoGif()) Execute(); + return 0; + } + } + + GUNIT_LOG("%s - [path=%d][size=%d]", Gif_TransferStr[(tranType>>8)&0xf], (tranType&3)+1, size); if (size == 0) { GUNIT_WARN("Gif Unit - Size == 0"); return 0; } if(!CanDoGif()) { GUNIT_WARN("Gif Unit - Signal or PSE Set or Dir = GS to EE"); } pxAssertDev((stat.APATH==0) || checkPaths(1,1,1), "Gif Unit - APATH wasn't cleared?"); @@ -344,6 +467,7 @@ struct Gif_Unit { } if (tranType == GIF_TRANS_DMA) { if(!CanDoPath3()) { if (!Path3Masked()) stat.P3Q = 1; return 0; } // DMA Stall + //if (stat.P2Q) DevCon.WriteLn("P2Q while path 3"); } if (tranType == GIF_TRANS_XGKICK) { if(!CanDoPath1()) { stat.P1Q = 1; } // We always buffer path1 packets @@ -404,7 +528,7 @@ struct Gif_Unit { GS_Packet gsPack = path.ExecuteGSPacket(); if(!gsPack.done) { if (stat.APATH == 3 && CanDoP3Slice() && !gsSIGNAL.queued) { - if(!didPath3 && checkPaths(1,1,0)) { // Path3 slicing + if(!didPath3 && /*!Path3Masked() &&*/ checkPaths(1,1,0)) { // Path3 slicing didPath3 = true; stat.APATH = 0; stat.IP3 = 1; @@ -433,7 +557,7 @@ struct Gif_Unit { } if (!gsSIGNAL.queued && !gifPath[0].isDone()) { stat.APATH = 1; stat.P1Q = 0; } elif (!gsSIGNAL.queued && !gifPath[1].isDone()) { stat.APATH = 2; stat.P2Q = 0; } - elif (!gsSIGNAL.queued && !gifPath[2].isDone() && !Path3Masked()) + elif (!gsSIGNAL.queued && !gifPath[2].isDone() && !Path3Masked() /*&& !stat.P2Q*/) { stat.APATH = 3; stat.P3Q = 0; stat.IP3 = 0; } else { stat.APATH = 0; stat.OPH = 0; break; } } diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index 3f45f280f8d48..bcfca1fc7cf90 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -19,7 +19,6 @@ #include "Hardware.h" #include "newVif.h" #include "IPU/IPUdma.h" -#include "Gif.h" #include "Gif_Unit.h" using namespace R5900; diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index d7e9a215bfb04..08317d393c57e 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -17,7 +17,6 @@ #include "PrecompiledHeader.h" #include "Common.h" #include "Hardware.h" -#include "Gif.h" #include "Gif_Unit.h" #include "ps2/HwInternal.h" diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 1b3b33beee4f6..35cff8d9fcf0f 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -21,6 +21,7 @@ #include "GS.h" #include "Gif_Unit.h" +#include "MTVU.h" #include "Elfheader.h" #include "SamplProf.h" @@ -242,36 +243,29 @@ void SysMtgsThread::OpenPlugin() GSsetGameCRC( ElfCRC, 0 ); } -class RingBufferLock : public ScopedLock -{ - typedef ScopedLock _parent; - -protected: - SysMtgsThread& m_mtgs; +struct RingBufferLock { + ScopedLock m_lock1; + ScopedLock m_lock2; + SysMtgsThread& m_mtgs; -public: - RingBufferLock( SysMtgsThread& mtgs ) - : ScopedLock( 
mtgs.m_mtx_RingBufferBusy ) - , m_mtgs( mtgs ) - { + RingBufferLock(SysMtgsThread& mtgs) + : m_lock1(mtgs.m_mtx_RingBufferBusy), + m_lock2(mtgs.m_mtx_RingBufferBusy2), + m_mtgs(mtgs) { m_mtgs.m_RingBufferIsBusy = true; } - - virtual ~RingBufferLock() throw() - { + virtual ~RingBufferLock() throw() { m_mtgs.m_RingBufferIsBusy = false; } - - void Acquire() - { - _parent::Acquire(); + void Acquire() { + m_lock1.Acquire(); + m_lock2.Acquire(); m_mtgs.m_RingBufferIsBusy = true; } - - void Release() - { + void Release() { m_mtgs.m_RingBufferIsBusy = false; - _parent::Release(); + m_lock2.Release(); + m_lock1.Release(); } }; @@ -281,10 +275,9 @@ void SysMtgsThread::ExecuteTaskInThread() PacketTagType prevCmd; #endif - RingBufferLock busy( *this ); + RingBufferLock busy (*this); - while( true ) - { + while(true) { busy.Release(); // Performance note: Both of these perform cancellation tests, but pthread_testcancel @@ -299,8 +292,7 @@ void SysMtgsThread::ExecuteTaskInThread() // ever be modified by this thread. while( m_ReadPos != volatize(m_WritePos)) { - if( EmuConfig.GS.DisableOutput ) - { + if (EmuConfig.GS.DisableOutput) { m_ReadPos = m_WritePos; continue; } @@ -327,7 +319,7 @@ void SysMtgsThread::ExecuteTaskInThread() switch( tag.command ) { -#if COPY_GS_PACKET_TO_MTGS == 1 // d +#if COPY_GS_PACKET_TO_MTGS == 1 case GS_RINGTYPE_P1: { uint datapos = (m_ReadPos+1) & RingBufferMask; @@ -412,6 +404,21 @@ void SysMtgsThread::ExecuteTaskInThread() break; } + case GS_RINGTYPE_MTVU_GSPACKET: { + MTVU_LOG("MTGS - Waiting on semaXGkick!"); + vu1Thread.KickStart(true); + busy.m_lock2.Release(); + // Wait for MTVU to complete vu1 program + vu1Thread.semaXGkick.WaitWithoutYield(); + busy.m_lock2.Acquire(); + Gif_Path& path = gifUnit.gifPath[GIF_PATH_1]; + GS_Packet gsPack = path.GetGSPacketMTVU(); // Get vu1 program's xgkick packet(s) + if (gsPack.size) GSgifTransfer((u32*)&path.buffer[gsPack.offset], gsPack.size/16); + AtomicExchangeSub(path.readAmount, gsPack.size + gsPack.readAmount); + path.PopGSPacketMTVU(); // Should be done last, for proper Gif_MTGS_Wait() + break; + } + default: { switch( tag.command ) @@ -572,27 +579,43 @@ void SysMtgsThread::OnCleanupInThread() } // Waits for the GS to empty out the entire ring buffer contents. -// Used primarily for plugin startup/shutdown. -void SysMtgsThread::WaitGS() +// If syncRegs, then writes pcsx2's gs regs to MTGS's internal copy +// If weakWait, then this function is allowed to exit after MTGS finished a path1 packet +// If isMTVU, then this implies this function is being called from the MTVU thread... +void SysMtgsThread::WaitGS(bool syncRegs, bool weakWait, bool isMTVU) { pxAssertDev( !IsSelf(), "This method is only allowed from threads *not* named MTGS." ); if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return; if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return; - if( volatize(m_ReadPos) != m_WritePos ) - { + Gif_Path& path = gifUnit.gifPath[GIF_PATH_1]; + u32 startP1Packs = weakWait ? path.GetPendingGSPackets() : 0; + + if (isMTVU || volatize(m_ReadPos) != m_WritePos) { SetEvent(); RethrowException(); - - do { - m_mtx_RingBufferBusy.Wait(); + for(;;) { + if (weakWait) m_mtx_RingBufferBusy2.Wait(); + else m_mtx_RingBufferBusy .Wait(); RethrowException(); - } while( volatize(m_ReadPos) != m_WritePos ); + if(!isMTVU && volatize(m_ReadPos) == m_WritePos) break; + u32 curP1Packs = weakWait ? 
path.GetPendingGSPackets() : 0;
+			if (weakWait && ((startP1Packs-curP1Packs) || !curP1Packs)) break;
+			// On weakWait we will stop waiting on the MTGS thread if the
+			// MTGS thread has processed a vu1 xgkick packet, or is pending on
+			// its final vu1 xgkick packet (!curP1Packs)...
+			// Note: m_WritePos doesn't seem to have proper atomic write
+			// code, so reading it from the MTVU thread might be dangerous;
+			// hence it has been avoided...
+		}
 	}
 
-	// Completely synchronize GS and MTGS register states.
-	memcpy_fast( RingBuffer.Regs, PS2MEM_GS, sizeof(RingBuffer.Regs) );
+	if (syncRegs) {
+		ScopedLock lock(m_mtx_WaitGS);
+		// Completely synchronize GS and MTGS register states.
+		memcpy_fast(RingBuffer.Regs, PS2MEM_GS, sizeof(RingBuffer.Regs));
+	}
 }
 
 // Sets the gsEvent flag and releases a timeslice.
diff --git a/pcsx2/MTVU.cpp b/pcsx2/MTVU.cpp
new file mode 100644
index 0000000000000..5e7de04466197
--- /dev/null
+++ b/pcsx2/MTVU.cpp
@@ -0,0 +1,37 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2010 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "PrecompiledHeader.h"
+#include "Common.h"
+#include "MTVU.h"
+#include "newVif.h"
+
+__aligned16 VU_Thread vu1Thread(CpuVU1, VU1);
+
+// Calls the vif unpack functions from the MTVU thread
+void MTVU_Unpack(void* data, VIFregisters& vifRegs) {
+	bool isFill = vifRegs.cycle.cl < vifRegs.cycle.wl;
+	if (newVifDynaRec) dVifUnpack<1>((u8*)data, isFill);
+	else _nVifUnpack(1, (u8*)data, vifRegs.mode, isFill);
+}
+
+// Called on Saving/Loading states...
+void SaveStateBase::mtvuFreeze() {
+	FreezeTag("MTVU");
+	pxAssert(vu1Thread.IsDone());
+	if (!IsSaving()) vu1Thread.Reset();
+	Freeze(vu1Thread.vuCycles);
+	Freeze(vu1Thread.vuCycleIdx);
+}
diff --git a/pcsx2/MTVU.h b/pcsx2/MTVU.h
new file mode 100644
index 0000000000000..1e18065bf7115
--- /dev/null
+++ b/pcsx2/MTVU.h
@@ -0,0 +1,305 @@
+
+#pragma once
+#include "System/SysThreads.h"
+#include "Vif.h"
+#include "Vif_Dma.h"
+#include "VUmicro.h"
+#include "Gif_Unit.h"
+
+extern void MTVU_Unpack(void* data, VIFregisters& vifRegs);
+#define volatize(x) (*reinterpret_cast<volatile u32*>(&(x)))
+#define size_u32(x) (((u32)x+3u)>>2) // Rounds up a size in bytes for size in u32's
+#define MTVU_ALWAYS_KICK 0
+#define MTVU_SYNC_MODE   0
+#define MTVU_LOG(...) do{} while(0)
+//#define MTVU_LOG DevCon.WriteLn
+
+enum MTVU_EVENT {
+	MTVU_VU_EXECUTE,     // Execute VU program
+	MTVU_VU_WRITE_MICRO, // Write to VU micro-mem
+	MTVU_VU_WRITE_DATA,  // Write to VU data-mem
+	MTVU_VIF_WRITE_COL,  // Write to Vif col reg
+	MTVU_VIF_WRITE_ROW,  // Write to Vif row reg
+	MTVU_VIF_UNPACK,     // Execute Vif Unpack
+	MTVU_NULL_PACKET,    // Go back to beginning of buffer
+	MTVU_RESET
+};
+
+// Notes:
+// - This class should only be accessed from the EE thread...
+// - buffer_size must be power of 2
+// - ring-buffer has no complete pending packets when read_pos==write_pos
+struct VU_Thread : public pxThread {
+	static const u32 buffer_size = (_1mb * 16) / sizeof(u32);
+	static const u32 buffer_mask = buffer_size - 1;
+	__aligned(4) u32 buffer[buffer_size];
+	__aligned(4) volatile s32 read_pos; // Only modified by VU thread
+	__aligned(4) volatile bool isBusy;  // Is thread processing data?
+	__aligned(4) s32 write_pos;         // Only modified by EE thread
+	__aligned(4) s32 write_offset;      // Only modified by EE thread
+	__aligned(4) Mutex mtxBusy;
+	__aligned(4) Semaphore semaEvent;
+	__aligned(4) Semaphore semaXGkick;
+	__aligned(4) BaseVUmicroCPU*& vuCPU;
+	__aligned(4) VURegs& vuRegs;
+	__aligned16  vifStruct vif;
+	__aligned16  VIFregisters vifRegs;
+	__aligned(4) u32 vuCycles[4]; // Used for VU cycle stealing hack
+	__aligned(4) u32 vuCycleIdx;  // Used for VU cycle stealing hack
+
+	VU_Thread(BaseVUmicroCPU*& _vuCPU, VURegs& _vuRegs) :
+		vuCPU(_vuCPU), vuRegs(_vuRegs) {
+		m_name = L"MTVU";
+		Reset();
+	}
+	virtual ~VU_Thread() throw() {
+		pxThread::Cancel();
+	}
+	void InitThread() {
+		Start(); // Starts the pxThread
+	}
+	void Reset() {
+		read_pos     = 0;
+		write_pos    = 0;
+		write_offset = 0;
+		vuCycleIdx   = 0;
+		isBusy = false;
+		memzero(vif);
+		memzero(vifRegs);
+		memzero(vuCycles);
+	}
+protected:
+	// Should only be called by ReserveSpace()
+	__ri void WaitOnSize(s32 size) {
+		for(;;) {
+			s32 readPos = GetReadPos();
+			if (readPos <= write_pos) break;        // MTVU is reading in back of write_pos
+			if (readPos >  write_pos + size) break; // Enough free front space
+			if (1) { // Let MTVU run to free up buffer space
+				KickStart();
+				if (IsDevBuild) DevCon.WriteLn("WaitOnSize()");
+				ScopedLock lock(mtxBusy);
+			}
+		}
+	}
+
+	// Makes sure there's enough room in the ring buffer
+	// to write a continuous 'size * sizeof(u32)' bytes
+	void ReserveSpace(s32 size) {
+		pxAssert(write_pos < buffer_size);
+		pxAssert(size < buffer_size);
+		pxAssert(size > 0);
+		pxAssert(write_offset == 0);
+		if (write_pos + size > buffer_size) {
+			pxAssert(write_pos > 0);
+			WaitOnSize(1); // Size of MTVU_NULL_PACKET
+			Write(MTVU_NULL_PACKET);
+			write_offset = 0;
+			AtomicExchange(volatize(write_pos), 0);
+		}
+		WaitOnSize(size);
+	}
+
+	// Use this when reading read_pos from ee thread
+	__fi volatile s32 GetReadPos() {
+		return AtomicRead(read_pos);
+	}
+	// Use this when reading write_pos from vu thread
+	__fi volatile s32 GetWritePos() {
+		return AtomicRead(volatize(write_pos));
+	}
+	// Gets the effective write pointer after adding write_offset
+	__fi u32* GetWritePtr() {
+		return &buffer[(write_pos + write_offset) & buffer_mask];
+	}
+
+	__fi void incReadPos(s32 offset) { // Offset in u32 sizes
+		s32 temp = (read_pos + offset) & buffer_mask;
+		AtomicExchange(read_pos, temp);
+	}
+	__fi void incWritePos() { // Adds write_offset
+		s32 temp = (write_pos + write_offset) & buffer_mask;
+		write_offset = 0;
+		AtomicExchange(volatize(write_pos), temp);
+		if (MTVU_ALWAYS_KICK) KickStart();
+		if (MTVU_SYNC_MODE)   WaitVU();
+	}
+
+	__fi u32 Read() {
+		u32 ret = buffer[read_pos];
+		incReadPos(1);
+		return ret;
+	}
+	__fi void Read(void* dest, u32 size) { // Size in bytes
+		memcpy_fast(dest, &buffer[read_pos], size);
+		incReadPos(size_u32(size));
+	}
+
+	__fi void Write(u32 val) {
+		GetWritePtr()[0] = val;
+		write_offset += 1;
+	}
+	__fi void Write(void* src, u32 size) { // Size in bytes
+		memcpy_fast(GetWritePtr(), src, size);
+		write_offset += size_u32(size);
+	}
+
+	void ExecuteTaskInThread() {
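+		// The worker runs recompiled VU1 code, so its loop needs the same
+		// page-fault protection the EE/GS threads use (PCSX2_PAGEFAULT_PROTECT
+		// and PCSX2_PAGEFAULT_EXCEPT are defined in PageFaultSource.h).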
PCSX2_PAGEFAULT_PROTECT { + ExecuteRingBuffer(); + } PCSX2_PAGEFAULT_EXCEPT; + } + + void ExecuteRingBuffer() { + for(;;) { + semaEvent.WaitWithoutYield(); + ScopedLockBool lock(mtxBusy, isBusy); + while (read_pos != GetWritePos()) { + u32 tag = Read(); + switch (tag) { + case MTVU_VU_EXECUTE: { + vuRegs.cycle = 0; + s32 addr = Read(); + vifRegs.top = Read(); + vifRegs.itop = Read(); + if (addr != -1) vuRegs.VI[REG_TPC].UL = addr; + vuCPU->Execute(vu1RunCycles); + gifUnit.gifPath[GIF_PATH_1].FinishGSPacketMTVU(); + semaXGkick.Post(); // Tell MTGS a path1 packet is complete + AtomicExchange(vuCycles[vuCycleIdx], vuRegs.cycle); + vuCycleIdx = (vuCycleIdx + 1) & 3; + break; + } + case MTVU_VU_WRITE_MICRO: { + u32 vu_micro_addr = Read(); + u32 size = Read(); + vuCPU->Clear(vu_micro_addr, size); + Read(&vuRegs.Micro[vu_micro_addr], size); + break; + } + case MTVU_VU_WRITE_DATA: { + u32 vu_data_addr = Read(); + u32 size = Read(); + Read(&vuRegs.Mem[vu_data_addr], size); + break; + } + case MTVU_VIF_WRITE_COL: + Read(&vif.MaskCol, sizeof(vif.MaskCol)); + break; + case MTVU_VIF_WRITE_ROW: + Read(&vif.MaskRow, sizeof(vif.MaskRow)); + break; + case MTVU_VIF_UNPACK: { + u32 vif_copy_size = (uptr)&vif.StructEnd - (uptr)&vif.tag; + Read(&vif.tag, vif_copy_size); + Read(&vifRegs, sizeof(vifRegs)); + u32 size = Read(); + MTVU_Unpack(&buffer[read_pos], vifRegs); + incReadPos(size_u32(size)); + break; + } + case MTVU_NULL_PACKET: + AtomicExchange(read_pos, 0); + break; + jNO_DEFAULT; + } + } + } + } + + // Returns Average number of vu Cycles from last 4 runs + u32 Get_vuCycles() { // Used for vu cycle stealing hack + return (AtomicRead(vuCycles[0]) + AtomicRead(vuCycles[1]) + + AtomicRead(vuCycles[2]) + AtomicRead(vuCycles[3])) >> 2; + } +public: + + // Get MTVU to start processing its packets if it isn't already + void KickStart(bool forceKick = false) { + if ((forceKick && !semaEvent.Count()) + || (!isBusy && GetReadPos() != write_pos)) semaEvent.Post(); + } + + // Used for assertions... 
+ bool IsDone() { return !isBusy && GetReadPos() == GetWritePos(); } + + // Waits till MTVU is done processing + void WaitVU() { + MTVU_LOG("MTVU - WaitVU!"); + for(;;) { + if (IsDone()) break; + //DevCon.WriteLn("WaitVU()"); + pxAssert(THREAD_VU1); + KickStart(); + ScopedLock lock(mtxBusy); + } + } + + void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop) { + MTVU_LOG("MTVU - ExecuteVU!"); + ReserveSpace(4); + Write(MTVU_VU_EXECUTE); + Write(vu_addr); + Write(vif_top); + Write(vif_itop); + incWritePos(); + gifUnit.TransferGSPacketData(GIF_TRANS_MTVU, NULL, 0); + KickStart(); + u32 cycles = std::min(Get_vuCycles(), 3000u); + cpuRegs.cycle += cycles * EmuConfig.Speedhacks.VUCycleSteal; + } + + void VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32 size) { + MTVU_LOG("MTVU - VifUnpack!"); + u32 vif_copy_size = (uptr)&_vif.StructEnd - (uptr)&_vif.tag; + ReserveSpace(1 + size_u32(vif_copy_size) + size_u32(sizeof(_vifRegs)) + 1 + size_u32(size)); + Write(MTVU_VIF_UNPACK); + Write(&_vif.tag, vif_copy_size); + Write(&_vifRegs, sizeof(_vifRegs)); + Write(size); + Write(data, size); + incWritePos(); + KickStart(); + } + + // Writes to VU's Micro Memory (size in bytes) + void WriteMicroMem(u32 vu_micro_addr, void* data, u32 size) { + MTVU_LOG("MTVU - WriteMicroMem!"); + ReserveSpace(3 + size_u32(size)); + Write(MTVU_VU_WRITE_MICRO); + Write(vu_micro_addr); + Write(size); + Write(data, size); + incWritePos(); + } + + // Writes to VU's Data Memory (size in bytes) + void WriteDataMem(u32 vu_data_addr, void* data, u32 size) { + MTVU_LOG("MTVU - WriteDataMem!"); + ReserveSpace(3 + size_u32(size)); + Write(MTVU_VU_WRITE_DATA); + Write(vu_data_addr); + Write(size); + Write(data, size); + incWritePos(); + } + + void WriteCol(vifStruct& _vif) { + MTVU_LOG("MTVU - WriteCol!"); + ReserveSpace(1 + size_u32(sizeof(_vif.MaskCol))); + Write(MTVU_VIF_WRITE_COL); + Write(&_vif.MaskCol, sizeof(_vif.MaskCol)); + incWritePos(); + } + + void WriteRow(vifStruct& _vif) { + MTVU_LOG("MTVU - WriteRow!"); + ReserveSpace(1 + size_u32(sizeof(_vif.MaskRow))); + Write(MTVU_VIF_WRITE_ROW); + Write(&_vif.MaskRow, sizeof(_vif.MaskRow)); + incWritePos(); + } +}; + +extern __aligned16 VU_Thread vu1Thread; + diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 5192d62fae6a1..1ad1469d6da94 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -38,8 +38,9 @@ BIOS #include #include "IopCommon.h" -#include "VUmicro.h" #include "GS.h" +#include "VUmicro.h" +#include "MTVU.h" #include "ps2/HwInternal.h" #include "ps2/BiosTools.h" @@ -102,6 +103,7 @@ static vtlbHandler vu0_micro_mem, vu1_micro_mem, + vu1_data_mem, hw_by_page[0x10] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, @@ -131,7 +133,11 @@ void memMapVUmicro() // VU0/VU1 memory (data) // VU0 is 4k, mirrored 4 times across a 16k area. vtlb_MapBlock(VU0.Mem,0x11004000,0x00004000,0x1000); - vtlb_MapBlock(VU1.Mem,0x1100c000,0x00004000); + // Note: In order for the below conditional to work correctly + // support needs to be coded to reset the memMappings when MTVU is + // turned off/on. For now we just always use the vu data handlers... 
+	if (1||THREAD_VU1) vtlb_MapHandler(vu1_data_mem,0x1100c000,0x00004000);
+	else               vtlb_MapBlock  (VU1.Mem,     0x1100c000,0x00004000);
 }
 
 void memMapPhy()
@@ -431,128 +437,186 @@ static void __fastcall _ext_memWrite128(u32 mem, const mem128_t *value)
 
 typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc );
 
-template<int vunum>
-static __fi void ClearVuFunc( u32 addr, u32 size )
-{
-	if( vunum==0 )
-		CpuVU0->Clear(addr,size);
-	else
-		CpuVU1->Clear(addr,size);
+template<int vunum> static __fi void ClearVuFunc(u32 addr, u32 size) {
+	if (vunum) CpuVU1->Clear(addr, size);
+	else       CpuVU0->Clear(addr, size);
 }
 
-template<int vunum>
-static mem8_t __fastcall vuMicroRead8(u32 addr)
-{
-	addr&=(vunum==0)?0xfff:0x3fff;
-	VURegs* vu=(vunum==0)?&VU0:&VU1;
-
+// VU Micro Memory Reads...
+template<int vunum> static mem8_t __fc vuMicroRead8(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
 	return vu->Micro[addr];
 }
-
-template<int vunum>
-static mem16_t __fastcall vuMicroRead16(u32 addr)
-{
-	addr&=(vunum==0)?0xfff:0x3fff;
-	VURegs* vu=(vunum==0)?&VU0:&VU1;
-
+template<int vunum> static mem16_t __fc vuMicroRead16(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
 	return *(u16*)&vu->Micro[addr];
 }
-
-template<int vunum>
-static mem32_t __fastcall vuMicroRead32(u32 addr)
-{
-	addr&=(vunum==0)?0xfff:0x3fff;
-	VURegs* vu=(vunum==0)?&VU0:&VU1;
-
+template<int vunum> static mem32_t __fc vuMicroRead32(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
 	return *(u32*)&vu->Micro[addr];
 }
-
-template<int vunum>
-static void __fastcall vuMicroRead64(u32 addr,mem64_t* data)
-{
-	addr&=(vunum==0)?0xfff:0x3fff;
-	VURegs* vu=(vunum==0)?&VU0:&VU1;
-
+template<int vunum> static void __fc vuMicroRead64(u32 addr,mem64_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
 	*data=*(u64*)&vu->Micro[addr];
 }
-
-template<int vunum>
-static void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
-{
-	addr&=(vunum==0)?0xfff:0x3fff;
-	VURegs* vu=(vunum==0)?&VU0:&VU1;
-
+template<int vunum> static void __fc vuMicroRead128(u32 addr,mem128_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
 	CopyQWC(data,&vu->Micro[addr]);
 }
 
 // Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
 //   frame in-game, and usually none at all after BIOS), so cpu clears aren't much of a big deal.
-
-template<int vunum>
-static void __fastcall vuMicroWrite8(u32 addr,mem8_t data)
-{
-	addr &= (vunum==0) ? 0xfff : 0x3fff;
-	VURegs& vu = (vunum==0) ? VU0 : VU1;
-
-	if (vu.Micro[addr]!=data)
-	{
-		ClearVuFunc<vunum>(addr&(~7), 8); // Clear before writing new data (clearing 8 bytes because an instruction is 8 bytes) (cottonvibes)
-		vu.Micro[addr]=data;
+template<int vunum> static void __fc vuMicroWrite8(u32 addr,mem8_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteMicroMem(addr, &data, sizeof(u8));
+		return;
+	}
+	if (vu->Micro[addr]!=data) { // Clear before writing new data
+		ClearVuFunc<vunum>(addr, 8); //(clearing 8 bytes because an instruction is 8 bytes) (cottonvibes)
+		vu->Micro[addr] =data;
 	}
 }
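The handlers above settle into a pattern worth calling out: reads synchronize with the VU thread (WaitVU) because they need its results, while writes only queue, since they merely have to arrive before the next program the VU thread runs. A hypothetical EE-side sequence under THREAD_VU1 (the wrapper function and data are illustration-only):

static void ExampleMicroUploadAndRun()
{
    u8 prog[8] = {0};                                 // illustration only
    vu1Thread.WriteMicroMem(0x0, prog, sizeof(prog)); // queued; EE keeps running
    vu1ExecMicro(0x0);                                // also queued, stays ordered
    u8 firstByte = vuMicroRead8<1>(0x0);              // read path: WaitVU() stalls first
    (void)firstByte;
}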
-
-template<int vunum>
-static void __fastcall vuMicroWrite16(u32 addr,mem16_t data)
-{
-	addr &= (vunum==0) ? 0xfff : 0x3fff;
-	VURegs& vu = (vunum==0) ? VU0 : VU1;
-
-	if (*(u16*)&vu.Micro[addr]!=data)
-	{
-		ClearVuFunc<vunum>(addr&(~7), 8);
-		*(u16*)&vu.Micro[addr]=data;
+template<int vunum> static void __fc vuMicroWrite16(u32 addr, mem16_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteMicroMem(addr, &data, sizeof(u16));
+		return;
+	}
+	if (*(u16*)&vu->Micro[addr]!=data) {
+		ClearVuFunc<vunum>(addr, 8);
+		*(u16*)&vu->Micro[addr] =data;
 	}
 }
-
-template<int vunum>
-static void __fastcall vuMicroWrite32(u32 addr,mem32_t data)
-{
-	addr &= (vunum==0) ? 0xfff : 0x3fff;
-	VURegs& vu = (vunum==0) ? VU0 : VU1;
-
-	if (*(u32*)&vu.Micro[addr]!=data)
-	{
-		ClearVuFunc<vunum>(addr&(~7), 8);
-		*(u32*)&vu.Micro[addr]=data;
+template<int vunum> static void __fc vuMicroWrite32(u32 addr, mem32_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteMicroMem(addr, &data, sizeof(u32));
+		return;
+	}
+	if (*(u32*)&vu->Micro[addr]!=data) {
+		ClearVuFunc<vunum>(addr, 8);
+		*(u32*)&vu->Micro[addr] =data;
 	}
 }
-
-template<int vunum>
-static void __fastcall vuMicroWrite64(u32 addr,const mem64_t* data)
-{
-	addr &= (vunum==0) ? 0xfff : 0x3fff;
-	VURegs& vu = (vunum==0) ? VU0 : VU1;
-
-	if (*(u64*)&vu.Micro[addr]!=data[0])
-	{
-		ClearVuFunc<vunum>(addr&(~7), 8);
-		*(u64*)&vu.Micro[addr]=data[0];
+template<int vunum> static void __fc vuMicroWrite64(u32 addr, const mem64_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteMicroMem(addr, (void*)data, sizeof(u64));
+		return;
+	}
+	if (*(u64*)&vu->Micro[addr]!=data[0]) {
+		ClearVuFunc<vunum>(addr, 8);
+		*(u64*)&vu->Micro[addr] =data[0];
+	}
+}
+template<int vunum> static void __fc vuMicroWrite128(u32 addr, const mem128_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteMicroMem(addr, (void*)data, sizeof(u128));
+		return;
+	}
+	if ((u128&)vu->Micro[addr]!=*data) {
+		ClearVuFunc<vunum>(addr, 16);
+		CopyQWC(&vu->Micro[addr],data);
 	}
 }
 
-template<int vunum>
-static void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
-{
-	addr &= (vunum==0) ? 0xfff : 0x3fff;
-	VURegs& vu = (vunum==0) ? VU0 : VU1;
-
-	if ((u128&)vu.Micro[addr] != *data)
-	{
-		ClearVuFunc<vunum>(addr&(~7), 16);
-		CopyQWC(&vu.Micro[addr],data);
+// VU Data Memory Reads...
+template<int vunum> static mem8_t __fc vuDataRead8(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
+	return vu->Mem[addr];
+}
+template<int vunum> static mem16_t __fc vuDataRead16(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
+	return *(u16*)&vu->Mem[addr];
+}
+template<int vunum> static mem32_t __fc vuDataRead32(u32 addr) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
+	return *(u32*)&vu->Mem[addr];
+}
+template<int vunum> static void __fc vuDataRead64(u32 addr, mem64_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
+	*data=*(u64*)&vu->Mem[addr];
+}
+template<int vunum> static void __fc vuDataRead128(u32 addr, mem128_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) vu1Thread.WaitVU();
+	CopyQWC(data,&vu->Mem[addr]);
+}
+
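The data-memory write handlers follow. One property worth showing with a hypothetical snippet (names are illustration-only): a queued write needs no flush before a queued execute, because both travel through the same ring buffer in FIFO order.

static void ExampleDataUploadThenRun()
{
    __aligned16 u128 initData;
    memzero(initData);
    vu1Thread.WriteDataMem(0x0, &initData, sizeof(initData)); // ring message #1
    vu1ExecMicro(0x800);                                      // ring message #2
    // the VU1 program at 0x800 is guaranteed to observe initData at VU mem 0x0
}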
+// VU Data Memory Writes...
+template<int vunum> static void __fc vuDataWrite8(u32 addr, mem8_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteDataMem(addr, &data, sizeof(u8));
+		return;
+	}
+	vu->Mem[addr] = data;
+}
+template<int vunum> static void __fc vuDataWrite16(u32 addr, mem16_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteDataMem(addr, &data, sizeof(u16));
+		return;
+	}
+	*(u16*)&vu->Mem[addr] = data;
+}
+template<int vunum> static void __fc vuDataWrite32(u32 addr, mem32_t data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteDataMem(addr, &data, sizeof(u32));
+		return;
+	}
+	*(u32*)&vu->Mem[addr] = data;
+}
+template<int vunum> static void __fc vuDataWrite64(u32 addr, const mem64_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteDataMem(addr, (void*)data, sizeof(u64));
+		return;
+	}
+	*(u64*)&vu->Mem[addr] = data[0];
+}
+template<int vunum> static void __fc vuDataWrite128(u32 addr, const mem128_t* data) {
+	VURegs* vu = vunum ? &VU1 : &VU0;
+	addr &= vunum ? 0x3fff: 0xfff;
+	if (vunum && THREAD_VU1) {
+		vu1Thread.WriteDataMem(addr, (void*)data, sizeof(u128));
+		return;
+	}
+	CopyQWC(&vu->Mem[addr], data);
 }
 
 void memSetPageAddr(u32 vaddr, u32 paddr)
 {
 	//Console.WriteLn("memSetPageAddr: %8.8x -> %8.8x", vaddr, paddr);
@@ -640,9 +704,8 @@ void eeMemoryReserve::Commit()
 // Resets memory mappings, unmaps TLBs, reloads bios roms, etc.
 void eeMemoryReserve::Reset()
 {
-	if (!mmap_faultHandler)
-	{
-		pxAssume(Source_PageFault);
+	if(!mmap_faultHandler) {
+		pxAssert(Source_PageFault);
 		mmap_faultHandler = new mmap_PageFaultHandler();
 	}
 
@@ -674,7 +737,8 @@ void eeMemoryReserve::Reset()
 	// Dynarec versions of VUs
 	vu0_micro_mem = vtlb_RegisterHandlerTempl1(vuMicro,0);
 	vu1_micro_mem = vtlb_RegisterHandlerTempl1(vuMicro,1);
-
+	vu1_data_mem  = (1||THREAD_VU1) ? vtlb_RegisterHandlerTempl1(vuData,1) : NULL;
+
 	//////////////////////////////////////////////////////////////////////////////////////////
 	// IOP's "secret" Hardware Register mapping, accessible from the EE (and meant for use
 	// by debugging or BIOS only).  The IOP's hw regs are divided into three main pages in
diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp
index e0a35365548a0..ebe37c7529ab8 100644
--- a/pcsx2/Pcsx2Config.cpp
+++ b/pcsx2/Pcsx2Config.cpp
@@ -64,6 +64,7 @@ void Pcsx2Config::SpeedhackOptions::LoadSave( IniInterface& ini )
 	IniBitBool( WaitLoop );
 	IniBitBool( vuFlagHack );
 	IniBitBool( vuBlockHack );
+	IniBitBool( vuThread );
 }
 
 void Pcsx2Config::ProfilerOptions::LoadSave( IniInterface& ini )
diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp
index f698f02784063..9c14ab3dea142 100644
--- a/pcsx2/R5900.cpp
+++ b/pcsx2/R5900.cpp
@@ -21,6 +21,7 @@
 #include "R3000A.h"
 #include "VUmicro.h"
 #include "COP0.h"
+#include "MTVU.h"
 
 #include "System/SysThreads.h"
 #include "R5900Exceptions.h"
@@ -54,6 +55,7 @@ extern SysMainMemory& GetVmMemory();
 
 void cpuReset()
 {
+	vu1Thread.WaitVU();
 	if (GetMTGS().IsOpen())
 		GetMTGS().WaitGS(); // GS better be done processing before we reset the EE, just in case.
 
@@ -281,9 +283,6 @@ static __fi void _cpuTestInterrupts()
 	TESTINT(DMAC_GIF, gifInterrupt);
 	TESTINT(DMAC_SIF0, EEsif0Interrupt);
 	TESTINT(DMAC_SIF1, EEsif1Interrupt);
-
-	//extern void Gif_Execute();
-	//TESTINT(DMAC_GIF_UNIT, Gif_Execute);
 
 	// Profile-guided Optimization (sorta)
	// The following ints are rarely called. 
diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp
index 068c986c3be0a..f174d7e680ca4 100644
--- a/pcsx2/SPR.cpp
+++ b/pcsx2/SPR.cpp
@@ -18,6 +18,7 @@
 
 #include "SPR.h"
 #include "VUmicro.h"
+#include "MTVU.h"
 
 extern void mfifoGIFtransfer(int);
 
@@ -31,19 +32,23 @@ void sprInit()
 {
 }
 
-static void TestClearVUs(u32 madr, u32 size)
+static void TestClearVUs(u32 madr, u32 qwc)
 {
 	if (madr >= 0x11000000)
 	{
 		if (madr < 0x11004000)
 		{
 			DbgCon.Warning("scratch pad clearing vu0");
-			CpuVU0->Clear(madr&0xfff, size);
+			CpuVU0->Clear(madr&0xfff, qwc * 16);
 		}
 		else if (madr >= 0x11008000 && madr < 0x1100c000)
 		{
 			DbgCon.Warning("scratch pad clearing vu1");
-			CpuVU1->Clear(madr&0x3fff, size);
+			if (THREAD_VU1) {
+				DevCon.Error("MTVU Warning: SPR Accessing VU1 Memory!!!");
+				vu1Thread.WaitVU();
+			}
+			CpuVU1->Clear(madr&0x3fff, qwc * 16);
 		}
 	}
 }
@@ -83,7 +88,7 @@ int _SPR0chain()
 	memcpy_qwc(pMem, &psSu128(spr0ch.sadr), partialqwc);
 
 	// clear VU mem also!
-	TestClearVUs(spr0ch.madr, partialqwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
+	TestClearVUs(spr0ch.madr, partialqwc);
 
 	spr0ch.madr += partialqwc << 4;
 	spr0ch.sadr += partialqwc << 4;
@@ -135,7 +140,7 @@ void _SPR0interleave()
 			case NO_MFD:
 			case MFD_RESERVED:
 				// clear VU mem also!
-				TestClearVUs(spr0ch.madr, spr0ch.qwc << 2);
+				TestClearVUs(spr0ch.madr, spr0ch.qwc);
 				memcpy_qwc(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc);
 				break;
 		}
diff --git a/pcsx2/SaveState.cpp b/pcsx2/SaveState.cpp
index 49ddcca5d86f7..ae7d8c6a0c2be 100644
--- a/pcsx2/SaveState.cpp
+++ b/pcsx2/SaveState.cpp
@@ -21,6 +21,7 @@
 #include "ps2/BiosTools.h"
 #include "COP0.h"
 #include "VUmicro.h"
+#include "MTVU.h"
 #include "Cache.h"
 
 #include "AppConfig.h"
@@ -150,10 +151,9 @@ static const uint MainMemorySizeInBytes =
 
 SaveStateBase& SaveStateBase::FreezeMainMemory()
 {
-	if (IsLoading())
-		PreLoadPrep();
-	else
-		m_memory->MakeRoomFor( m_idx + MainMemorySizeInBytes );
+	vu1Thread.WaitVU(); // Finish VU1 just in case...
+	if (IsLoading()) PreLoadPrep();
+	else m_memory->MakeRoomFor( m_idx + MainMemorySizeInBytes );
 
 	// First Block - Memory Dumps
 	// ---------------------------
@@ -175,8 +175,8 @@ SaveStateBase& SaveStateBase::FreezeMainMemory()
 
 SaveStateBase& SaveStateBase::FreezeInternals()
 {
-	if( IsLoading() )
-		PreLoadPrep();
+	vu1Thread.WaitVU(); // Finish VU1 just in case...
+	if (IsLoading()) PreLoadPrep();
 
 	// Second Block - Various CPU Registers and States
 	// -----------------------------------------------
diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h
index df3fad3fbfffd..6e11c275b4e5d 100644
--- a/pcsx2/SaveState.h
+++ b/pcsx2/SaveState.h
@@ -24,7 +24,7 @@
 // the lower 16 bit value. IF the change is breaking of all compatibility with old
 // states, increment the upper 16 bit value, and clear the lower 16 bits to 0.
 
-static const u32 g_SaveVersion = (0x9A02 << 16) | 0x0000;
+static const u32 g_SaveVersion = (0x9A03 << 16) | 0x0000;
 
 // this function is meant to be used in the place of GSfreeze, and provides a safe layer
 // between the GS saving function and the MTGS's needs. :)
@@ -193,6 +193,7 @@ class SaveStateBase
 
 	// Load/Save functions for the various components of our glorious emulator!
 
+	void mtvuFreeze();
 	void rcntFreeze();
 	void vuMicroFreeze();
 	void vif0Freeze();
diff --git a/pcsx2/System.h b/pcsx2/System.h
index ab43897e7616d..506bf00bb87f7 100644
--- a/pcsx2/System.h
+++ b/pcsx2/System.h
@@ -154,7 +154,7 @@ class SysCpuProviderPack
 // implemented by the provisioning interface.
extern SysCpuProviderPack& GetCpuProviders(); -extern void SysLogMachineCaps(); // Detects cpu type and fills cpuInfo structs. +extern void SysLogMachineCaps(); // Detects cpu type and fills cpuInfo structs. extern void SysClearExecutionCache(); // clears recompiled execution caches! extern void SysOutOfMemory_EmergencyResponse(uptr blocksize); diff --git a/pcsx2/VU1micro.cpp b/pcsx2/VU1micro.cpp index 2b9240bee9f6a..c1cec190ddc44 100644 --- a/pcsx2/VU1micro.cpp +++ b/pcsx2/VU1micro.cpp @@ -19,10 +19,9 @@ #include "PrecompiledHeader.h" #include "Common.h" - #include - #include "VUmicro.h" +#include "MTVU.h" #ifdef PCSX2_DEBUG u32 vudump = 0; @@ -39,6 +38,10 @@ void vu1ResetRegs() } void vu1Finish() { + if (THREAD_VU1) { + if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100"); + return; + } while (VU0.VI[REG_VPU_STAT].UL & 0x100) { VUM_LOG("vu1ExecMicro > Stalling until current microprogram finishes"); CpuVU1->Execute(vu1RunCycles); @@ -47,10 +50,15 @@ void vu1Finish() { void __fastcall vu1ExecMicro(u32 addr) { + if (THREAD_VU1) { + vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop); + vif1Regs.stat.VEW = false; + VU0.VI[REG_VPU_STAT].UL &= ~0xFF00; + return; + } static int count = 0; vu1Finish(); - VUM_LOG("vu1ExecMicro %x", addr); VUM_LOG("vu1ExecMicro %x (count=%d)", addr, count++); VU0.VI[REG_VPU_STAT].UL &= ~0xFF00; diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index 2446b1a96758c..7e575598a071a 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -18,6 +18,7 @@ #include "Common.h" #include "VUmicro.h" +#include "MTVU.h" extern void _vuFlushAll(VURegs* VU); @@ -173,6 +174,14 @@ InterpVU1::InterpVU1() IsInterpreter = true; } +void InterpVU1::Reset() { + vu1Thread.WaitVU(); +} + +void InterpVU1::Shutdown() { + vu1Thread.WaitVU(); +} + void InterpVU1::Step() { VU1.VI[REG_TPC].UL &= VU1_PROGMASK; diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index a4fe2d4cc0606..097231ab2c1bd 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -193,8 +193,8 @@ class InterpVU1 : public BaseVUmicroCPU wxString GetLongName() const { return L"VU1 Interpreter"; } void Reserve() { } - void Shutdown() throw() { } - void Reset() { } + void Shutdown() throw(); + void Reset(); void Step(); void Execute(u32 cycles); diff --git a/pcsx2/VUmicroMem.cpp b/pcsx2/VUmicroMem.cpp index 184cad74e6610..4b1127135c762 100644 --- a/pcsx2/VUmicroMem.cpp +++ b/pcsx2/VUmicroMem.cpp @@ -54,7 +54,8 @@ void vuMemoryReserve::Reset() pxAssert( VU0.Mem ); pxAssert( VU1.Mem ); - memMapVUmicro(); + // Below memMap is already called by "void eeMemoryReserve::Reset()" + //memMapVUmicro(); // === VU0 Initialization === memzero(VU0.ACC); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 241ea00c286b5..bcf40df61e1bf 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -18,6 +18,7 @@ #include "VUops.h" #include "GS.h" #include "Gif_Unit.h" +#include "MTVU.h" #include @@ -2018,7 +2019,8 @@ static __ri void _vuEEXP(VURegs * VU) { static __ri void _vuXITOP(VURegs * VU) { if (_It_ == 0) return; - VU->VI[_It_].US[0] = VU->GetVifRegs().itop; + if (VU==&VU1 && THREAD_VU1) VU->VI[_It_].US[0] = vu1Thread.vifRegs.itop; + else VU->VI[_It_].US[0] = VU->GetVifRegs().itop; } static __ri void _vuXGKICK(VURegs * VU) @@ -2041,7 +2043,8 @@ static __ri void _vuXGKICK(VURegs * VU) static __ri void _vuXTOP(VURegs * VU) { if(_It_ == 0) return; - VU->VI[_It_].US[0] = (u16)VU->GetVifRegs().top; + if (VU==&VU1 && THREAD_VU1) VU->VI[_It_].US[0] = (u16)vu1Thread.vifRegs.top; + else 
VU->VI[_It_].US[0] = (u16)VU->GetVifRegs().top; } #define GET_VF0_FLAG(reg) (((reg)==0)?(1< 0); + if (idx && THREAD_VU1) { + vu1Thread.WriteMicroMem(addr, (u8*)data, size*4); + return; + } if (memcmp_mmx(VUx.Micro + addr, data, size*4)) { // Clear VU memory before writing! - // (VUs expect size to be 32-bit scale, same as VIF's internal working sizes) - if (!idx) CpuVU0->Clear(addr, size); - else CpuVU1->Clear(addr, size); + if (!idx) CpuVU0->Clear(addr, size*4); + else CpuVU1->Clear(addr, size*4); memcpy_fast(VUx.Micro + addr, data, size*4); } } @@ -387,7 +392,9 @@ vifOp(vifCode_STCol) { return 1; } pass2 { - return _vifCode_STColRow(data, &vifX.MaskCol._u32[vifX.tag.addr]); + u32 ret = _vifCode_STColRow(data, &vifX.MaskCol._u32[vifX.tag.addr]); + if (idx && THREAD_VU1) { vu1Thread.WriteCol(vifX); } + return ret; } pass3 { VifCodeLog("STCol"); } return 0; @@ -401,7 +408,9 @@ vifOp(vifCode_STRow) { return 1; } pass2 { - return _vifCode_STColRow(data, &vifX.MaskRow._u32[vifX.tag.addr]); + u32 ret = _vifCode_STColRow(data, &vifX.MaskRow._u32[vifX.tag.addr]); + if (idx && THREAD_VU1) { vu1Thread.WriteRow(vifX); } + return ret; } pass3 { VifCodeLog("STRow"); } return 0; @@ -447,7 +456,9 @@ vifOp(vifCode_Unpack) { vifUnpackSetup(data); return 1; } - pass2 { return nVifUnpack((u8*)data); } + pass2 { + return nVifUnpack((u8*)data); + } pass3 { vifStruct& vifX = GetVifX; VIFregisters& vifRegs = vifXRegs; diff --git a/pcsx2/Vif_Dma.h b/pcsx2/Vif_Dma.h index 4746d661d0cec..e461cbaaa6808 100644 --- a/pcsx2/Vif_Dma.h +++ b/pcsx2/Vif_Dma.h @@ -56,14 +56,18 @@ union tTRXREG { // NOTE, if debugging vif stalls, use sega classics, spyro, gt4, and taito struct vifStruct { - u128 MaskRow, MaskCol; + __aligned16 u128 MaskRow; + __aligned16 u128 MaskCol; + + struct { // These must be together for MTVU + vifCode tag; + int cmd; + int cl; + u8 usn; + u8 StructEnd; // Address of this is used to calculate end of struct + }; - vifCode tag; - int cmd; int irq; - int cl; - int qwcalign; - u8 usn; bool done; bool vifstalled; @@ -72,17 +76,13 @@ struct vifStruct { // GS registers used for calculating the size of the last local->host transfer initiated on the GS // Transfer size calculation should be restricted to GS emulation in the future tBITBLTBUF BITBLTBUF; - tTRXREG TRXREG; - u32 GSLastDownloadSize; + tTRXREG TRXREG; + u32 GSLastDownloadSize; - u8 irqoffset; // 32bit offset where next vif code is - u32 savedtag; // need this for backwards compat with save states + u8 irqoffset; // 32bit offset where next vif code is u32 vifpacketsize; - u8 inprogress; - u32 lastcmd; - u8 dmamode; - u8 Unused_GifWaitState; // Only here for saved state compatibility - //u8 GifWaitState; // 0 = General PATH checking, 1 = Flush path 3, 2 == Wait for VU1 + u8 inprogress; + u8 dmamode; }; extern __aligned16 vifStruct vif0, vif1; diff --git a/pcsx2/Vif_Transfer.cpp b/pcsx2/Vif_Transfer.cpp index eee54b477a0c8..c202057f47b31 100644 --- a/pcsx2/Vif_Transfer.cpp +++ b/pcsx2/Vif_Transfer.cpp @@ -94,7 +94,6 @@ _vifT void vifTransferLoop(u32* &data) { vifCmdHandler[idx][vifX.cmd & 0x7f](0, data); data++; pSize--; - vifX.lastcmd = (vifXRegs.code >> 24) & 0x7f; if (analyzeIbit(data, iBit)) break; continue; } diff --git a/pcsx2/Vif_Unpack.cpp b/pcsx2/Vif_Unpack.cpp index fcd651e32d1ea..a3719e3cb989e 100644 --- a/pcsx2/Vif_Unpack.cpp +++ b/pcsx2/Vif_Unpack.cpp @@ -17,6 +17,7 @@ #include "Common.h" #include "Vif.h" #include "Vif_Dma.h" +#include "MTVU.h" enum UnpackOffset { OFFSET_X = 0, @@ -36,10 +37,10 @@ template< uint idx, uint mode, bool 
doMask >
static __ri void writeXYZW(u32 offnum, u32 &dest, u32 data) {
 	int n = 0;
 
-	vifStruct& vif = GetVifX;
+	vifStruct& vif = MTVU_VifX;
 
 	if (doMask) {
-		const VIFregisters& regs = vifXRegs;
+		const VIFregisters& regs = MTVU_VifXRegs;
 		switch (vif.cl) {
 			case 0: n = (regs.mask >> (offnum * 2)) & 0x3; break;
 			case 1: n = (regs.mask >> ( 8 + (offnum * 2))) & 0x3; break;
diff --git a/pcsx2/gui/CpuUsageProvider.cpp b/pcsx2/gui/CpuUsageProvider.cpp
index 8c638283d52c7..f2ba2f5574bb5 100644
--- a/pcsx2/gui/CpuUsageProvider.cpp
+++ b/pcsx2/gui/CpuUsageProvider.cpp
@@ -23,21 +23,24 @@
 #endif
 
 #include "GS.h"
+#include "MTVU.h"
 
-void AllThreeThreads::LoadWithCurrentTimes()
+void AllPCSX2Threads::LoadWithCurrentTimes()
 {
 	ee = GetCoreThread().GetCpuTime();
 	gs = GetMTGS().GetCpuTime();
+	vu = vu1Thread.GetCpuTime();
 	ui = GetThreadCpuTime();
 	update = GetCPUTicks();
 }
 
-AllThreeThreads AllThreeThreads::operator-( const AllThreeThreads& right ) const
+AllPCSX2Threads AllPCSX2Threads::operator-( const AllPCSX2Threads& right ) const
 {
-	AllThreeThreads retval;
+	AllPCSX2Threads retval;
 	retval.ee = ee - right.ee;
 	retval.gs = gs - right.gs;
+	retval.vu = vu - right.vu;
 	retval.ui = ui - right.ui;
 	retval.update = update - right.update;
 
@@ -48,6 +51,7 @@ DefaultCpuUsageProvider::DefaultCpuUsageProvider()
 {
 	m_pct_ee = 0;
 	m_pct_gs = 0;
+	m_pct_vu = 0;
 	m_pct_ui = 0;
 	m_writepos = 0;
 
@@ -69,16 +73,17 @@ void DefaultCpuUsageProvider::UpdateStats()
 {
 	// Measure deltas between the first and last positions in the ring buffer:
 
-	AllThreeThreads& newone( m_queue[m_writepos] );
+	AllPCSX2Threads& newone( m_queue[m_writepos] );
 	newone.LoadWithCurrentTimes();
 	m_writepos = (m_writepos+1) % QueueDepth;
 
-	const AllThreeThreads deltas( newone - m_queue[m_writepos] );
+	const AllPCSX2Threads deltas( newone - m_queue[m_writepos] );
 
 	// get the real time passed, scaled to the Thread's tick frequency.
 	u64 timepass = (deltas.update * GetThreadTicksPerSecond()) / GetTickFrequency();
 
 	m_pct_ee = (deltas.ee * 100) / timepass;
 	m_pct_gs = (deltas.gs * 100) / timepass;
+	m_pct_vu = (deltas.vu * 100) / timepass;
 	m_pct_ui = (deltas.ui * 100) / timepass;
 }
 
@@ -92,6 +97,11 @@ int DefaultCpuUsageProvider::GetGsPct() const
 	return m_pct_gs;
 }
 
+int DefaultCpuUsageProvider::GetVUPct() const
+{
+	return m_pct_vu;
+}
+
 int DefaultCpuUsageProvider::GetGuiPct() const
 {
 	return m_pct_ui;
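Annotation: the usage figures come from deltas across a small ring of samples. With QueueDepth = 4, each UpdateStats() compares the newest sample against the one taken three updates earlier, smoothing the percentages over several UI refreshes; the percentage itself is just thread-ticks over wall-ticks. The arithmetic, run with made-up sample numbers:

    #include <cstdio>

    int main() {
        // Hypothetical deltas between ring-buffer slots (all in thread ticks;
        // deltas.update is wall time already rescaled to the same frequency):
        unsigned long long wallTicks = 40000;
        unsigned long long eeTicks   = 30000; // EE core ran 75% of the window
        unsigned long long vuTicks   = 18000; // MTVU worker ran 45% of it
        std::printf("EE: %llu%%  VU: %llu%%\n",
                    eeTicks * 100 / wallTicks,   // -> 75
                    vuTicks * 100 / wallTicks);  // -> 45
        return 0;
    }

Note that with MTVU active the EE and VU columns measure different threads, so on a multi-core CPU the title-bar percentages can legitimately sum to more than 100.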
diff --git a/pcsx2/gui/CpuUsageProvider.h b/pcsx2/gui/CpuUsageProvider.h
index d24f03648bb07..18528dcf0e71a 100644
--- a/pcsx2/gui/CpuUsageProvider.h
+++ b/pcsx2/gui/CpuUsageProvider.h
@@ -27,6 +27,7 @@ class BaseCpuUsageProvider
 	virtual void UpdateStats()=0;
 	virtual int GetEEcorePct() const=0;
 	virtual int GetGsPct() const=0;
+	virtual int GetVUPct() const=0;
 	virtual int GetGuiPct() const=0;
 };
 
@@ -44,16 +45,17 @@ class CpuUsageProvider : public BaseCpuUsageProvider
 	virtual void UpdateStats() { m_Implementation->UpdateStats(); }
 	virtual int GetEEcorePct() const { return m_Implementation->GetEEcorePct(); }
 	virtual int GetGsPct() const { return m_Implementation->GetGsPct(); }
+	virtual int GetVUPct() const { return m_Implementation->GetVUPct(); }
 	virtual int GetGuiPct() const { return m_Implementation->GetGuiPct(); }
 };
 
-struct AllThreeThreads
+struct AllPCSX2Threads
 {
-	u64 ee, gs, ui;
+	u64 ee, gs, vu, ui;
 	u64 update;
 
 	void LoadWithCurrentTimes();
-	AllThreeThreads operator-( const AllThreeThreads& right ) const;
+	AllPCSX2Threads operator-( const AllPCSX2Threads& right ) const;
 };
 
 class DefaultCpuUsageProvider :
@@ -64,11 +66,12 @@ class DefaultCpuUsageProvider :
 	static const uint QueueDepth = 4;
 
 protected:
-	AllThreeThreads m_queue[QueueDepth];
+	AllPCSX2Threads m_queue[QueueDepth];
 	uint m_writepos;
 
 	u32 m_pct_ee;
 	u32 m_pct_gs;
+	u32 m_pct_vu;
 	u32 m_pct_ui;
 
 public:
@@ -80,6 +83,7 @@ class DefaultCpuUsageProvider :
 	void UpdateStats();
 	int GetEEcorePct() const;
 	int GetGsPct() const;
+	int GetVUPct() const;
 	int GetGuiPct() const;
 
 protected:
diff --git a/pcsx2/gui/CpuUsageProviderMSW.cpp b/pcsx2/gui/CpuUsageProviderMSW.cpp
index f46a5d952aaca..8f735e44be0a3 100644
--- a/pcsx2/gui/CpuUsageProviderMSW.cpp
+++ b/pcsx2/gui/CpuUsageProviderMSW.cpp
@@ -55,6 +55,7 @@ class CpuUsageProviderMSW : public BaseCpuUsageProvider
 	void UpdateStats();
 	int GetEEcorePct() const;
 	int GetGsPct() const;
+	int GetVUPct() const;
 	int GetGuiPct() const;
 };
 
@@ -264,6 +265,11 @@ int CpuUsageProviderMSW::GetGsPct() const
 	return 0;
 }
 
+int CpuUsageProviderMSW::GetVUPct() const
+{
+	return 0;
+}
+
 int CpuUsageProviderMSW::GetGuiPct() const
 {
 	return 0;
diff --git a/pcsx2/gui/FrameForGS.cpp b/pcsx2/gui/FrameForGS.cpp
index 1326ea3a99871..8d92308143748 100644
--- a/pcsx2/gui/FrameForGS.cpp
+++ b/pcsx2/gui/FrameForGS.cpp
@@ -533,10 +533,18 @@ void GSFrame::OnUpdateTitle( wxTimerEvent& evt )
 	}
 
 	FastFormatUnicode cpuUsage;
-	if( m_CpuUsage.IsImplemented() )
-	{
+	if (m_CpuUsage.IsImplemented()) {
 		m_CpuUsage.UpdateStats();
-		cpuUsage.Write( L" | EE: %3d%% | GS: %3d%% | UI: %3d%%", m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(), m_CpuUsage.GetGuiPct() );
+		if (THREAD_VU1) { // Display VU thread's usage
+			cpuUsage.Write(L" | EE: %3d%% | GS: %3d%% | VU: %3d%% | UI: %3d%%",
+				m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(),
+				m_CpuUsage.GetVUPct(), m_CpuUsage.GetGuiPct());
+		}
+		else {
+			cpuUsage.Write(L" | EE: %3d%% | GS: %3d%% | UI: %3d%%",
+				m_CpuUsage.GetEEcorePct(), m_CpuUsage.GetGsPct(),
+				m_CpuUsage.GetGuiPct());
+		}
 	}
 
 	const u64& smode2 = 
*(u64*)PS2GS_BASE(GS_SMODE2); diff --git a/pcsx2/gui/Panels/ConfigurationPanels.h b/pcsx2/gui/Panels/ConfigurationPanels.h index a9186bbc83a70..19366862e98b8 100644 --- a/pcsx2/gui/Panels/ConfigurationPanels.h +++ b/pcsx2/gui/Panels/ConfigurationPanels.h @@ -335,6 +335,7 @@ namespace Panels pxCheckBox* m_check_fastCDVD; pxCheckBox* m_check_vuFlagHack; pxCheckBox* m_check_vuBlockHack; + pxCheckBox* m_check_vuThread; public: virtual ~SpeedHacksPanel() throw() {} diff --git a/pcsx2/gui/Panels/SpeedhacksPanel.cpp b/pcsx2/gui/Panels/SpeedhacksPanel.cpp index f6a37e1aae55b..d7db4d164a794 100644 --- a/pcsx2/gui/Panels/SpeedhacksPanel.cpp +++ b/pcsx2/gui/Panels/SpeedhacksPanel.cpp @@ -161,10 +161,13 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent ) wxPanelWithHelpers* vuHacksPanel = new wxPanelWithHelpers( right, wxVERTICAL, _("microVU Hacks") ); m_check_vuFlagHack = new pxCheckBox( vuHacksPanel, _("mVU Flag Hack"), - _("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc... [Recommended]") ); + _("Good Speedup and High Compatibility; may cause bad graphics... [Recommended]" ) ); m_check_vuBlockHack = new pxCheckBox( vuHacksPanel, _("mVU Block Hack"), - _("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc...") ); + _("Good Speedup and High Compatibility; may cause bad graphics, SPS, etc...") ); + + m_check_vuThread = new pxCheckBox( vuHacksPanel, _("MTVU (Multi-Threaded microVU1)"), + _("Good Speedup and High Compatibility; may cause hanging... [Recommended if 3+ cores]") ); m_check_vuFlagHack->SetToolTip( pxEt( "!ContextTip:Speedhacks:vuFlagHack", L"Updates Status Flags only on blocks which will read them, instead of all the time. " @@ -176,6 +179,12 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent ) L"This should be pretty safe. It is unknown if this breaks any game..." ) ); + m_check_vuThread->SetToolTip( pxEt( "!ContextTip:Speedhacks:vuThread", + L"Runs VU1 on its own thread (microVU1-only). Generally a speedup on CPUs with 3 or more cores. " + L"This is safe for most games, but a few games are incompatible and may hang. " + L"In the case of GS limited games, it may be a slowdown (especially on dual core CPUs)." 
+ ) ); + // ------------------------------------------------------------------------ // All other hacks Section: @@ -226,7 +235,8 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent ) *vuHacksPanel += m_check_vuFlagHack; *vuHacksPanel += m_check_vuBlockHack; - *vuHacksPanel += 57; // Aligns left and right boxes in default language and font size + *vuHacksPanel += m_check_vuThread; + //*vuHacksPanel += 57; // Aligns left and right boxes in default language and font size *miscHacksPanel += m_check_intc; *miscHacksPanel += m_check_waitloop; @@ -304,6 +314,7 @@ void Panels::SpeedHacksPanel::ApplyConfigToGui( AppConfig& configToApply, int fl m_check_vuFlagHack ->SetValue(opts.vuFlagHack); m_check_vuBlockHack ->SetValue(opts.vuBlockHack); + m_check_vuThread ->SetValue(opts.vuThread); m_check_intc ->SetValue(opts.IntcStat); m_check_waitloop ->SetValue(opts.WaitLoop); m_check_fastCDVD ->SetValue(opts.fastCDVD); @@ -333,6 +344,7 @@ void Panels::SpeedHacksPanel::Apply() opts.IntcStat = m_check_intc->GetValue(); opts.vuFlagHack = m_check_vuFlagHack->GetValue(); opts.vuBlockHack = m_check_vuBlockHack->GetValue(); + opts.vuThread = m_check_vuThread->GetValue(); // If the user has a command line override specified, we need to disable it // so that their changes take effect diff --git a/pcsx2/ps2/LegacyDmac.cpp b/pcsx2/ps2/LegacyDmac.cpp index 931dbd5a5efbc..a8473971fbb22 100644 --- a/pcsx2/ps2/LegacyDmac.cpp +++ b/pcsx2/ps2/LegacyDmac.cpp @@ -17,6 +17,7 @@ #include "PrecompiledHeader.h" #include "Common.h" #include "Hardware.h" +#include "MTVU.h" #include "IPU/IPUdma.h" #include "ps2/HwInternal.h" @@ -91,7 +92,7 @@ __fi void setDmacStat(u32 num) } // Note: Dma addresses are guaranteed to be aligned to 16 bytes (128 bits) -__fi tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write) +__fi tDMA_TAG* SPRdmaGetAddr(u32 addr, bool write) { // if (addr & 0xf) { DMA_LOG("*PCSX2*: DMA address not 128bit aligned: %8.8x", addr); } @@ -114,6 +115,10 @@ __fi tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write) } else if ((addr >= 0x11004000) && (addr < 0x11010000)) { + if (THREAD_VU1) { + DevCon.Error("MTVU: SPRdmaGetAddr Accessing VU Memory!"); + vu1Thread.WaitVU(); + } //Access for VU Memory return (tDMA_TAG*)vtlb_GetPhyPtr(addr & 0x1FFFFFF0); } diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 845815a565847..3473107b5ddb4 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -41,7 +41,7 @@ using namespace R5900; using namespace vtlb_private; -#define verify pxAssume +#define verify pxAssert namespace vtlb_private { @@ -512,14 +512,14 @@ void vtlb_MapBlock(void* base, u32 start, u32 size, u32 blocksize) { verify(0==(start&VTLB_PAGE_MASK)); verify(0==(size&VTLB_PAGE_MASK) && size>0); - if (!blocksize) + if(!blocksize) blocksize = size; verify(0==(blocksize&VTLB_PAGE_MASK) && blocksize>0); verify(0==(size%blocksize)); s32 baseint = (s32)base; u32 end = start + (size - VTLB_PAGE_SIZE); - pxAssume( (end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap) ); + verify((end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap)); while (start <= end) { @@ -544,7 +544,7 @@ void vtlb_Mirror(u32 new_region,u32 start,u32 size) verify(0==(size&VTLB_PAGE_MASK) && size>0); u32 end = start + (size-VTLB_PAGE_SIZE); - pxAssume( (end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap) ); + verify((end>>VTLB_PAGE_BITS) < ArraySize(vtlbdata.pmap)); while(start <= end) { diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 89c316ffd8513..db32bc6b83551 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ 
b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
@@ -1322,6 +1322,14 @@
+
+
+
+
diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index bffd943ed4f37..f2b83a7e941e0 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -99,6 +99,7 @@ void mVUreset(microVU& mVU, bool resetReserve) {
 	mVU.prog.x86start = z;
 	mVU.prog.x86ptr = z;
 	mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);
+	//memset(mVU.prog.x86start, 0xcc, mVU.cacheSize*_1mb);
 
 	for(u32 i = 0; i < (mVU.progSize / 2); i++) {
 		if(!mVU.prog.prog[i]) {
@@ -279,7 +280,6 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) {
 //------------------------------------------------------------------
 // recMicroVU0 / recMicroVU1
 //------------------------------------------------------------------
-
 recMicroVU0::recMicroVU0() { m_Idx = 0; IsInterpreter = false; }
 recMicroVU1::recMicroVU1() { m_Idx = 1; IsInterpreter = false; }
 void recMicroVU0::Vsync() throw() { mVUvsyncUpdate(microVU0); }
@@ -290,8 +290,10 @@ void recMicroVU0::Reserve() {
 	mVUinit(microVU0, 0);
 }
 void recMicroVU1::Reserve() {
-	if (AtomicExchange(m_Reserved, 1) == 0)
+	if (AtomicExchange(m_Reserved, 1) == 0) {
 		mVUinit(microVU1, 1);
+		vu1Thread.InitThread();
+	}
 }
 
 void recMicroVU0::Shutdown() throw() {
@@ -299,8 +301,10 @@ void recMicroVU0::Shutdown() throw() {
 	mVUclose(microVU0);
 }
 void recMicroVU1::Shutdown() throw() {
-	if (AtomicExchange(m_Reserved, 0) == 1)
+	if (AtomicExchange(m_Reserved, 0) == 1) {
+		vu1Thread.WaitVU();
 		mVUclose(microVU1);
+	}
 }
 
 void recMicroVU0::Reset() {
@@ -309,6 +313,7 @@ void recMicroVU0::Reset() {
 }
 void recMicroVU1::Reset() {
 	if(!pxAssertDev(m_Reserved, "MicroVU1 CPU Provider has not been reserved prior to reset!")) return;
+	vu1Thread.WaitVU();
 	mVUreset(microVU1, true);
 }
 
@@ -325,8 +330,10 @@ void recMicroVU0::Execute(u32 cycles) {
 void recMicroVU1::Execute(u32 cycles) {
 	pxAssert(m_Reserved); // please allocate me first! :|
 
-	if(!(VU0.VI[REG_VPU_STAT].UL & 0x100)) return;
-	((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, vu1RunCycles);
+	if (!THREAD_VU1) {
+		if(!(VU0.VI[REG_VPU_STAT].UL & 0x100)) return;
+	}
+	((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, cycles);
 }
 
 void recMicroVU0::Clear(u32 addr, u32 size) {
diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h
index 8ae475cd61f70..30c76a9f55526 100644
--- a/pcsx2/x86/microVU.h
+++ b/pcsx2/x86/microVU.h
@@ -24,8 +24,8 @@ using namespace x86Emitter;
 #include
 #include "Common.h"
 #include "VU.h"
+#include "MTVU.h"
 #include "GS.h"
-#include "Gif.h"
 #include "Gif_Unit.h"
 #include "iR5900.h"
 #include "R5900OpcodeTables.h"
@@ -217,9 +217,11 @@ struct microVU {
 
 	VURegs& regs() const { return ::vuRegs[index]; }
 
-	__fi VIFregisters& getVifRegs() const { return regs().GetVifRegs(); }
-	__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
-	__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
+	__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
+	__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
+	__fi VIFregisters& getVifRegs() const {
+		return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs();
+	}
 };
 
 // microVU rec structs
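Annotation: getVifRegs() is the subtle part of this hunk. Once VU1 runs asynchronously, the live vif1 registers belong to the EE thread, which has usually raced ahead by the time a queued program executes; vu1Thread therefore keeps its own VIFregisters copy, captured when the command was queued, and microVU1 reads that copy instead. A toy illustration of the snapshot idea (types and names hypothetical):

    #include <cstdio>

    struct VIFregs { int top, itop; };

    struct Vu1Worker {
        VIFregs vifRegs;                      // worker-visible snapshot
        void executeVU(const VIFregs& live) { // called at enqueue time
            vifRegs = live;                   // snapshot, then run asynchronously
        }
    };

    int main() {
        VIFregs live = { 0x100, 0x200 };
        Vu1Worker vu1;
        vu1.executeVU(live);
        live.top = 0x300;                     // EE/VIF races ahead...
        std::printf("%x\n", vu1.vifRegs.top); // ...worker still sees 0x100
        return 0;
    }

The same indirection appears earlier in _vuXTOP/_vuXITOP and again through the MTVU_VifX/MTVU_VifXRegs macros in the VIF unpack code.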
diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl
index e60aac2ea25fc..4e75c8173c668 100644
--- a/pcsx2/x86/microVU_Branch.inl
+++ b/pcsx2/x86/microVU_Branch.inl
@@ -77,8 +77,10 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
 	xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);
 
 	if (isEbit || isVU1) { // Clear 'is busy' Flags
-		xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
-		xAND(ptr32[&mVU.getVifRegs().stat], ~VIF1_STAT_VEW); // Clear VU 'is busy' signal for vif
+		if (!mVU.index || !THREAD_VU1) {
+			xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
+			xAND(ptr32[&mVU.getVifRegs().stat], ~VIF1_STAT_VEW); // Clear VU 'is busy' signal for vif
+		}
 	}
 
 	if (isEbit != 2) { // Save PC, and Jump to Exit Point
diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl
index 485d9a1d43201..02d39a7812c27 100644
--- a/pcsx2/x86/microVU_Execute.inl
+++ b/pcsx2/x86/microVU_Execute.inl
@@ -199,7 +199,10 @@ _mVUt void mVUcleanUp() {
 
 	mVU.cycles = mVU.totalCycles - mVU.cycles;
 	mVU.regs().cycle += mVU.cycles;
-	cpuRegs.cycle += ((mVU.cycles < 3000) ? mVU.cycles : 3000) * EmuConfig.Speedhacks.VUCycleSteal;
+
+	if (!vuIndex || !THREAD_VU1) {
+		cpuRegs.cycle += std::min(mVU.cycles, 3000u) * EmuConfig.Speedhacks.VUCycleSteal;
+	}
 
 	//static int ax = 0; ax++;
 	//if (!(ax % 100000)) {
 	//	for (u32 i = 0; i < (mVU.progSize / 2); i++) {
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index 8c8dd58580f97..b1c6aad445c27 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -239,7 +239,14 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
 }
 
 // Gets called by mVUaddrFix at execution-time
-static void __fastcall mVUwarningRegAccess(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); }
+static void __fc mVUwarningRegAccess(u32 prog, u32 pc) {
+	Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
+}
+
+static void __fc mVUwaitMTVU() {
+	if (IsDevBuild) DevCon.WriteLn("microVU0: Waiting on VU1 thread to access VU1 regs!");
+	if (THREAD_VU1) vu1Thread.WaitVU();
+}
 
 // Transforms the Address in gprReg to valid VU0/VU1 Address
 __fi void mVUaddrFix(mV, const x32& gprReg)
@@ -249,28 +256,31 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
 	{
 		xSHL(gprReg, 4);
 	}
 	else {
-		if (IsDevBuild && !isCOP2) mVUbackupRegs(mVU, true);
 		xTEST(gprReg, 0x400);
 		xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
 			xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
-			xForwardJump8 jmpB;
+			xForwardJump32 jmpB;
 		jmpA.SetTarget();
-			if (IsDevBuild && !isCOP2) { // Let's see which games do this!
-				xPUSH(gprT1); // Note: Kernel does it via COP2 to initialize VU1!
-				xPUSH(gprT2); // So we don't spam console, we'll only check micro-mode...
-				xPUSH(gprT3);
-				xMOV (gprT2, mVU.prog.cur->idx);
-				xMOV (gprT3, xPC);
-				xCALL(mVUwarningRegAccess);
+			if (THREAD_VU1 || (IsDevBuild && !isCOP2)) {
+				mVUbackupRegs(mVU, true);
+				xPUSH(gprT1);
+				xPUSH(gprT2);
+				xPUSH(gprT3);
+				if (IsDevBuild && !isCOP2) { // Let's see which games do this!
+					xMOV (gprT2, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
+					xMOV (gprT3, xPC); // So we don't spam console, we'll only check micro-mode...
+ xCALL(mVUwarningRegAccess); + } + xCALL(mVUwaitMTVU); xPOP (gprT3); xPOP (gprT2); xPOP (gprT1); + mVUrestoreRegs(mVU, true); } xAND(gprReg, 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem); jmpB.SetTarget(); xSHL(gprReg, 4); // multiply by 16 (shift left by 4) - if (IsDevBuild && !isCOP2) mVUrestoreRegs(mVU, true); } } diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index c1540ccbf4364..8f2a6031a3de2 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -57,7 +57,6 @@ _vifT extern void dVifUnpack (const u8* data, bool isFill); // nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are // used as the hash bucket selector. -// struct __aligned16 nVifBlock { u8 num; // [00] Num Field u8 upkType; // [01] Unpack Type [usn*1:mask*1:upk*4] @@ -74,6 +73,8 @@ struct __aligned16 nVifBlock { #define _tParams nVifBlock, _hSize, _cmpS struct nVifStruct { + __aligned16 nVifBlock block; + // Buffer for partial transfers (should always be first to ensure alignment) // Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword) __aligned16 u8 buffer[256*16]; diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index db52bce079c4d..925f24f69f084 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -19,30 +19,28 @@ #include "PrecompiledHeader.h" #include "newVif_UnpackSSE.h" +#include "MTVU.h" -static __aligned16 nVifBlock _vBlock = {0}; - -void dVifReserve(int idx) -{ - if (!nVif[idx].recReserve) +void dVifReserve(int idx) { + if(!nVif[idx].recReserve) nVif[idx].recReserve = new RecompiledCodeReserve(pxsFmt(L"VIF%u Unpack Recompiler Cache", idx)); nVif[idx].recReserve->Reserve( nVif[idx].recReserveSizeMB * _1mb, idx ? HostMemoryMap::VIF1rec : HostMemoryMap::VIF0rec ); } void dVifReset(int idx) { - pxAssertDev(nVif[idx].recReserve, "Dynamic VIF recompiler reserve must be created prior to VIF use or reset!"); - if (!nVif[idx].vifBlocks) + if(!nVif[idx].vifBlocks) nVif[idx].vifBlocks = new HashBucket<_tParams>(); else nVif[idx].vifBlocks->clear(); nVif[idx].recReserve->Reset(); - nVif[idx].numBlocks = 0; - nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr(); + nVif[idx].numBlocks = 0; + nVif[idx].recWritePtr = nVif[idx].recReserve->GetPtr(); + //memset(nVif[idx].recWritePtr, 0xcc, nVif[idx].recReserveSizeMB * _1mb); } void dVifClose(int idx) { @@ -74,7 +72,8 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo } __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { - const vifStruct& vif = v.idx ? vif1 : vif0; + const int idx = v.idx; + const vifStruct& vif = MTVU_VifX; u32 m0 = vB.mask; u32 m1 = m0 & 0xaaaaaaaa; @@ -126,7 +125,8 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { } void VifUnpackSSE_Dynarec::writeBackRow() const { - xMOVAPS(ptr128[&((v.idx ? vif1 : vif0).MaskRow)], xmmRow); + const int idx = v.idx; + xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow); DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]"); // ToDo: Do we need to write back to vifregs.rX too!? :/ } @@ -208,25 +208,25 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { } _vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, bool isFill) { - vifStruct& vif = GetVifX; - const VURegs& VU = vuRegs[idx]; - const uint vuMemLimit = idx ? 0x4000 : 0x1000; + nVifStruct& v = nVif[idx]; + vifStruct& vif = MTVU_VifX; + const VURegs& VU = vuRegs[idx]; + const uint vuMemLimit = idx ? 
0x4000 : 0x1000; - u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10)); - u8* endmem = VU.Mem + vuMemLimit; - uint length = (_vBlock.num > 0) ? (_vBlock.num * 16) : 4096; // 0 = 256 + u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10)); + u8* endmem = VU.Mem + vuMemLimit; + uint length = (v.block.num > 0) ? (v.block.num * 16) : 4096; // 0 = 256 if (!isFill) { // Accounting for skipping mode: Subtract the last skip cycle, since the skipped part of the run // shouldn't count as wrapped data. Otherwise, a trailing skip can cause the emu to drop back // to the interpreter. -- Refraction (test with MGS3) - uint skipSize = (cl - wl) * 16; - uint blocks = _vBlock.num / wl; + uint blocks = v.block.num / wl; length += (blocks-1) * skipSize; } - if ( (startmem+length) <= endmem ) { + if ((startmem + length) <= endmem) { return startmem; } //Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, vif.tag.addr, vif.tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl); @@ -245,12 +245,12 @@ static __fi void dVifRecLimit(int idx) { } } -_vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill) +_vifT static __ri bool dVifExecuteUnpack(const u8* data, bool isFill) { - const nVifStruct& v = nVif[idx]; - VIFregisters& vifRegs = vifXRegs; + nVifStruct& v = nVif[idx]; + VIFregisters& vifRegs = MTVU_VifXRegs; - if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { + if (nVifBlock* b = v.vifBlocks->find(&v.block)) { if (u8* dest = dVifsetVUptr(vifRegs.cycle.cl, vifRegs.cycle.wl, isFill)) { //DevCon.WriteLn("Running Recompiled Block!"); ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); @@ -266,39 +266,37 @@ _vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill) _vifT __fi void dVifUnpack(const u8* data, bool isFill) { - const nVifStruct& v = nVif[idx]; - vifStruct& vif = GetVifX; - VIFregisters& vifRegs = vifXRegs; + nVifStruct& v = nVif[idx]; + vifStruct& vif = MTVU_VifX; + VIFregisters& vifRegs = MTVU_VifXRegs; - const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5); - const int doMask = isFill? 1 : (vif.cmd & 0x10); + const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5); + const int doMask = isFill? 1 : (vif.cmd & 0x10); - _vBlock.upkType = upkType; - _vBlock.num = (u8&)vifRegs.num; - _vBlock.mode = (u8&)vifRegs.mode; - _vBlock.cl = vifRegs.cycle.cl; - _vBlock.wl = vifRegs.cycle.wl; + v.block.upkType = upkType; + v.block.num = (u8&)vifRegs.num; + v.block.mode = (u8&)vifRegs.mode; + v.block.cl = vifRegs.cycle.cl; + v.block.wl = vifRegs.cycle.wl; // Zero out the mask parameter if it's unused -- games leave random junk // values here which cause false recblock cache misses. - _vBlock.mask = doMask ? vifRegs.mask : 0; + v.block.mask = doMask ? vifRegs.mask : 0; //DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++); //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]", - // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode, - // doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored" + // v.Block.num, v.Block.upkType, v.Block.scl, v.Block.cl, v.Block.wl, v.Block.mode, + // doMask >> 4, doMask ? 
wxsFormat( L"0x%08x", v.Block.mask ).c_str() : L"ignored" //); if (dVifExecuteUnpack(data, isFill)) return; xSetPtr(v.recWritePtr); - _vBlock.startPtr = (uptr)xGetAlignedCallTarget(); - v.vifBlocks->add(_vBlock); - VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine(); + v.block.startPtr = (uptr)xGetAlignedCallTarget(); + v.vifBlocks->add(v.block); + VifUnpackSSE_Dynarec(v, v.block).CompileRoutine(); nVif[idx].recWritePtr = xGetPtr(); - // [TODO] : Ideally we should test recompile buffer limits prior to each instruction, - // which would be safer and more memory efficient than using an 0.25 meg recEnd marker. dVifRecLimit(idx); // Run the block we just compiled. Various conditions may force us to still use diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index c316818c2bcd7..c90843b0ee93b 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -21,6 +21,7 @@ #include "Common.h" #include "Vif_Dma.h" #include "newVif.h" +#include "MTVU.h" __aligned16 nVifStruct nVif[2]; @@ -75,7 +76,7 @@ nVifStruct::nVifStruct() vifBlocks = NULL; numBlocks = 0; - recReserveSizeMB = 8; + recReserveSizeMB = 8; } void reserveNewVif(int idx) @@ -87,8 +88,8 @@ void resetNewVif(int idx) // Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have // changed for some reason. - nVif[idx].idx = idx; - nVif[idx].bSize = 0; + nVif[idx].idx = idx; + nVif[idx].bSize = 0; memzero(nVif[idx].buffer); if (newVifDynaRec) dVifReset(idx); @@ -106,8 +107,8 @@ static __fi u8* getVUptr(uint idx, int offset) { _vifT int nVifUnpack(const u8* data) { - nVifStruct& v = nVif[idx]; - vifStruct& vif = GetVifX; + nVifStruct& v = nVif[idx]; + vifStruct& vif = GetVifX; VIFregisters& vifRegs = vifXRegs; const uint ret = aMin(vif.vifpacketsize, vif.tag.size); @@ -118,6 +119,7 @@ _vifT int nVifUnpack(const u8* data) { if (v.bSize) { // Last transfer was partial memcpy_fast(&v.buffer[v.bSize], data, size); v.bSize += size; + size = v.bSize; data = v.buffer; vif.cl = 0; @@ -125,8 +127,11 @@ _vifT int nVifUnpack(const u8* data) { if (!vifRegs.num) vifRegs.num = 256; } - if (newVifDynaRec) dVifUnpack(data, isFill); - else _nVifUnpack(idx, data, vifRegs.mode, isFill); + if (!idx || !THREAD_VU1) { + if (newVifDynaRec) dVifUnpack(data, isFill); + else _nVifUnpack(idx, data, vifRegs.mode, isFill); + } + else vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, size); vif.tag.size = 0; vif.cmd = 0; @@ -147,12 +152,10 @@ _vifT int nVifUnpack(const u8* data) { // We can optimize the calculation either way as some games have big partial chunks (Guitar Hero). // Skipping writes are easy, filling is a bit more complex, so for now until we can // be sure its right (if it happens) it just prints debug stuff and processes the old way. - if(!isFill) - { - vifRegs.num -= (size / vSize); + if (!isFill) { + vifRegs.num -= (size / vSize); } - else - { + else { int guessedsize = (size / vSize); guessedsize = vifRegs.num - (((guessedsize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + guessedsize); @@ -164,14 +167,11 @@ _vifT int nVifUnpack(const u8* data) { if (vif.cl <= vifRegs.cycle.cl) size -= vSize; else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0; } - else - { + else { size -= vSize; if (vif.cl >= vifRegs.cycle.wl) vif.cl = 0; } } - - DevCon.Warning("Fill!! 
Partial num left = %x, guessed %x", vifRegs.num, guessedsize);
 		}
 	}
 
@@ -236,8 +236,8 @@ static void setMasks(const vifStruct& vif, const VIFregisters& v) {
 
 template< int idx, bool doMode, bool isFill >
 __ri void __fastcall _nVifUnpackLoop(const u8* data) {
 
-	vifStruct& vif = GetVifX;
-	VIFregisters& vifRegs = vifXRegs;
+	vifStruct& vif = MTVU_VifX;
+	VIFregisters& vifRegs = MTVU_VifXRegs;
 
 	// skipSize used for skipping writes only
 	const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;
@@ -253,8 +253,8 @@ __ri void __fastcall _nVifUnpackLoop(const u8* data) {
 	//uint vn = (vif.cmd >> 2) & 0x3;
 	//uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle
 
-	const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
-	const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ];
+	const nVifCall*      fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
+	const UNPACKFUNCTYPE ft     = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ];
 
 	pxAssume (vif.cl == 0);
 	pxAssume (vifRegs.cycle.wl > 0);
diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp
index 749c02814942c..391ea7741f0e0 100644
--- a/pcsx2/x86/sVU_Lower.cpp
+++ b/pcsx2/x86/sVU_Lower.cpp
@@ -26,7 +26,6 @@
 #include "sVU_Micro.h"
 #include "sVU_Debug.h"
 #include "sVU_zerorec.h"
-#include "Gif.h"
 #include "Gif_Unit.h"
 
 using namespace x86Emitter;
diff --git a/pcsx2/x86/sVU_zerorec.cpp b/pcsx2/x86/sVU_zerorec.cpp
index 6a1f93bfb9fa4..7bd57dee8e908 100644
--- a/pcsx2/x86/sVU_zerorec.cpp
+++ b/pcsx2/x86/sVU_zerorec.cpp
@@ -32,6 +32,7 @@
 #include "GS.h"
 #include "Gif.h"
 #include "VU.h"
+#include "MTVU.h"
 #include "R5900.h"
 #include "iR5900.h"
 
@@ -456,15 +457,14 @@ void SuperVUReset(int vuindex)
 	s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
 }
 
-// clear the block and any joining blocks
+// clear the block and any joining blocks (size given in bytes)
 static void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex)
 {
 	vector<VuFunctionHeader::RANGE>::iterator itrange;
 	list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
-	u32 endpc = startpc + ((size * 4 + 7) & ~7); // Adding this code to ensure size is always a multiple of 8, it can be simplified to startpc+size if size is always a multiple of 8 (cottonvibes)
+	u32 endpc = startpc + ((size + 7) & ~7); // Ensure size is a multiple of u64 (round up)
 
 	while (it != s_listVUHeaders[vuindex].end())
 	{
-		// for every fn, check if it has code in the range
 		for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++)
 		{
@@ -4641,11 +4641,13 @@ void recSuperVU1::Reserve()
 
 void recSuperVU1::Shutdown() throw()
 {
+	vu1Thread.WaitVU();
 	SuperVUDestroy( 1 );
 }
 
 void recSuperVU1::Reset()
 {
+	vu1Thread.WaitVU();
 	SuperVUReset( 1 );
 }
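Closing annotation: taken together, the patch funnels every EE-to-VU1 interaction (WriteMicroMem, WriteDataMem, WriteRow/WriteCol, VifUnpack, ExecuteVU) through a single-producer/single-consumer channel owned by vu1Thread, with WaitVU() as the only blocking synchronization point. A minimal SPSC ring buffer in the same spirit, written with C++11 atomics rather than the project's AtomicExchange helpers; names and sizes are illustrative:

    #include <atomic>
    #include <cstdio>

    template <typename T, unsigned N> // N must be a power of two
    class SpscRing {
        T buf[N];
        std::atomic<unsigned> readPos { 0 }, writePos { 0 };
    public:
        bool push(const T& v) {            // producer (EE) thread only
            unsigned w = writePos.load(std::memory_order_relaxed);
            if (w - readPos.load(std::memory_order_acquire) == N) return false; // full
            buf[w % N] = v;
            writePos.store(w + 1, std::memory_order_release);
            return true;
        }
        bool pop(T& v) {                   // consumer (VU worker) thread only
            unsigned r = readPos.load(std::memory_order_relaxed);
            if (r == writePos.load(std::memory_order_acquire)) return false;    // empty
            v = buf[r % N];
            readPos.store(r + 1, std::memory_order_release);
            return true;
        }
        bool empty() const {               // what a WaitVU-style fence polls on
            return readPos.load(std::memory_order_acquire)
                == writePos.load(std::memory_order_acquire);
        }
    };

    int main() {
        SpscRing<int, 8> ring;
        ring.push(42);
        int v;
        while (ring.pop(v)) std::printf("%d\n", v);
        return 0;
    }

Each side touches only its own index, with release/acquire pairing on the other side's index, which is what lets the EE keep queueing work without taking a lock. A WaitVU-style fence then reduces to polling until the channel is empty, though the real implementation must also track whether the worker is mid-program, not just whether the queue has drained.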