Compare commits

...

5 commits

Author SHA1 Message Date
mitaclaw 802c344f93 CachedInterpreter: InterpretAndCheckExceptions Callback
I tried making InterpretAndCheckExceptions test only the relevant exceptions (EXCEPTION_DSI, EXCEPTION_PROGRAM, or both) using templating, but didn't find a significant performance boost in it. As I am learning, the biggest bottleneck is the number of callbacks emitted, not usually the actual contents of them.
2024-05-09 21:04:26 -07:00
mitaclaw 2665921dce CachedInterpreter: Use CodeOp::canEndBlock
This was a bigger performance boost than I expected.
2024-05-09 21:04:26 -07:00
mitaclaw acb162eb10 CachedInterpreter: WritePC optimizations
WritePC is now needed far less, only for instructions that end the block. Unfortunately, WritePC still needs to update `PowerPCState::npc` to support the false path of conditional branch instructions. Both drawbacks should be smoothed over by optimized cached instructions in the future.
2024-05-09 21:04:26 -07:00
mitaclaw 1a3f3d7842 CachedInterpreter: Exception Check Callback Micro-Optimization
This saves two register pushes / pops.
2024-05-09 21:04:26 -07:00
mitaclaw c7f65d3dc9 Cached Interpreter 2.0
It now supports variable-sized data payloads and memory range freeing. It's a little faster, too.
2024-05-09 21:04:25 -07:00
11 changed files with 510 additions and 271 deletions

View file

@ -17,7 +17,7 @@ namespace Common
// having to prefix them with gen-> or something similar.
// Example implementation:
// class JIT : public CodeBlock<ARMXEmitter> {}
template <class T>
template <class T, bool executable = true>
class CodeBlock : public T
{
private:
@ -53,7 +53,10 @@ public:
{
region_size = size;
total_region_size = size;
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
if constexpr (executable)
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
else
region = static_cast<u8*>(Common::AllocateMemoryPages(total_region_size));
T::SetCodePtr(region, region + size);
}

View file

@ -481,8 +481,10 @@ add_library(core
PowerPC/BreakPoints.h
PowerPC/CachedInterpreter/CachedInterpreter.cpp
PowerPC/CachedInterpreter/CachedInterpreter.h
PowerPC/CachedInterpreter/InterpreterBlockCache.cpp
PowerPC/CachedInterpreter/InterpreterBlockCache.h
PowerPC/CachedInterpreter/CachedInterpreterBlockCache.cpp
PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h
PowerPC/CachedInterpreter/CachedInterpreterEmitter.cpp
PowerPC/CachedInterpreter/CachedInterpreterEmitter.h
PowerPC/ConditionRegister.cpp
PowerPC/ConditionRegister.h
PowerPC/Expression.cpp

View file

@ -6,6 +6,7 @@
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HLE/HLE.h"
#include "Core/HW/CPU.h"
@ -16,65 +17,7 @@
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
// Legacy fixed-size cached instruction: a tagged union of one callback pointer plus a 32-bit
// payload. The `type` tag records which union member is active so the executor can pick the
// matching call signature.
struct CachedInterpreter::Instruction
{
// Callback signatures. The two "Conditional" variants return bool; the executor stops running
// the current block when they return true.
using CommonCallback = void (*)(UGeckoInstruction);
using ConditionalCallback = bool (*)(u32);
using InterpreterCallback = void (*)(Interpreter&, UGeckoInstruction);
using CachedInterpreterCallback = void (*)(CachedInterpreter&, UGeckoInstruction);
using ConditionalCachedInterpreterCallback = bool (*)(CachedInterpreter&, u32);
// Default construction leaves the member initializers in place: null callback, data 0 and
// Type::Abort, which the executor treats as the end of a block.
Instruction() {}
// Each remaining constructor stores the callback in the matching union member, stores the
// payload in `data`, and sets the corresponding tag.
Instruction(const CommonCallback c, UGeckoInstruction i)
: common_callback(c), data(i.hex), type(Type::Common)
{
}
Instruction(const ConditionalCallback c, u32 d)
: conditional_callback(c), data(d), type(Type::Conditional)
{
}
Instruction(const InterpreterCallback c, UGeckoInstruction i)
: interpreter_callback(c), data(i.hex), type(Type::Interpreter)
{
}
Instruction(const CachedInterpreterCallback c, UGeckoInstruction i)
: cached_interpreter_callback(c), data(i.hex), type(Type::CachedInterpreter)
{
}
Instruction(const ConditionalCachedInterpreterCallback c, u32 d)
: conditional_cached_interpreter_callback(c), data(d),
type(Type::ConditionalCachedInterpreter)
{
}
// Discriminator for the anonymous union below.
enum class Type
{
Abort,
Common,
Conditional,
Interpreter,
CachedInterpreter,
ConditionalCachedInterpreter,
};
// Exactly one member is meaningful, selected by `type`. Only the first member carries a
// default initializer.
union
{
const CommonCallback common_callback = nullptr;
const ConditionalCallback conditional_callback;
const InterpreterCallback interpreter_callback;
const CachedInterpreterCallback cached_interpreter_callback;
const ConditionalCachedInterpreterCallback conditional_cached_interpreter_callback;
};
// Payload handed to the callback: either the raw instruction word (UGeckoInstruction::hex) or a
// plain u32, depending on the constructor used.
u32 data = 0;
Type type = Type::Abort;
};
CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system)
CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system), m_block_cache(*this)
{
}
@ -84,7 +27,8 @@ void CachedInterpreter::Init()
{
RefreshConfig();
m_code.reserve(CODE_SIZE / sizeof(Instruction));
AllocCodeSpace(CODE_SIZE);
ResetFreeMemoryRanges();
jo.enableBlocklink = false;
@ -100,11 +44,6 @@ void CachedInterpreter::Shutdown()
m_block_cache.Shutdown();
}
u8* CachedInterpreter::GetCodePtr()
{
return reinterpret_cast<u8*>(m_code.data() + m_code.size());
}
void CachedInterpreter::ExecuteOneBlock()
{
const u8* normal_entry = m_block_cache.Dispatch();
@ -114,40 +53,13 @@ void CachedInterpreter::ExecuteOneBlock()
return;
}
const Instruction* code = reinterpret_cast<const Instruction*>(normal_entry);
auto& interpreter = m_system.GetInterpreter();
for (; code->type != Instruction::Type::Abort; ++code)
for (auto& ppc_state = m_ppc_state;;)
{
switch (code->type)
{
case Instruction::Type::Common:
code->common_callback(UGeckoInstruction(code->data));
const auto callback = *reinterpret_cast<const AnyCallback*>(normal_entry);
if (const auto distance = callback(ppc_state, normal_entry + sizeof(callback)))
normal_entry += distance;
else
break;
case Instruction::Type::Conditional:
if (code->conditional_callback(code->data))
return;
break;
case Instruction::Type::Interpreter:
code->interpreter_callback(interpreter, UGeckoInstruction(code->data));
break;
case Instruction::Type::CachedInterpreter:
code->cached_interpreter_callback(*this, UGeckoInstruction(code->data));
break;
case Instruction::Type::ConditionalCachedInterpreter:
if (code->conditional_cached_interpreter_callback(*this, code->data))
return;
break;
default:
ERROR_LOG_FMT(POWERPC, "Unknown CachedInterpreter Instruction: {}",
static_cast<int>(code->type));
break;
}
}
}
@ -177,94 +89,98 @@ void CachedInterpreter::SingleStep()
ExecuteOneBlock();
}
void CachedInterpreter::EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data)
s32 CachedInterpreter::EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands)
{
auto& ppc_state = cached_interpreter.m_ppc_state;
const auto& [downcount, num_load_stores, num_fp_inst] = operands;
ppc_state.pc = ppc_state.npc;
ppc_state.downcount -= data.hex;
PowerPC::UpdatePerformanceMonitor(data.hex, 0, 0, ppc_state);
ppc_state.downcount -= downcount;
PowerPC::UpdatePerformanceMonitor(downcount, num_load_stores, num_fp_inst, ppc_state);
return 0;
}
void CachedInterpreter::UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter,
UGeckoInstruction data)
s32 CachedInterpreter::Interpret(PowerPC::PowerPCState& ppc_state,
const InterpretOperands& operands)
{
PowerPC::UpdatePerformanceMonitor(0, data.hex, 0, cached_interpreter.m_ppc_state);
const auto& [interpreter, func, current_pc, inst] = operands;
func(interpreter, inst);
return sizeof(AnyCallback) + sizeof(operands);
}
void CachedInterpreter::UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter,
UGeckoInstruction data)
s32 CachedInterpreter::InterpretAndCheckExceptions(
PowerPC::PowerPCState& ppc_state, const InterpretAndCheckExceptionsOperands& operands)
{
PowerPC::UpdatePerformanceMonitor(0, 0, data.hex, cached_interpreter.m_ppc_state);
const auto& [interpreter, func, current_pc, inst, power_pc, downcount] = operands;
func(interpreter, inst);
if ((ppc_state.Exceptions & (EXCEPTION_DSI | EXCEPTION_PROGRAM)) != 0)
{
ppc_state.pc = current_pc;
ppc_state.downcount -= downcount;
power_pc.CheckExceptions();
return 0;
}
return sizeof(AnyCallback) + sizeof(operands);
}
void CachedInterpreter::WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data)
s32 CachedInterpreter::HLEFunction(PowerPC::PowerPCState& ppc_state,
const HLEFunctionOperands& operands)
{
auto& ppc_state = cached_interpreter.m_ppc_state;
ppc_state.pc = data.hex;
ppc_state.npc = data.hex + 4;
const auto& [system, current_pc, hook_index] = operands;
ppc_state.pc = current_pc;
HLE::Execute(Core::CPUThreadGuard{system}, current_pc, hook_index);
return sizeof(AnyCallback) + sizeof(operands);
}
void CachedInterpreter::WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter,
UGeckoInstruction data)
s32 CachedInterpreter::WritePC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands)
{
cached_interpreter.m_ppc_state.npc = data.hex;
const auto& [current_pc] = operands;
ppc_state.pc = current_pc;
ppc_state.npc = current_pc + 4;
return sizeof(AnyCallback) + sizeof(operands);
}
bool CachedInterpreter::CheckFPU(CachedInterpreter& cached_interpreter, u32 data)
s32 CachedInterpreter::WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state,
const WritePCOperands& operands)
{
auto& ppc_state = cached_interpreter.m_ppc_state;
const auto& [current_pc] = operands;
ppc_state.npc = current_pc;
return sizeof(AnyCallback) + sizeof(operands);
}
s32 CachedInterpreter::CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckFPUOperands& operands)
{
const auto& [power_pc, current_pc, downcount] = operands;
if (!ppc_state.msr.FP)
{
ppc_state.pc = current_pc;
ppc_state.downcount -= downcount;
ppc_state.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
ppc_state.downcount -= data;
return true;
power_pc.CheckExceptions();
return 0;
}
return false;
return sizeof(AnyCallback) + sizeof(operands);
}
bool CachedInterpreter::CheckDSI(CachedInterpreter& cached_interpreter, u32 data)
s32 CachedInterpreter::CheckBreakpoint(PowerPC::PowerPCState& ppc_state,
const CheckBreakpointOperands& operands)
{
auto& ppc_state = cached_interpreter.m_ppc_state;
if (ppc_state.Exceptions & EXCEPTION_DSI)
const auto& [power_pc, cpu_state, current_pc, downcount] = operands;
ppc_state.pc = current_pc;
if (power_pc.CheckBreakPoints(); *cpu_state != CPU::State::Running)
{
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
ppc_state.downcount -= data;
return true;
// Accessing PowerPCState through power_pc instead of ppc_state produces better assembly.
power_pc.GetPPCState().downcount -= downcount;
return 0;
}
return false;
return sizeof(AnyCallback) + sizeof(operands);
}
bool CachedInterpreter::CheckProgramException(CachedInterpreter& cached_interpreter, u32 data)
s32 CachedInterpreter::CheckIdle(PowerPC::PowerPCState& ppc_state,
const CheckIdleOperands& operands)
{
auto& ppc_state = cached_interpreter.m_ppc_state;
if (ppc_state.Exceptions & EXCEPTION_PROGRAM)
{
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
ppc_state.downcount -= data;
return true;
}
return false;
}
bool CachedInterpreter::CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data)
{
cached_interpreter.m_system.GetPowerPC().CheckBreakPoints();
if (cached_interpreter.m_system.GetCPU().GetState() != CPU::State::Running)
{
cached_interpreter.m_ppc_state.downcount -= data;
return true;
}
return false;
}
bool CachedInterpreter::CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc)
{
if (cached_interpreter.m_ppc_state.npc == idle_pc)
{
cached_interpreter.m_system.GetCoreTiming().Idle();
}
return false;
const auto& [core_timing, idle_pc] = operands;
if (ppc_state.npc == idle_pc)
core_timing.Idle();
return sizeof(AnyCallback) + sizeof(operands);
}
bool CachedInterpreter::HandleFunctionHooking(u32 address)
@ -275,27 +191,56 @@ bool CachedInterpreter::HandleFunctionHooking(u32 address)
if (!result)
return false;
m_code.emplace_back(WritePC, address);
m_code.emplace_back(Interpreter::HLEFunction, result.hook_index);
Write(HLEFunction, {m_system, address, result.hook_index});
if (result.type != HLE::HookType::Replace)
return false;
m_code.emplace_back(EndBlock, js.downcountAmount);
m_code.emplace_back();
js.downcountAmount += js.st.numCycles;
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
return true;
}
void CachedInterpreter::Jit(u32 address)
bool CachedInterpreter::SetEmitterStateToFreeCodeRegion()
{
if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 ||
SConfig::GetInstance().bJITNoBlockCache)
const auto free = m_free_ranges.by_size_begin();
if (free == m_free_ranges.by_size_end())
{
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in code region.");
return false;
}
SetCodePtr(free.from(), free.to());
return true;
}
// Hands every [begin, end) range queued by the block cache back to the free-range set, then
// empties the block cache's queue.
void CachedInterpreter::FreeRanges()
{
  const auto& pending_ranges = m_block_cache.GetRangesToFree();
  for (const auto& range : pending_ranges)
    m_free_ranges.insert(range.first, range.second);

  m_block_cache.ClearRangesToFree();
}
// Forgets all bookkeeping about used/free code memory and marks the entire code region
// [region, region + region_size) as one free range.
void CachedInterpreter::ResetFreeMemoryRanges()
{
  m_free_ranges.clear();

  const auto region_begin = region;
  const auto region_end = region_begin + region_size;
  m_free_ranges.insert(region_begin, region_end);
}
// Entry point used through the JitBase interface: compiles the block at em_address, allowing
// one cache clear and retry if code generation fails.
void CachedInterpreter::Jit(u32 em_address)
{
  constexpr bool clear_cache_and_retry_on_failure = true;
  Jit(em_address, clear_cache_and_retry_on_failure);
}
void CachedInterpreter::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
{
if (IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
{
ClearCache();
}
FreeRanges();
const u32 nextPC =
analyzer.Analyze(m_ppc_state.pc, &code_block, &m_code_buffer, m_code_buffer.size());
analyzer.Analyze(em_address, &code_block, &m_code_buffer, m_code_buffer.size());
if (code_block.m_memory_exception)
{
// Address of instruction could not be translated
@ -306,9 +251,46 @@ void CachedInterpreter::Jit(u32 address)
return;
}
JitBlock* b = m_block_cache.AllocateBlock(m_ppc_state.pc);
if (SetEmitterStateToFreeCodeRegion())
{
JitBlock* b = m_block_cache.AllocateBlock(em_address);
b->normalEntry = b->near_begin = GetWritableCodePtr();
js.blockStart = m_ppc_state.pc;
if (DoJit(em_address, b, nextPC))
{
// Record what memory region was used so we know what to free if this block gets invalidated.
b->near_end = GetWritableCodePtr();
b->far_begin = b->far_end = nullptr;
b->codeSize = static_cast<u32>(GetCodePtr() - b->normalEntry);
b->originalSize = code_block.m_num_instructions;
// Mark the memory region that this code block uses in the RangeSizeSet.
if (b->near_begin != b->near_end)
m_free_ranges.erase(b->near_begin, b->near_end);
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
return;
}
}
if (clear_cache_and_retry_on_failure)
{
WARN_LOG_FMT(DYNA_REC, "flushing code caches, please report if this happens a lot");
ClearCache();
Jit(em_address, false);
return;
}
PanicAlertFmtT("JIT failed to find code space after a cache clear. This should never happen. "
"Please report this incident on the bug tracker. Dolphin will now exit.");
std::exit(-1);
}
bool CachedInterpreter::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
{
js.blockStart = em_address;
js.firstFPInstructionFound = false;
js.fifoBytesSinceCheck = 0;
js.downcountAmount = 0;
@ -316,85 +298,81 @@ void CachedInterpreter::Jit(u32 address)
js.numFloatingPointInst = 0;
js.curBlock = b;
b->normalEntry = b->near_begin = GetCodePtr();
auto& interpreter = m_system.GetInterpreter();
auto& power_pc = m_system.GetPowerPC();
auto& cpu = m_system.GetCPU();
auto& breakpoints = power_pc.GetBreakPoints();
for (u32 i = 0; i < code_block.m_num_instructions; i++)
{
PPCAnalyst::CodeOp& op = m_code_buffer[i];
js.op = &op;
js.compilerPC = op.address;
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
js.downcountAmount += op.opinfo->num_cycles;
if (op.opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
if (op.opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst;
if (HandleFunctionHooking(op.address))
if (HandleFunctionHooking(js.compilerPC))
break;
if (!op.skip)
{
const bool breakpoint =
m_enable_debugging &&
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address);
const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0;
const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck;
const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op);
const bool idle_loop = op.branchIsIdleLoop;
if (breakpoint || check_fpu || endblock || memcheck || check_program_exception)
m_code.emplace_back(WritePC, op.address);
if (breakpoint)
m_code.emplace_back(CheckBreakpoint, js.downcountAmount);
if (check_fpu)
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
breakpoints.IsAddressBreakPoint(js.compilerPC))
{
m_code.emplace_back(CheckFPU, js.downcountAmount);
Write(CheckBreakpoint, {power_pc, cpu.GetStatePtr(), js.compilerPC, js.downcountAmount});
}
if (!js.firstFPInstructionFound && (op.opinfo->flags & FL_USE_FPU) != 0)
{
Write(CheckFPU, {power_pc, js.compilerPC, js.downcountAmount});
js.firstFPInstructionFound = true;
}
m_code.emplace_back(Interpreter::GetInterpreterOp(op.inst), op.inst);
if (memcheck)
m_code.emplace_back(CheckDSI, js.downcountAmount);
if (check_program_exception)
m_code.emplace_back(CheckProgramException, js.downcountAmount);
if (idle_loop)
m_code.emplace_back(CheckIdle, js.blockStart);
if (endblock)
if (op.canEndBlock)
Write(WritePC, {js.compilerPC});
// Instruction may cause a DSI Exception or Program Exception.
if ((jo.memcheck && (op.opinfo->flags & FL_LOADSTORE) != 0) ||
(!op.canEndBlock && ShouldHandleFPExceptionForInstruction(&op)))
{
m_code.emplace_back(EndBlock, js.downcountAmount);
if (js.numLoadStoreInst != 0)
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
if (js.numFloatingPointInst != 0)
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
Write(InterpretAndCheckExceptions, {interpreter, Interpreter::GetInterpreterOp(op.inst),
js.compilerPC, op.inst, power_pc, js.downcountAmount});
}
else
{
Write(Interpret,
{interpreter, Interpreter::GetInterpreterOp(op.inst), js.compilerPC, op.inst});
}
if (op.branchIsIdleLoop)
Write(CheckIdle, {m_system.GetCoreTiming(), js.blockStart});
if (op.canEndBlock)
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
}
}
if (code_block.m_broken)
{
m_code.emplace_back(WriteBrokenBlockNPC, nextPC);
m_code.emplace_back(EndBlock, js.downcountAmount);
if (js.numLoadStoreInst != 0)
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
if (js.numFloatingPointInst != 0)
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
Write(WriteBrokenBlockNPC, {nextPC});
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
}
m_code.emplace_back();
b->near_end = GetCodePtr();
b->far_begin = nullptr;
b->far_end = nullptr;
b->codeSize = static_cast<u32>(GetCodePtr() - b->normalEntry);
b->originalSize = code_block.m_num_instructions;
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
if (HasWriteFailed())
{
WARN_LOG_FMT(DYNA_REC, "JIT ran out of space in code region during code generation.");
return false;
}
return true;
}
void CachedInterpreter::ClearCache()
{
m_code.clear();
m_block_cache.Clear();
m_block_cache.ClearRangesToFree();
ClearCodeSpace();
ResetFreeMemoryRanges();
RefreshConfig();
}

View file

@ -3,14 +3,27 @@
#pragma once
#include <vector>
#include <cstddef>
#include <rangeset/rangesizeset.h>
#include "Common/CommonTypes.h"
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h"
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
class CachedInterpreter : public JitBase
namespace CoreTiming
{
class CoreTimingManager;
}
namespace CPU
{
enum class State;
}
class Interpreter;
class CachedInterpreter : public JitBase, public CachedInterpreterCodeBlock
{
public:
explicit CachedInterpreter(Core::System& system);
@ -30,32 +43,106 @@ public:
void SingleStep() override;
void Jit(u32 address) override;
void Jit(u32 address, bool clear_cache_and_retry_on_failure);
bool DoJit(u32 address, JitBlock* b, u32 nextPC);
JitBaseBlockCache* GetBlockCache() override { return &m_block_cache; }
const char* GetName() const override { return "Cached Interpreter"; }
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
private:
struct Instruction;
u8* GetCodePtr();
void ExecuteOneBlock();
bool HandleFunctionHooking(u32 address);
static void EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
static void UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter,
UGeckoInstruction data);
static void UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter,
UGeckoInstruction data);
static void WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
static void WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
static bool CheckFPU(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckDSI(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckProgramException(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc);
// Finds a free memory region and sets the code emitter to point at that region.
// Returns false if no free memory region can be found.
bool SetEmitterStateToFreeCodeRegion();
BlockCache m_block_cache{*this};
std::vector<Instruction> m_code;
void FreeRanges();
void ResetFreeMemoryRanges();
struct EndBlockOperands;
struct InterpretOperands;
struct InterpretAndCheckExceptionsOperands;
struct HLEFunctionOperands;
struct WritePCOperands;
struct CheckFPUOperands;
struct CheckBreakpointOperands;
struct CheckIdleOperands;
static s32 EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands);
static s32 Interpret(PowerPC::PowerPCState& ppc_state, const InterpretOperands& operands);
static s32 InterpretAndCheckExceptions(PowerPC::PowerPCState& ppc_state,
const InterpretAndCheckExceptionsOperands& operands);
static s32 HLEFunction(PowerPC::PowerPCState& ppc_state, const HLEFunctionOperands& operands);
static s32 WritePC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands);
static s32 WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands);
static s32 CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckFPUOperands& operands);
static s32 CheckBreakpoint(PowerPC::PowerPCState& ppc_state,
const CheckBreakpointOperands& operands);
static s32 CheckIdle(PowerPC::PowerPCState& ppc_state, const CheckIdleOperands& operands);
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges;
CachedInterpreterBlockCache m_block_cache;
};
// Operands for the EndBlock callback. Field order matters: the callback unpacks them
// positionally with a structured binding.
struct CachedInterpreter::EndBlockOperands
{
// Subtracted from PowerPCState::downcount and also passed to UpdatePerformanceMonitor.
u32 downcount;
// Load/store and floating-point instruction counts passed to UpdatePerformanceMonitor.
u32 num_load_stores;
u32 num_fp_inst;
// Unnamed 32-bit bit-field: explicit padding. Presumably present so that sizeof() satisfies the
// emitter's "sizeof(Operands) % alignof(AnyCallback) == 0" static_assert on 64-bit targets.
u32 : 32;
};
// Operands for the Interpret callback, which simply invokes func(interpreter, inst). Field order
// matters: the callback unpacks them positionally with a structured binding.
struct CachedInterpreter::InterpretOperands
{
Interpreter& interpreter;
void (*func)(Interpreter&, UGeckoInstruction); // Interpreter::Instruction
// Address of the instruction. Recorded at emit time; the Interpret callback itself does not
// read it.
u32 current_pc;
UGeckoInstruction inst;
};
// Operands for the InterpretAndCheckExceptions callback: runs func(interpreter, inst) and, if a
// DSI or program exception is pending afterwards, exits the block. Field order matters: the
// callback unpacks them positionally with a structured binding.
struct CachedInterpreter::InterpretAndCheckExceptionsOperands
{
Interpreter& interpreter;
void (*func)(Interpreter&, UGeckoInstruction); // Interpreter::Instruction
// Written to PowerPCState::pc when an exception is detected.
u32 current_pc;
UGeckoInstruction inst;
// Used to call CheckExceptions() when an exception is detected.
PowerPC::PowerPCManager& power_pc;
// Subtracted from PowerPCState::downcount when an exception is detected.
u32 downcount;
};
// Operands for the HLEFunction callback, which sets PowerPCState::pc to current_pc and then calls
// HLE::Execute with a CPUThreadGuard built from `system`. Field order matters: the callback
// unpacks them positionally with a structured binding.
struct CachedInterpreter::HLEFunctionOperands
{
Core::System& system;
u32 current_pc;
// Index of the HLE hook, as reported by the hook lookup at emit time.
u32 hook_index;
};
// Operands shared by the WritePC and WriteBrokenBlockNPC callbacks.
struct CachedInterpreter::WritePCOperands
{
// WritePC stores this in PowerPCState::pc and stores current_pc + 4 in npc.
// WriteBrokenBlockNPC stores it in npc only; there the emitter passes the next PC.
u32 current_pc;
// Unnamed 32-bit bit-field: explicit padding. Presumably present so that sizeof() satisfies the
// emitter's "sizeof(Operands) % alignof(AnyCallback) == 0" static_assert on 64-bit targets.
u32 : 32;
};
// Operands for the CheckFPU callback, which raises EXCEPTION_FPU_UNAVAILABLE and exits the block
// when MSR.FP is clear. Field order matters: the callback unpacks them positionally with a
// structured binding.
struct CachedInterpreter::CheckFPUOperands
{
// Used to call CheckExceptions() after the exception flag has been set.
PowerPC::PowerPCManager& power_pc;
// Written to PowerPCState::pc when the exception is raised.
u32 current_pc;
// Subtracted from PowerPCState::downcount when the exception is raised.
u32 downcount;
};
// Operands for the CheckBreakpoint callback, which sets PowerPCState::pc, calls
// CheckBreakPoints(), and exits the block if the CPU is no longer in the Running state. Field
// order matters: the callback unpacks them positionally with a structured binding.
struct CachedInterpreter::CheckBreakpointOperands
{
PowerPC::PowerPCManager& power_pc;
// Pointer to the CPU state value; dereferenced after CheckBreakPoints() has run.
const CPU::State* cpu_state;
u32 current_pc;
// Subtracted from the downcount only when the block is exited.
u32 downcount;
};
// Operands for the CheckIdle callback, which calls core_timing.Idle() when PowerPCState::npc
// equals idle_pc. Field order matters: the callback unpacks them positionally with a structured
// binding.
struct CachedInterpreter::CheckIdleOperands
{
CoreTiming::CoreTimingManager& core_timing;
// Address compared against npc; the emitter passes the start address of the block.
u32 idle_pc;
};

View file

@ -0,0 +1,41 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Forwards the owning JIT to the base block cache; no additional state needs setting up here.
CachedInterpreterBlockCache::CachedInterpreterBlockCache(JitBase& jit)
    : JitBaseBlockCache{jit}
{
}
void CachedInterpreterBlockCache::Init()
{
JitBaseBlockCache::Init();
ClearRangesToFree();
}
// Destroys the block through the base class and queues the block's code memory so it can be
// returned to the free list later. Blocks with an empty code range queue nothing.
void CachedInterpreterBlockCache::DestroyBlock(JitBlock& block)
{
  JitBaseBlockCache::DestroyBlock(block);

  const bool has_no_code = block.near_begin == block.near_end;
  if (has_no_code)
    return;

  m_ranges_to_free_on_next_codegen.emplace_back(block.near_begin, block.near_end);
}
// Empties the queue of code ranges waiting to be returned to the free list.
void CachedInterpreterBlockCache::ClearRangesToFree()
{
m_ranges_to_free_on_next_codegen.clear();
}
// Deliberately a no-op: nothing is emitted to link `source` to `dest`.
// NOTE(review): presumably because the Cached Interpreter runs with block linking disabled
// (jo.enableBlocklink = false) — confirm before relying on it.
void CachedInterpreterBlockCache::WriteLinkBlock(const JitBlock::LinkData& source,
const JitBlock* dest)
{
}
void CachedInterpreterBlockCache::WriteDestroyBlock(const JitBlock& block)
{
CachedInterpreterEmitter emitter(block.normalEntry, block.near_end);
emitter.Write(CachedInterpreterEmitter::PoisonCallback);
}

View file

@ -0,0 +1,35 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <utility>
#include <vector>
#include "Common/CommonTypes.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
class JitBase;
// Block cache for the Cached Interpreter. On top of the base cache it remembers the code memory
// range of every destroyed block so the owner can hand that memory back to its free list the next
// time it generates code.
class CachedInterpreterBlockCache final : public JitBaseBlockCache
{
public:
  explicit CachedInterpreterBlockCache(JitBase& jit);

  // Initializes the base cache and discards any queued ranges.
  void Init() override;
  // Destroys the block and queues its [near_begin, near_end) range (if non-empty) for freeing.
  void DestroyBlock(JitBlock& block) override;

  // Empties the queue of ranges waiting to be freed.
  void ClearRangesToFree();
  // Ranges queued by DestroyBlock since the queue was last cleared, as [begin, end) pairs.
  const std::vector<std::pair<u8*, u8*>>& GetRangesToFree() const
  {
    return m_ranges_to_free_on_next_codegen;
  }

private:
  // Emits nothing.
  void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
  // Poisons the entry of a destroyed block.
  void WriteDestroyBlock(const JitBlock& block) override;

  std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen;
};

View file

@ -0,0 +1,38 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include <algorithm>
#include <cstring>
#include "Common/Assert.h"
#include "Common/MsgHandler.h"
// Appends one cached instruction to the code stream: the callback pointer, immediately followed
// by `size` bytes copied from `operands`.
//
// callback: function the executor will invoke for this instruction.
// operands: bytes to store after the callback; may be null when size is 0.
// size:     number of operand bytes.
//
// The write is refused, and m_write_failed is set, when it would reach m_code_end. Once a write
// has been refused every later write is refused as well until SetCodePtr() resets the emitter, so
// a failed stream never contains an instruction that follows a hole.
void CachedInterpreterEmitter::Write(AnyCallback callback, const void* operands, std::size_t size)
{
  DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(m_code) % alignof(AnyCallback) == 0);

  if (m_write_failed)
    return;

  // Compare sizes rather than pointers: forming "m_code + required" could yield a pointer beyond
  // one-past-the-end of the buffer (or offset a null pointer), which is undefined behaviour.
  const std::ptrdiff_t available = m_code_end - m_code;
  const std::size_t required = sizeof(callback) + size;
  if (available <= 0 || required >= static_cast<std::size_t>(available))
  {
    m_write_failed = true;
    return;
  }

  std::memcpy(m_code, &callback, sizeof(callback));
  m_code += sizeof(callback);

  // Operand-less callbacks pass a null pointer; memcpy must not be handed one, even for 0 bytes.
  if (size != 0)
  {
    std::memcpy(m_code, operands, size);
    m_code += size;
  }
}
// Callback stored in poisoned code memory. Raises a panic alert and asks the executor to leave
// the block; both parameters are ignored.
s32 CachedInterpreterEmitter::PoisonCallback(PowerPC::PowerPCState& ppc_state, const void* operands)
{
  // A return value of 0 makes the executor exit the current block.
  constexpr s32 exit_block = 0;

  PanicAlertFmtT("The Cached Interpreter reached a poisoned callback. This should never happen!");
  return exit_block;
}
void CachedInterpreterCodeBlock::PoisonMemory()
{
DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(region) % alignof(AnyCallback) == 0);
DEBUG_ASSERT(region_size % sizeof(AnyCallback) == 0);
std::fill(reinterpret_cast<AnyCallback*>(region),
reinterpret_cast<AnyCallback*>(region + region_size), AnyCallbackCast(PoisonCallback));
}

View file

@ -0,0 +1,84 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
namespace PowerPC
{
struct PowerPCState;
}
// Writes "cached instructions" into a caller-supplied byte range. Each cached instruction is a
// callback pointer immediately followed by that callback's operands (if any).
class CachedInterpreterEmitter
{
protected:
  // The const void* parameter is a type-erased reference to operands. The return value of most
  // callbacks is the distance in memory to the next callback. If a callback returns 0, the block
  // will be exited. The return value is signed to support block-linking. 32-bit return values seem
  // to perform better than 64-bit ones.
  using AnyCallback = s32 (*)(PowerPC::PowerPCState& ppc_state, const void* operands);

  // Type-erases a callback taking a typed operands reference into an AnyCallback.
  template <class Operands>
  static AnyCallback AnyCallbackCast(s32 (*callback)(PowerPC::PowerPCState&, const Operands&))
  {
    return reinterpret_cast<AnyCallback>(callback);
  }
  // Identity overload for callbacks that are already type-erased.
  static AnyCallback AnyCallbackCast(AnyCallback callback) { return callback; }

private:
  // Untyped worker behind the public Write overloads.
  void Write(AnyCallback callback, const void* operands, std::size_t size);

public:
  CachedInterpreterEmitter() = default;
  // Emits into [begin, end).
  explicit CachedInterpreterEmitter(u8* begin, u8* end) : m_code(begin), m_code_end(end) {}

  // Emits `callback` followed by a byte-wise copy of `operands`.
  template <class Operands>
  void Write(s32 (*callback)(PowerPC::PowerPCState& ppc_state, const Operands&),
             const Operands& operands)
  {
    // I would use std::is_trivial_v, but almost every operands struct uses
    // references instead of pointers to make the callback functions nicer.
    static_assert(std::is_trivially_copyable_v<Operands> &&
                      std::is_trivially_destructible_v<Operands>,
                  "Operands are copied byte-wise and are never destroyed.");
    static_assert(alignof(Operands) <= alignof(AnyCallback),
                  "Operands are stored directly after a callback pointer.");
    static_assert(sizeof(Operands) % alignof(AnyCallback) == 0,
                  "The callback following the operands must stay aligned.");
    Write(AnyCallbackCast(callback), &operands, sizeof(Operands));
  }
  // Emits a callback that has no operands.
  void Write(AnyCallback callback) { Write(callback, nullptr, 0); }

  const u8* GetCodePtr() const { return m_code; }
  u8* GetWritableCodePtr() { return m_code; }
  const u8* GetCodeEnd() const { return m_code_end; }
  u8* GetWritableCodeEnd() { return m_code_end; }

  // Should be checked after a block of code has been generated to see if the code has been
  // successfully written to memory. Do not call the generated code when this returns true!
  bool HasWriteFailed() const { return m_write_failed; }

  // Points the emitter at [begin, end) and clears the write-failed flag.
  void SetCodePtr(u8* begin, u8* end)
  {
    m_code = begin;
    m_code_end = end;
    m_write_failed = false;
  }

  // Callback used to mark memory that must never be executed; raises a panic alert.
  static s32 PoisonCallback(PowerPC::PowerPCState& ppc_state, const void* operands);

private:
  // Pointer to memory where code will be emitted to.
  u8* m_code = nullptr;
  // Pointer past the end of the memory region we're allowed to emit to.
  // Writes that would reach this memory are refused and will set the m_write_failed flag instead.
  u8* m_code_end = nullptr;
  // Set to true when a write request happens that would write past m_code_end.
  // Must be cleared with SetCodePtr() afterwards.
  bool m_write_failed = false;
};
// Code region for the Cached Interpreter: a CodeBlock over CachedInterpreterEmitter with the
// `executable` template argument set to false, since the region stores callback pointers and
// operand data rather than machine code.
class CachedInterpreterCodeBlock : public Common::CodeBlock<CachedInterpreterEmitter, false>
{
private:
// Fills the region with pointers to CachedInterpreterEmitter::PoisonCallback.
void PoisonMemory() override;
};

View file

@ -1,14 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Forwards the owning JIT to the base block cache; the body has nothing else to set up.
BlockCache::BlockCache(JitBase& jit) : JitBaseBlockCache{jit}
{
}
// Deliberately a no-op: nothing is emitted to link `source` to `dest`.
void BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest)
{
}

View file

@ -1,17 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Core/PowerPC/JitCommon/JitCache.h"
class JitBase;
// Minimal block cache for the Cached Interpreter: adds nothing to the base cache beyond an
// (empty) WriteLinkBlock override.
class BlockCache final : public JitBaseBlockCache
{
public:
explicit BlockCache(JitBase& jit);
private:
// Emits nothing.
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
};

View file

@ -427,7 +427,8 @@
<ClInclude Include="Core\PatchEngine.h" />
<ClInclude Include="Core\PowerPC\BreakPoints.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.h" />
<ClInclude Include="Core\PowerPC\ConditionRegister.h" />
<ClInclude Include="Core\PowerPC\CPUCoreBase.h" />
<ClInclude Include="Core\PowerPC\Expression.h" />
@ -1087,7 +1088,8 @@
<ClCompile Include="Core\PatchEngine.cpp" />
<ClCompile Include="Core\PowerPC\BreakPoints.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.cpp" />
<ClCompile Include="Core\PowerPC\ConditionRegister.cpp" />
<ClCompile Include="Core\PowerPC\Expression.cpp" />
<ClCompile Include="Core\PowerPC\GDBStub.cpp" />