Add branch delay slots to VUs and squash some bugs

- added branch delay slots to the VUs (todo: use different address masks for VU0 and VU1, since VU0 has a smaller micro memory)
- fixed imm15 (forgot to lshift DEST by 11 bits)
- removed various warnings (heck, even fixed one bug!)
- extended the visibility of the members in BranchDelaySlot to protected
- [experimental] branching in branch delay slots
This commit is contained in:
hch12907 2018-08-26 01:21:30 +08:00
parent cabefebec9
commit f4abbb78f9
9 changed files with 173 additions and 22 deletions

View file

@ -81,6 +81,7 @@ set(COMMON_SRC_FILES
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Timers/CEeTimers.hpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vif/CVif.cpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vif/CVif.hpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vu/VuBranchDelaySlot.hpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vu/Interpreter/CVuInterpreter.cpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vu/Interpreter/CVuInterpreter.hpp"
"${CMAKE_SOURCE_DIR}/liborbum/src/Controller/Ee/Vpu/Vu/Interpreter/CVuInterpreter_CONVERT.cpp"

View file

@ -177,6 +177,9 @@ struct Constants
struct VPU
{
// A VU Instruction is 8 bytes long
static constexpr const int SIZE_VU_INSTRUCTION = 8;
struct VIF
{
// Number of instructions is based off the CMD field in the VIFcode, without the leading interrupt bit and the m bit in the UNPACK instructions. See EE Users Manual page 87.

View file

@ -76,7 +76,7 @@ public:
return current_slot > 0;
}
private:
protected:
size_t current_slot;
uptr branch_pc;

View file

@ -4,6 +4,7 @@
#include "Controller/Ee/Vpu/Vu/Interpreter/CVuInterpreter.hpp"
#include "Core.hpp"
#include "Resources/RResources.hpp"
#include "Utilities/Utilities.hpp"
// Miscellaneous VU instructions.
// Includes:-
@ -44,7 +45,7 @@ void CVuInterpreter::RINIT(VuUnit_Base* unit, const VuInstruction inst)
// Writes a float consisting 23 bits of R as mantissa and 001111111 as exp+sign.
constexpr uword append = 0b001111111 << 23;
const f32 fsf = (reg_source.read_uword(inst.fsf()) & 0x7FFFFF) | append;
const uword fsf = (reg_source.read_uword(inst.fsf()) & 0x7FFFFF) | append;
reg_dest.write_uword(fsf);
}
@ -109,7 +110,8 @@ void CVuInterpreter::IBEQ(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() == reg_source_2.read_uhword())
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
@ -119,7 +121,8 @@ void CVuInterpreter::IBGEZ(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() >= 0)
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
@ -129,7 +132,8 @@ void CVuInterpreter::IBGTZ(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() > 0)
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
@ -139,7 +143,8 @@ void CVuInterpreter::IBLEZ(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() <= 0)
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
@ -149,7 +154,8 @@ void CVuInterpreter::IBLTZ(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() < 0)
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
@ -160,29 +166,43 @@ void CVuInterpreter::IBNE(VuUnit_Base* unit, const VuInstruction inst)
if (reg_source_1.read_uhword() != reg_source_2.read_uhword())
{
unit->pc.offset(inst.imm11() * NUMBER_BYTES_IN_DWORD);
shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
}
/// B: unconditional PC-relative branch.
/// The 11-bit immediate is sign-extended and handed to the branch delay slot
/// handler, which applies the new PC later instead of modifying the PC here.
void CVuInterpreter::B(VuUnit_Base* unit, const VuInstruction inst)
{
    // The PC must not be offset directly here: the branch is scheduled through
    // bdelay, and offsetting the PC as well would apply the branch twice.
    const shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
    unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
/// BAL: PC-relative branch and link.
/// Writes the return address, expressed as an instruction index (byte address
/// divided by the 8-byte instruction size), to VI[it], then schedules the
/// branch through the branch delay slot handler.
void CVuInterpreter::BAL(VuUnit_Base* unit, const VuInstruction inst)
{
    SizedHwordRegister& reg_dest = unit->vi[inst.it()];

    // A VU instruction is 8 bytes long.
    constexpr uword instruction_size = 8;

    if (unit->bdelay.is_branch_pending())
    {
        // If there is a pending branch, the linked register becomes the
        // second instruction of said branch
        reg_dest.write_uhword((unit->bdelay.get_branch_pc() + instruction_size) / instruction_size);
    }
    else
    {
        // Otherwise, the linked register is the instruction next to the
        // branch delay slot
        reg_dest.write_uhword((unit->pc.read_uword() + 2 * instruction_size) / instruction_size);
    }

    // The PC is left untouched here; the delay slot handler applies the
    // branch. Offsetting the PC directly would both corrupt the link value
    // computed above and apply the branch twice.
    const shword offset_by = extend_integer<shword, uhword, 11>(inst.imm11());
    unit->bdelay.set_branch_itype(unit->pc, offset_by);
}
/// JR: jump to the address held in VI[is].
/// The register value is passed to the branch delay slot handler, which
/// scales it to a byte address and applies it later.
void CVuInterpreter::JR(VuUnit_Base* unit, const VuInstruction inst)
{
    SizedHwordRegister& reg_source_1 = unit->vi[inst.is()];

    // The PC is not written directly: doing so would jump immediately (and
    // with an unscaled value) in addition to the delayed jump scheduled here.
    unit->bdelay.set_branch_jtype(unit->pc, reg_source_1.read_uhword());
}
void CVuInterpreter::JALR(VuUnit_Base* unit, const VuInstruction inst)
@ -190,9 +210,20 @@ void CVuInterpreter::JALR(VuUnit_Base* unit, const VuInstruction inst)
SizedHwordRegister& reg_source_1 = unit->vi[inst.is()];
SizedHwordRegister& reg_dest = unit->vi[inst.it()];
constexpr uword next_addr_offset = 16;
reg_dest.write_uhword(unit->pc.read_uword() + next_addr_offset);
unit->pc.write_uword(reg_source_1.read_uhword());
if (unit->bdelay.is_branch_pending())
{
// If there is a pending branch, the linked register becomes the
// second instruction of said branch
reg_dest.write_uhword((unit->bdelay.get_branch_pc() + 8) / 8);
}
else
{
// Otherwise, the linked register is the instruction next to the
// branch delay slot
reg_dest.write_uhword((unit->pc.read_uword() + 2 * 8) / 8);
}
unit->bdelay.set_branch_jtype(unit->pc, reg_source_1.read_uhword());
}
void CVuInterpreter::WAITP(VuUnit_Base* unit, const VuInstruction inst)

View file

@ -67,7 +67,7 @@ void CVuInterpreter::LQ(VuUnit_Base* unit, const VuInstruction inst)
if (inst.test_dest_field(field))
{
// Investigate?: Endianness scares me
reg_dest.write_float(source.uw[field], field);
reg_dest.write_uword(field, source.uw[field]);
}
}
}
@ -88,7 +88,7 @@ void CVuInterpreter::LQD(VuUnit_Base* unit, const VuInstruction inst)
if (inst.test_dest_field(field))
{
// Investigate?: Endianness scares me
reg_dest.write_float(source.uw[field], field);
reg_dest.write_uword(field, source.uw[field]);
}
}
}
@ -106,7 +106,7 @@ void CVuInterpreter::LQI(VuUnit_Base* unit, const VuInstruction inst)
if (inst.test_dest_field(field))
{
// Investigate?: Endianness scares me
reg_dest.write_float(source.uw[field], field);
reg_dest.write_uword(field, source.uw[field]);
}
}

View file

@ -0,0 +1,113 @@
#pragma once
#include <cereal/cereal.hpp>
#include "Common/Types/Mips/BranchDelaySlot.hpp"
#include "Common/Types/Primitive.hpp"
#include "Common/Types/Register/PcRegisters.hpp"
/// BranchDelaySlot, modified slightly for the VUs.
/// See BranchDelaySlot for more documentation.
///
/// Differences visible in this class:
/// - instructions are Constants::EE::VPU::SIZE_VU_INSTRUCTION bytes long;
/// - branch targets are wrapped to the VU micro memory size;
/// - a branch issued while another branch is pending is queued as a
///   "second" branch instead of overwriting the first one.
template <size_t slots = 1>
class VuBranchDelaySlot : public BranchDelaySlot<slots>
{
public:
    // The base class is listed first, matching the actual initialisation
    // order (bases are always initialised before members).
    VuBranchDelaySlot() :
        BranchDelaySlot<slots>(),
        second_branch_pc(0),
        second_branch_pending(false)
    {
    }

    /// is_branch_pending() in VuBranchDelaySlot has the same behaviour as the
    /// one in BranchDelaySlot.
    using BranchDelaySlot<slots>::is_branch_pending;

    /// Obtains the PC of which the VU is branching to.
    /// This is used by the *AL* (link register) branching/jumping instructions.
    uptr get_branch_pc() const
    {
        return branch_pc;
    }

    /// Sets the offset of the PC address of the VUs, with a delay slot.
    /// See BranchDelaySlot::set_branch_itype for more documentation.
    /// The only difference is the size of the instruction - VU instructions are 8 bytes long
    /// whereas EE Core instructions are only 4.
    ///
    /// @param pc   Current PC; the target is relative to the instruction after it.
    /// @param imm  Signed branch offset, counted in instructions.
    void set_branch_itype(WordPcRegister& pc, const shword imm)
    {
        const uptr target = (pc.read_uword()
            + Constants::EE::VPU::SIZE_VU_INSTRUCTION
            + imm * Constants::EE::VPU::SIZE_VU_INSTRUCTION) & MICRO_MEM_ADDRESS_MASK;
        schedule_branch(target);
    }

    /// Sets the PC address of the VUs, with a delay slot.
    /// The VUs' jumping range is much smaller than the EE Core's
    /// (just 16KB is sufficient for the VUs), and so this method behaves
    /// differently from the original set_branch_jtype.
    ///
    /// @param pc       Unused; kept so the signature mirrors set_branch_itype.
    /// @param jump_to  Absolute target, counted in instructions.
    void set_branch_jtype(WordPcRegister& pc, const uptr jump_to)
    {
        static_cast<void>(pc);
        const uptr target = (jump_to * Constants::EE::VPU::SIZE_VU_INSTRUCTION) & MICRO_MEM_ADDRESS_MASK;
        schedule_branch(target);
    }

    /// Advances the PC by one instruction, or applies a pending branch once
    /// its delay counter reaches zero.
    void advance_pc(WordPcRegister& pc)
    {
        if (current_slot > 0 && --current_slot == 0)
        {
            pc.write_uword(branch_pc);

            if (second_branch_pending)
            {
                // Re-arm with a single slot: the next call to advance_pc()
                // redirects the PC to the queued second target.
                second_branch_pending = false;
                branch_pc = second_branch_pc;
                current_slot = 1;
            }

            return;
        }

        pc.offset(Constants::EE::VPU::SIZE_VU_INSTRUCTION);
    }

private:
    // VU can hold 16KB of instructions only (and 4KB in VU0), so targets are
    // ANDed with 0x3FFF.
    // TODO: VU0 has a smaller micro memory and needs a different mask.
    static constexpr uword MICRO_MEM_ADDRESS_MASK = 0x3FFF;

    /// Records `target` as the next branch destination.
    /// If we are branching in the delay slot, the original branch runs for
    /// only one cycle: the new target is queued and applied by advance_pc()
    /// right after the first one.
    void schedule_branch(const uptr target)
    {
        if (is_branch_pending())
        {
            second_branch_pending = true;
            second_branch_pc = target;
        }
        else
        {
            // advance_pc() decrements this once per call and applies the
            // branch when it reaches zero.
            current_slot = slots + 1;
            branch_pc = target;
        }
    }

    // introduces the base class variables to this class
    using BranchDelaySlot<slots>::branch_pc;
    using BranchDelaySlot<slots>::current_slot;

    uptr second_branch_pc;
    bool second_branch_pending;

    // Serialization
public:
    template<class Archive>
    void serialize(Archive & archive)
    {
        archive(
            CEREAL_NVP(current_slot),
            CEREAL_NVP(branch_pc),
            CEREAL_NVP(second_branch_pc),
            CEREAL_NVP(second_branch_pending)
        );
    }
};

View file

@ -1,6 +1,7 @@
#pragma once
#include "Common/Types/Mips/MipsInstruction.hpp"
#include "Common/Types/Mips/MipsInstructionInfo.hpp"
#include "Resources/Ee/Vpu/Vu/VuVectorField.hpp"
/// A VU instruction type, which is used to extract information out of the parsed 32-bit value.
@ -101,7 +102,7 @@ struct VuInstruction : public MipsInstruction
/// Returns the 5-bit immediate of the instruction.
ubyte imm5() const
{
    // FD is IMM5 in some instructions
    return static_cast<ubyte>(FD.extract_from(value));
}
uhword imm11() const
@ -116,7 +117,7 @@ struct VuInstruction : public MipsInstruction
/// Returns the 15-bit immediate of the instruction.
/// IMM11 supplies the low 11 bits; DEST supplies the bits above them, so it
/// has to be shifted left by 11 before the two are combined.
uhword imm15() const
{
    return static_cast<uhword>(IMM11.extract_from(value) | (DEST.extract_from(value) << 11));
}
uword imm24() const

View file

@ -12,6 +12,7 @@
#include "Common/Types/Register/PcRegisters.hpp"
#include "Common/Types/Register/SizedHwordRegister.hpp"
#include "Common/Types/Register/SizedQwordRegister.hpp"
#include "Controller/Ee/Vpu/Vu/VuBranchDelaySlot.hpp"
#include "Resources/Ee/Vpu/Vu/VuUnitRegisters.hpp"
class EeCoreCop0;
@ -62,6 +63,7 @@ public:
/// Also known as the TPC (termination PC), treated as the same thing.
/// Made to be 32-bit even though only 16-bits are used (bus maps easier).
WordPcRegister pc;
VuBranchDelaySlot<> bdelay;
/// The CMSAR register used for micro subroutine execution.
/// See VU Users Manual page 202.

View file

@ -32,7 +32,7 @@ int count_trailing_zeros(const uword value);
/// Parses `source` of type U as a N-bit integer, converting it to an int of type T.
template<typename T, typename U, unsigned int N>
T extend_integer(const U source)
constexpr T extend_integer(const U source)
{
// For a 5 bit integer, it's the first 4 bits (0b01111)
constexpr T value_mask = (1 << (N - 1)) - 1;