Fix a few pipelining bugs with RSP

1) Setting SP_PC was not resetting the pipeline. This caused that
changing the PC within a HALT/UNHALT sequence was still causing
previous instructions in the pipeline (at the old address) to be
executed. This is not how the hardware works: SP_PC is immediate and
discards the whole pipeline.

2) BREAK did not correctly halt the processor at the right instruction,
which in turn caused resumption after HALT to execute the wrong
set of instructions. This was caused by the fact that the SP_STATUS
change was written into the EXDF latch, which in turn takes 3 cycles
to reach completion. Instead, we now use the DFWB latch, and we cause
it to abort the RSP cycle if the processor is halted. This happens
at the beginning of next cycle, which is the correct moment.

2bis) Since we are at it, use rsp_status_write to modify the RSP in
this case, rather than a direct write to the register. This change
fixes a race condition: SP_STATUS must be accessed atomically when
cen64 runs in multithreaded mode. To use rsp_status_write, we need
to introduce a nonexisting SP_SET_BROKE bit: we use the MSB, but then
mask it out in MTC0 to avoid some code to inadvertently have that bit
set.

3) When unhalting after BREAK, it's important to keep the correct
PC which comes from the EX stage (the one that was going to be
executed if BREAK didn't occur). Before, it was using the IF PC (fetch)
which is farther in the future.

Fixes #155
This commit is contained in:
Giovanni Bajo 2021-12-14 23:30:11 +01:00 committed by Simon Eriksson
parent d6b5b04395
commit 87ebca00b5
5 changed files with 36 additions and 14 deletions

View file

@ -21,8 +21,6 @@
#include <windows.h>
#endif
static void rsp_status_write(struct rsp *rsp, uint32_t rt);
//
// MFC0
//
@ -103,7 +101,7 @@ void rsp_status_write(struct rsp *rsp, uint32_t rt) {
if ((rt & SP_CLR_HALT) && (status & SP_STATUS_HALT)) {
// Save PC around pipeline init
uint32_t pc = rsp->pipeline.ifrd_latch.pc;
uint32_t pc = rsp->pipeline.rdex_latch.common.pc;
rsp_pipeline_init(&rsp->pipeline);
rsp->pipeline.ifrd_latch.pc = pc;
@ -115,6 +113,8 @@ void rsp_status_write(struct rsp *rsp, uint32_t rt) {
if (rt & SP_CLR_BROKE)
status &= ~SP_STATUS_BROKE;
else if (rt & SP_SET_BROKE)
status |= SP_STATUS_BROKE;
if (rt & SP_CLR_INTR)
clear_rcp_interrupt(rsp->bus->vr4300, MI_INTR_SP);
@ -205,7 +205,9 @@ void rsp_write_cp0_reg(struct rsp *rsp, unsigned dest, uint32_t rt) {
break;
case RSP_CP0_REGISTER_SP_STATUS:
rsp_status_write(rsp, rt);
// Mask out the synthetic SP_SET_BROKE bit which is used internally
// but does not exist in real hardware (and has no effect if set).
rsp_status_write(rsp, rt & ~SP_SET_BROKE);
break;
case RSP_CP0_REGISTER_SP_RESERVED:

View file

@ -55,6 +55,7 @@
#define SP_SET_SIG6 0x00400000
#define SP_CLR_SIG7 0x00800000
#define SP_SET_SIG7 0x01000000
#define SP_SET_BROKE 0x80000000 // Does not exist in hardware, used only internally
struct rsp;
@ -85,6 +86,8 @@ void RSP_MTC0(struct rsp *rsp, uint32_t iw, uint32_t rs, uint32_t rt);
uint32_t rsp_read_cp0_reg(struct rsp *rsp, unsigned src);
void rsp_write_cp0_reg(struct rsp *rsp, unsigned dest, uint32_t rt);
void rsp_status_write(struct rsp *rsp, uint32_t rt);
cen64_cold void rsp_cp0_init(struct rsp *rsp);
#endif

View file

@ -282,16 +282,17 @@ void RSP_BGTZ_BLEZ(
//
void RSP_BREAK(struct rsp *rsp,
uint32_t iw, uint32_t rs, uint32_t rt) {
struct rsp_exdf_latch *exdf_latch = &rsp->pipeline.exdf_latch;
uint32_t result = rsp->regs[RSP_CP0_REGISTER_SP_STATUS] |
(SP_STATUS_HALT | SP_STATUS_BROKE);
struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch;
if (rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK)
signal_rcp_interrupt(rsp->bus->vr4300, MI_INTR_SP);
exdf_latch->result.dest = RSP_CP0_REGISTER_SP_STATUS;
exdf_latch->result.result = result;
// Prepare to halt the processor at the beginning of next cycle
// (when the WB stage will run). This make sure that executing BREAK
// in a delay slot works correctly: the processor halts just before
// running the branch target opcode, and resume from there when un-halted.
dfwb_latch->result.dest = RSP_CP0_REGISTER_SP_STATUS;
dfwb_latch->result.result = SP_SET_HALT | SP_SET_BROKE;
}
//

View file

@ -175,8 +175,18 @@ int write_sp_regs2(void *opaque, uint32_t address, uint32_t word, uint32_t dqm)
debug_mmio_write(sp, sp_register_mnemonics[reg], word, dqm);
if (reg == SP_PC_REG)
if (reg == SP_PC_REG) {
// Setting the SP PC registers from the CPU basically forces the RSP to
// start from there, irrespective of existing pipeline stages, so we need
// to reset the pipeline.
// NOTE: this is currently broken when using multithreading if the CPU
// sets the PC while the RSP is running, so we just abort in this case
// which should not happen anyway in real world.
assert((rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT)
&& "SP PC set while the RSP is running");
rsp_pipeline_init(&rsp->pipeline);
rsp->pipeline.ifrd_latch.pc = word & 0xFFC;
}
else
abort();

View file

@ -206,15 +206,21 @@ cen64_flatten static inline void rsp_df_stage(struct rsp *rsp) {
}
// Writeback stage.
static inline void rsp_wb_stage(struct rsp *rsp) {
static inline bool rsp_wb_stage(struct rsp *rsp) {
const struct rsp_dfwb_latch *dfwb_latch = &rsp->pipeline.dfwb_latch;
rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result;
if (dfwb_latch->result.dest == RSP_CP0_REGISTER_SP_STATUS) {
rsp_status_write(rsp, dfwb_latch->result.result);
return !(rsp->regs[RSP_CP0_REGISTER_SP_STATUS] & SP_STATUS_HALT);
} else
rsp->regs[dfwb_latch->result.dest] = dfwb_latch->result.result;
return true;
}
// Advances the processor pipeline by one clock.
void rsp_cycle_(struct rsp *rsp) {
rsp_wb_stage(rsp);
if (unlikely(!rsp_wb_stage(rsp)))
return;
rsp_df_stage(rsp);
rsp->pipeline.exdf_latch.result.dest = RSP_REGISTER_R0;