improved timings for the first 50 scanlines

This commit is contained in:
Jaklyy 2023-12-14 15:18:39 -05:00
parent 24eecec50f
commit a46316d71f
4 changed files with 79 additions and 47 deletions

View file

@ -222,8 +222,7 @@ void GPU3D::Reset() noexcept
AlphaRefVal = 0;
AlphaRef = 0;
RDLines = 46;
RDLinesMin = 46;
RDLinesDisplay = 46;
memset(ToonTable, 0, sizeof(ToonTable));
memset(EdgeTable, 0, sizeof(EdgeTable));
@ -2370,7 +2369,7 @@ void GPU3D::CheckFIFODMA() noexcept
void GPU3D::VCount144() noexcept
{
RDLinesMin = 46;
RDLinesDisplay = 46;
CurrentRenderer->VCount144();
}
@ -2614,7 +2613,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return RDLines; // IT IS TIME
return RDLinesDisplay; // IT IS TIME
case 0x04000600:
{
@ -2658,7 +2657,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return RDLines; // IT IS TIME
return RDLinesDisplay; // IT IS TIME
case 0x04000600:
{

View file

@ -245,8 +245,7 @@ public:
bool RenderingEnabled = false;
u32 DispCnt = 0;
u32 RDLines = 0;
u32 RDLinesMin = 0;
u32 RDLinesDisplay = 0;
u8 AlphaRefVal = 0;
u8 AlphaRef = 0;
@ -334,13 +333,21 @@ public:
// rasterization timing constants
static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
// GPU 2D read timings, for emulating race conditions
static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac;
static constexpr int GPU2DSpeedOutsidePair = 948 * TimingFrac;
static constexpr int ScanlinePairLength = 2130 * TimingFrac;
static constexpr int ScanlineTimeout = 2126 * TimingFrac;
static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac;
static constexpr int GPU2DSpeedReadScanline = 256 * TimingFrac;
static constexpr int InitGPU2DTimeout = 51618 * TimingFrac;
static constexpr int ScanlineBreak = 4 * TimingFrac;
// GPU 3D rasterization timings, for emulating the timeout
static constexpr int ScanlinePairLength = 2130 * TimingFrac;
static constexpr int ScanlineTimeout = 1686 * TimingFrac; // 2126? 1686?
static constexpr int ScanlineBreak = 4 * TimingFrac;
static constexpr int ScanlineBreak2 = 40 * TimingFrac;
static constexpr int IncrementStrange = 1618 * TimingFrac; // 1882? 1442? 1618??
static constexpr int FreeTiming = 440 * TimingFrac;
// GPU 3D rasterization timings II, for counting each element with timing characteristics
static constexpr int PerPolyTiming = 12 * TimingFrac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1 * TimingFrac; // does not apply to the first 4 pixels in a polygon (per scanline?)
@ -348,7 +355,7 @@ public:
static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation
static constexpr int PerRightSlope = 1 * TimingFrac;
static constexpr int EmptyPolyScanline = 4 * TimingFrac - 14; // seems to be slightly under 4?
static constexpr int EmptyPolyScanline = 4 * TimingFrac;// - 14; // seems to be slightly under 4?
//static constexpr int FirstPixelTiming;
class Renderer3D

View file

@ -121,44 +121,43 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
else counter = &RasterTimingEven;
*counter += cycles;
if (RasterTiming - *counter) return false;
if (RasterTiming - *counter > 0) return false;
GPU.GPU3D.DispCnt |= (1<<12);
return true;
}
u32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd)
{
// calculate and return the difference between the old span and the new span, while adding timings to the timings counter
// pixels don't count towards timings if they're the first 4 pixels in a scanline (for some reason?)
if (pixels <= 4) return 0;
u32 pixelsremain = pixels-4;
pixels -= 4;
s32* counter;
if (odd) counter = &RasterTimingOdd;
else counter = &RasterTimingEven;
//todo: do this without a for loop somehow.
for (; pixelsremain > 0; pixelsremain--)
//todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow.
if constexpr (TimingFrac > 1)
for (; pixels > 0; pixels--)
{
*counter += TimingFrac;
if ((RasterTiming - *counter) <= 0) break;
}
else
{
*counter += TimingFrac;
if (!(RasterTiming - *counter)) break;
*counter += pixels;
pixels = -(RasterTiming - *counter);
if (pixels > 0) *counter -= pixels;
}
if (pixelsremain <= 0) return 0;
if (pixels <= 0) return 0;
GPU.GPU3D.DispCnt |= (1<<12);
return pixelsremain;
}
void SoftRenderer::EndScanline(bool odd)
{
if (!odd)
{
RasterTiming += std::max(RasterTimingOdd, RasterTimingEven);
}
return pixels;
}
void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
@ -1458,7 +1457,6 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
}
}
EndScanline(odd);
return abort;
}
@ -1758,30 +1756,59 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
s32 y = 0;
s8 prevbufferline = -2;
u8 buffersize = 0;
RasterTiming = (ScanlinePairLength * 24);
RasterTimingOdd = 0;
RasterTimingEven = 0;
s8 buffersize = 0;
RasterTiming = InitialTiming;
s32 timingadvance = InitialTiming;
bool abort = false;
//u32* RDLinesReg = &GPU.GPU3D.RDLines;
ClearBuffers();
for (u8 quarter = 0; quarter < 4; quarter++)
for (u8 bufferline = 0; bufferline < 48; bufferline += 2)
{
RasterTiming += (ScanlineTimeout);
RasterTimingOdd = 0;
RasterTimingEven = 0;
if (buffersize > 48)
{
RasterTiming = ScanlinePairLength * 23;
timingadvance = 0;
buffersize = 48;
}
if (!abort) RasterTiming += IncrementStrange;
else RasterTiming += ScanlineTimeout;
if (buffersize >= 50) RasterTiming = (ScanlinePairLength * 23) + ScanlineTimeout;
abort = RenderScanline(y, j, true);
abort = RenderScanline(y+1, j, false);
RenderScanline(y, j, true);
RenderScanline(y+1, j, false);
RasterTiming += ScanlineBreak;
buffersize += 2;
//RasterTiming += ScanlineBreak;
s32 timespent = std::max(RasterTimingOdd, RasterTimingEven);
u32* RDLinesReg = &GPU.GPU3D.RDLines;
*RDLinesReg = 0;
for (int i = RasterTiming; i > ScanlinePairLength / 2; i -= ScanlinePairLength / 2) *RDLinesReg += 1;
/*if (timespent > FreeTiming)
{
abort = true;
timespent -= FreeTiming;
}
else if (!abort)
{
abort = false;
timespent -= FreeTiming;
}*/
//if (!abort)
timespent -= FreeTiming;
if (timespent > 0)
{
RasterTiming -= timespent;
timingadvance -= timespent;
}
if (timingadvance < 0) for (s32 i = (ScanlinePairLength / 2) * buffersize; i > RasterTiming + (ScanlinePairLength / 2); i -= ScanlinePairLength / 2) buffersize -= 1;
if (buffersize < 0) buffersize = 0;
// seems to display the lowest scanline buffer count reached during the current frame.
// we also cap it to 46 here, because this reg does that too for some reason.
if (*RDLinesReg > GPU.GPU3D.RDLinesMin) *RDLinesReg = GPU.GPU3D.RDLinesMin;
else if (*RDLinesReg < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = *RDLinesReg;
if (quarter >= 1 && buffersize < GPU.GPU3D.RDLinesDisplay) GPU.GPU3D.RDLinesDisplay = buffersize;
if (prevbufferline >= 0)
{

View file

@ -455,8 +455,7 @@ private:
melonDS::GPU& GPU;
RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, bool odd);
u32 DoTimingsPixels(u32 pixels, bool odd);
void EndScanline(bool odd);
u32 DoTimingsPixels(s32 pixels, bool odd);
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);