diff --git a/.gitignore b/.gitignore index 8d74dc5..7443e2f 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ spu.bin .vs/ imgui.ini +/vram.png todo.txt config.json diff --git a/src/device/gpu/render/texture_utils.cpp b/src/device/gpu/color_depth.cpp similarity index 89% rename from src/device/gpu/render/texture_utils.cpp rename to src/device/gpu/color_depth.cpp index b0e2b5b..1ec7a34 100644 --- a/src/device/gpu/render/texture_utils.cpp +++ b/src/device/gpu/color_depth.cpp @@ -1,4 +1,4 @@ -#include "texture_utils.h" +#include "color_depth.h" ColorDepth bitsToDepth(int bits) { switch (bits) { diff --git a/src/device/gpu/color_depth.h b/src/device/gpu/color_depth.h new file mode 100644 index 0000000..5e30464 --- /dev/null +++ b/src/device/gpu/color_depth.h @@ -0,0 +1,17 @@ +#pragma once + +enum class ColorDepth { NONE, BIT_4, BIT_8, BIT_16 }; + +ColorDepth bitsToDepth(int bits); + +template +constexpr ColorDepth bitsToDepth() { + if constexpr (bits == 4) + return ColorDepth::BIT_4; + else if constexpr (bits == 8) + return ColorDepth::BIT_8; + else if constexpr (bits == 16) + return ColorDepth::BIT_16; + else + return ColorDepth::NONE; +} \ No newline at end of file diff --git a/src/device/gpu/gpu.cpp b/src/device/gpu/gpu.cpp index 1bfd4e9..2319b50 100644 --- a/src/device/gpu/gpu.cpp +++ b/src/device/gpu/gpu.cpp @@ -49,6 +49,8 @@ void GPU::reset() { drawingOffsetY = 0; gp0_e6._reg = 0; + + clutCachePos = ivec2(-1, -1); } void GPU::drawTriangle(const primitive::Triangle& triangle) { @@ -607,6 +609,7 @@ void GPU::writeGP0(uint32_t data) { } } else if (command == 0x01) { // Clear Cache + clutCachePos = ivec2(-1, -1); } else if (command == 0x02) { // Fill rectangle cmd = Command::FillRectangle; diff --git a/src/device/gpu/gpu.h b/src/device/gpu/gpu.h index 1867bc7..bd94ea3 100644 --- a/src/device/gpu/gpu.h +++ b/src/device/gpu/gpu.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include "color_depth.h" #include "primitive.h" #include "psx_color.h" #include "registers.h" @@ -98,6 +99,11 @@ class GPU { std::array vram{}; + // TODO: Serialize? + std::array clutCache{}; + ivec2 clutCachePos{-1, -1}; + ColorDepth clutCacheColorDepth = ColorDepth::NONE; + private: // Hardware rendering std::vector vertices; diff --git a/src/device/gpu/render/render_rectangle.cpp b/src/device/gpu/render/render_rectangle.cpp index 6465d3f..afbd81a 100644 --- a/src/device/gpu/render/render_rectangle.cpp +++ b/src/device/gpu/render/render_rectangle.cpp @@ -44,6 +44,8 @@ INLINE void rasterizeRectangle(gpu::GPU* gpu, const primitive::Rect& rect) { vStep = -1; } + loadClutCacheIfRequired(gpu, rect.clut); + int x, y, u, v; for (y = min.y, v = uv.y; y <= max.y; y++, v += vStep) { for (x = min.x, u = uv.x; x <= max.x; x++, u += uStep) { @@ -57,7 +59,7 @@ INLINE void rasterizeRectangle(gpu::GPU* gpu, const primitive::Rect& rect) { c = PSXColor(rect.color.r, rect.color.g, rect.color.b); } else { const ivec2 texel = maskTexel(ivec2(u, v), textureWindow); - c = fetchTex(gpu, texel, rect.texpage, rect.clut); + c = fetchTex(gpu, texel, rect.texpage); if (c.raw == 0x0000) continue; if constexpr (isBlended) { diff --git a/src/device/gpu/render/render_triangle.cpp b/src/device/gpu/render/render_triangle.cpp index 1bb9e85..eef1b1c 100644 --- a/src/device/gpu/render/render_triangle.cpp +++ b/src/device/gpu/render/render_triangle.cpp @@ -200,6 +200,8 @@ void rasterizeTriangle(gpu::GPU* gpu, const primitive::Triangle& triangle) { const int area = orient2d(pos[0], pos[1], pos[2]); if (area == 0) return; + loadClutCacheIfRequired(gpu, triangle.clut); + ivec2 min( // std::min({pos[0].x, pos[1].x, pos[2].x}), // std::min({pos[0].y, pos[1].y, pos[2].y}) // @@ -278,7 +280,7 @@ void rasterizeTriangle(gpu::GPU* gpu, const primitive::Triangle& triangle) { } else { const ivec2 uv(FROM_FP(attrib.u), FROM_FP(attrib.v)); const ivec2 texel = maskTexel(uv, textureWindow); - c = fetchTex(gpu, texel, triangle.texpage, triangle.clut); + c = fetchTex(gpu, texel, triangle.texpage); if (c.raw == 0x0000) goto DONE; if constexpr (isBlended) { diff --git a/src/device/gpu/render/texture_utils.h b/src/device/gpu/render/texture_utils.h index 7c5a920..01ec529 100644 --- a/src/device/gpu/render/texture_utils.h +++ b/src/device/gpu/render/texture_utils.h @@ -1,48 +1,55 @@ #pragma once #include "device/gpu/gpu.h" #include "utils/macros.h" +#include "../color_depth.h" #include "../primitive.h" #define gpuVRAM ((uint16_t(*)[gpu::VRAM_WIDTH])gpu->vram.data()) -enum class ColorDepth { NONE, BIT_4, BIT_8, BIT_16 }; +template +void loadClutCacheIfRequired(gpu::GPU* gpu, ivec2 clut) { + // Only paletted textures should reload the color look-up table cache + if constexpr (bits != ColorDepth::BIT_4 && bits != ColorDepth::BIT_8) { + return; + } -ColorDepth bitsToDepth(int bits); + bool textureFormatRequireReload = bits > gpu->clutCacheColorDepth; + bool clutPositionChanged = gpu->clutCachePos != clut; -template -constexpr ColorDepth bitsToDepth() { - if constexpr (bits == 4) - return ColorDepth::BIT_4; - else if constexpr (bits == 8) - return ColorDepth::BIT_8; - else if constexpr (bits == 16) - return ColorDepth::BIT_16; - else - return ColorDepth::NONE; + if (!textureFormatRequireReload && !clutPositionChanged) { + return; + } + + gpu->clutCacheColorDepth = bits; + gpu->clutCachePos = clut; + + constexpr int entries = (bits == ColorDepth::BIT_8) ? 256 : 16; + for (int i = 0; i < entries; i++) { + gpu->clutCache[i] = gpuVRAM[clut.y][clut.x + i]; + } } namespace { -// Using unsigned vectors allows compiler to generate slightly faster division code -INLINE uint16_t tex4bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage, ivec2 clut) { +INLINE uint16_t tex4bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) { uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 4) & 1023]; uint8_t entry = (index >> ((tex.x & 3) * 4)) & 0xf; - return gpuVRAM[clut.y][clut.x + entry]; + return gpu->clutCache[entry]; } -INLINE uint16_t tex8bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage, ivec2 clut) { +INLINE uint16_t tex8bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) { uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 2) & 1023]; uint8_t entry = (index >> ((tex.x & 1) * 8)) & 0xff; - return gpuVRAM[clut.y][clut.x + entry]; + return gpu->clutCache[entry]; } INLINE uint16_t tex16bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) { return gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x) & 1023]; } template -INLINE PSXColor fetchTex(gpu::GPU* gpu, ivec2 texel, const ivec2 texPage, const ivec2 clut) { +INLINE PSXColor fetchTex(gpu::GPU* gpu, ivec2 texel, const ivec2 texPage) { if constexpr (bits == ColorDepth::BIT_4) { - return tex4bit(gpu, texel, texPage, clut); + return tex4bit(gpu, texel, texPage); } else if constexpr (bits == ColorDepth::BIT_8) { - return tex8bit(gpu, texel, texPage, clut); + return tex8bit(gpu, texel, texPage); } else if constexpr (bits == ColorDepth::BIT_16) { return tex16bit(gpu, texel, texPage); } else { diff --git a/src/utils/vector.h b/src/utils/vector.h index f21c7c7..738165f 100644 --- a/src/utils/vector.h +++ b/src/utils/vector.h @@ -7,6 +7,8 @@ struct ivec2 { ivec2 operator-(const ivec2& b) const { return {x - b.x, y - b.y}; } ivec2 operator+(const ivec2& b) const { return {x + b.x, y + b.y}; } + bool operator==(const ivec2& b) const { return x == b.x && y == b.y; } + bool operator!=(const ivec2& b) const { return x != b.x || y != b.y; } }; struct ivec3 { @@ -15,6 +17,8 @@ struct ivec3 { ivec3 operator-(const ivec3& b) const { return {x - b.x, y - b.y, z - b.z}; } ivec3 operator+(const ivec3& b) const { return {x + b.x, y + b.y, z - b.z}; } + bool operator==(const ivec3& b) const { return x == b.x && y == b.y && z == b.z; } + bool operator!=(const ivec3& b) const { return x != b.x || y != b.y || z != b.z; } }; struct vec2 { @@ -27,6 +31,8 @@ struct vec2 { vec2 operator+(const vec2& b) const { return {x + b.x, y + b.y}; } vec2 operator*(const float b) const { return {x * b, y * b}; } vec2 operator/(const float b) const { return {x / b, y / b}; } + bool operator==(const vec2& b) const { return x == b.x && y == b.y; } + bool operator!=(const vec2& b) const { return x != b.x || y != b.y; } float length() const; static vec2 normalize(const vec2& v);