gpu: added CLUT cache implementation

This commit is contained in:
Jakub Czekański 2020-05-10 16:06:37 +02:00
parent d35f1c612b
commit 93580f34c1
9 changed files with 67 additions and 23 deletions

1
.gitignore vendored
View file

@ -62,6 +62,7 @@ spu.bin
.vs/ .vs/
imgui.ini imgui.ini
/vram.png
todo.txt todo.txt
config.json config.json

View file

@ -1,4 +1,4 @@
#include "texture_utils.h" #include "color_depth.h"
ColorDepth bitsToDepth(int bits) { ColorDepth bitsToDepth(int bits) {
switch (bits) { switch (bits) {

View file

@ -0,0 +1,17 @@
#pragma once
// Texture color depth selector.
// NOTE: enumerator order is significant - code that compares depths with
// relational operators relies on NONE < BIT_4 < BIT_8 < BIT_16.
enum class ColorDepth {
    NONE,
    BIT_4,
    BIT_8,
    BIT_16,
};

// Runtime conversion from a bit count to ColorDepth (defined out of line).
ColorDepth bitsToDepth(int bits);

// Compile-time conversion from a bit count to ColorDepth.
// Maps 4, 8 and 16 to the matching enumerator; any other value yields
// ColorDepth::NONE.
template <int bits>
constexpr ColorDepth bitsToDepth() {
    switch (bits) {
        case 4:
            return ColorDepth::BIT_4;
        case 8:
            return ColorDepth::BIT_8;
        case 16:
            return ColorDepth::BIT_16;
        default:
            return ColorDepth::NONE;
    }
}

View file

@ -49,6 +49,8 @@ void GPU::reset() {
drawingOffsetY = 0; drawingOffsetY = 0;
gp0_e6._reg = 0; gp0_e6._reg = 0;
clutCachePos = ivec2(-1, -1);
} }
void GPU::drawTriangle(const primitive::Triangle& triangle) { void GPU::drawTriangle(const primitive::Triangle& triangle) {
@ -607,6 +609,7 @@ void GPU::writeGP0(uint32_t data) {
} }
} else if (command == 0x01) { } else if (command == 0x01) {
// Clear Cache // Clear Cache
clutCachePos = ivec2(-1, -1);
} else if (command == 0x02) { } else if (command == 0x02) {
// Fill rectangle // Fill rectangle
cmd = Command::FillRectangle; cmd = Command::FillRectangle;

View file

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include <vector> #include <vector>
#include "color_depth.h"
#include "primitive.h" #include "primitive.h"
#include "psx_color.h" #include "psx_color.h"
#include "registers.h" #include "registers.h"
@ -98,6 +99,11 @@ class GPU {
std::array<uint16_t, VRAM_WIDTH * VRAM_HEIGHT> vram{}; std::array<uint16_t, VRAM_WIDTH * VRAM_HEIGHT> vram{};
// TODO: Serialize?
std::array<uint16_t, 256> clutCache{};
ivec2 clutCachePos{-1, -1};
ColorDepth clutCacheColorDepth = ColorDepth::NONE;
private: private:
// Hardware rendering // Hardware rendering
std::vector<Vertex> vertices; std::vector<Vertex> vertices;

View file

@ -44,6 +44,8 @@ INLINE void rasterizeRectangle(gpu::GPU* gpu, const primitive::Rect& rect) {
vStep = -1; vStep = -1;
} }
loadClutCacheIfRequired<bits>(gpu, rect.clut);
int x, y, u, v; int x, y, u, v;
for (y = min.y, v = uv.y; y <= max.y; y++, v += vStep) { for (y = min.y, v = uv.y; y <= max.y; y++, v += vStep) {
for (x = min.x, u = uv.x; x <= max.x; x++, u += uStep) { for (x = min.x, u = uv.x; x <= max.x; x++, u += uStep) {
@ -57,7 +59,7 @@ INLINE void rasterizeRectangle(gpu::GPU* gpu, const primitive::Rect& rect) {
c = PSXColor(rect.color.r, rect.color.g, rect.color.b); c = PSXColor(rect.color.r, rect.color.g, rect.color.b);
} else { } else {
const ivec2 texel = maskTexel(ivec2(u, v), textureWindow); const ivec2 texel = maskTexel(ivec2(u, v), textureWindow);
c = fetchTex<bits>(gpu, texel, rect.texpage, rect.clut); c = fetchTex<bits>(gpu, texel, rect.texpage);
if (c.raw == 0x0000) continue; if (c.raw == 0x0000) continue;
if constexpr (isBlended) { if constexpr (isBlended) {

View file

@ -200,6 +200,8 @@ void rasterizeTriangle(gpu::GPU* gpu, const primitive::Triangle& triangle) {
const int area = orient2d(pos[0], pos[1], pos[2]); const int area = orient2d(pos[0], pos[1], pos[2]);
if (area == 0) return; if (area == 0) return;
loadClutCacheIfRequired<bits>(gpu, triangle.clut);
ivec2 min( // ivec2 min( //
std::min({pos[0].x, pos[1].x, pos[2].x}), // std::min({pos[0].x, pos[1].x, pos[2].x}), //
std::min({pos[0].y, pos[1].y, pos[2].y}) // std::min({pos[0].y, pos[1].y, pos[2].y}) //
@ -278,7 +280,7 @@ void rasterizeTriangle(gpu::GPU* gpu, const primitive::Triangle& triangle) {
} else { } else {
const ivec2 uv(FROM_FP(attrib.u), FROM_FP(attrib.v)); const ivec2 uv(FROM_FP(attrib.u), FROM_FP(attrib.v));
const ivec2 texel = maskTexel(uv, textureWindow); const ivec2 texel = maskTexel(uv, textureWindow);
c = fetchTex<bits>(gpu, texel, triangle.texpage, triangle.clut); c = fetchTex<bits>(gpu, texel, triangle.texpage);
if (c.raw == 0x0000) goto DONE; if (c.raw == 0x0000) goto DONE;
if constexpr (isBlended) { if constexpr (isBlended) {

View file

@ -1,48 +1,55 @@
#pragma once #pragma once
#include "device/gpu/gpu.h" #include "device/gpu/gpu.h"
#include "utils/macros.h" #include "utils/macros.h"
#include "../color_depth.h"
#include "../primitive.h" #include "../primitive.h"
#define gpuVRAM ((uint16_t(*)[gpu::VRAM_WIDTH])gpu->vram.data()) #define gpuVRAM ((uint16_t(*)[gpu::VRAM_WIDTH])gpu->vram.data())
enum class ColorDepth { NONE, BIT_4, BIT_8, BIT_16 }; template <ColorDepth bits>
void loadClutCacheIfRequired(gpu::GPU* gpu, ivec2 clut) {
// Only paletted textures should reload the color look-up table cache
if constexpr (bits != ColorDepth::BIT_4 && bits != ColorDepth::BIT_8) {
return;
}
ColorDepth bitsToDepth(int bits); bool textureFormatRequireReload = bits > gpu->clutCacheColorDepth;
bool clutPositionChanged = gpu->clutCachePos != clut;
template <int bits> if (!textureFormatRequireReload && !clutPositionChanged) {
constexpr ColorDepth bitsToDepth() { return;
if constexpr (bits == 4) }
return ColorDepth::BIT_4;
else if constexpr (bits == 8) gpu->clutCacheColorDepth = bits;
return ColorDepth::BIT_8; gpu->clutCachePos = clut;
else if constexpr (bits == 16)
return ColorDepth::BIT_16; constexpr int entries = (bits == ColorDepth::BIT_8) ? 256 : 16;
else for (int i = 0; i < entries; i++) {
return ColorDepth::NONE; gpu->clutCache[i] = gpuVRAM[clut.y][clut.x + i];
}
} }
namespace { namespace {
// Using unsigned vectors allows compiler to generate slightly faster division code INLINE uint16_t tex4bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) {
INLINE uint16_t tex4bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage, ivec2 clut) {
uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 4) & 1023]; uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 4) & 1023];
uint8_t entry = (index >> ((tex.x & 3) * 4)) & 0xf; uint8_t entry = (index >> ((tex.x & 3) * 4)) & 0xf;
return gpuVRAM[clut.y][clut.x + entry]; return gpu->clutCache[entry];
} }
INLINE uint16_t tex8bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage, ivec2 clut) { INLINE uint16_t tex8bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) {
uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 2) & 1023]; uint16_t index = gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x / 2) & 1023];
uint8_t entry = (index >> ((tex.x & 1) * 8)) & 0xff; uint8_t entry = (index >> ((tex.x & 1) * 8)) & 0xff;
return gpuVRAM[clut.y][clut.x + entry]; return gpu->clutCache[entry];
} }
INLINE uint16_t tex16bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) { return gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x) & 1023]; } INLINE uint16_t tex16bit(gpu::GPU* gpu, ivec2 tex, ivec2 texPage) { return gpuVRAM[(texPage.y + tex.y) & 511][(texPage.x + tex.x) & 1023]; }
template <ColorDepth bits> template <ColorDepth bits>
INLINE PSXColor fetchTex(gpu::GPU* gpu, ivec2 texel, const ivec2 texPage, const ivec2 clut) { INLINE PSXColor fetchTex(gpu::GPU* gpu, ivec2 texel, const ivec2 texPage) {
if constexpr (bits == ColorDepth::BIT_4) { if constexpr (bits == ColorDepth::BIT_4) {
return tex4bit(gpu, texel, texPage, clut); return tex4bit(gpu, texel, texPage);
} else if constexpr (bits == ColorDepth::BIT_8) { } else if constexpr (bits == ColorDepth::BIT_8) {
return tex8bit(gpu, texel, texPage, clut); return tex8bit(gpu, texel, texPage);
} else if constexpr (bits == ColorDepth::BIT_16) { } else if constexpr (bits == ColorDepth::BIT_16) {
return tex16bit(gpu, texel, texPage); return tex16bit(gpu, texel, texPage);
} else { } else {

View file

@ -7,6 +7,8 @@ struct ivec2 {
ivec2 operator-(const ivec2& b) const { return {x - b.x, y - b.y}; } ivec2 operator-(const ivec2& b) const { return {x - b.x, y - b.y}; }
ivec2 operator+(const ivec2& b) const { return {x + b.x, y + b.y}; } ivec2 operator+(const ivec2& b) const { return {x + b.x, y + b.y}; }
bool operator==(const ivec2& b) const { return x == b.x && y == b.y; }
bool operator!=(const ivec2& b) const { return x != b.x || y != b.y; }
}; };
struct ivec3 { struct ivec3 {
@ -15,6 +17,8 @@ struct ivec3 {
ivec3 operator-(const ivec3& b) const { return {x - b.x, y - b.y, z - b.z}; } ivec3 operator-(const ivec3& b) const { return {x - b.x, y - b.y, z - b.z}; }
ivec3 operator+(const ivec3& b) const { return {x + b.x, y + b.y, z - b.z}; } ivec3 operator+(const ivec3& b) const { return {x + b.x, y + b.y, z - b.z}; }
bool operator==(const ivec3& b) const { return x == b.x && y == b.y && z == b.z; }
bool operator!=(const ivec3& b) const { return x != b.x || y != b.y || z != b.z; }
}; };
struct vec2 { struct vec2 {
@ -27,6 +31,8 @@ struct vec2 {
vec2 operator+(const vec2& b) const { return {x + b.x, y + b.y}; } vec2 operator+(const vec2& b) const { return {x + b.x, y + b.y}; }
vec2 operator*(const float b) const { return {x * b, y * b}; } vec2 operator*(const float b) const { return {x * b, y * b}; }
vec2 operator/(const float b) const { return {x / b, y / b}; } vec2 operator/(const float b) const { return {x / b, y / b}; }
bool operator==(const vec2& b) const { return x == b.x && y == b.y; }
bool operator!=(const vec2& b) const { return x != b.x || y != b.y; }
float length() const; float length() const;
static vec2 normalize(const vec2& v); static vec2 normalize(const vec2& v);