Vita3K/vita3k/renderer/src/texture/cache.cpp

819 lines
33 KiB
C++

// Vita3K emulator project
// Copyright (C) 2024 Vita3K team
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#include <renderer/functions.h>
#include <renderer/profile.h>
#include <renderer/texture_cache.h>
#include <gxm/functions.h>
#include <mem/ptr.h>
#include <util/align.h>
#include <util/bit_cast.h>
#include <util/log.h>
#include <algorithm>
#include <cstring>
#include <numeric>
#if defined(__x86_64__) && !defined(__APPLE__)
#include <xxh_x86dispatch.h>
#else
#define XXH_INLINE_ALL
#include <xxhash.h>
#endif
#ifdef WIN32
#include <execution>
#endif
namespace renderer {
namespace texture {
// Compute the 64-bit XXH3 hash of the `size` bytes starting at `data`.
static uint64_t hash_data(const void *data, size_t size) {
    const uint64_t digest = XXH3_64bits(data, size);
    return digest;
}
// Hash the first `count` 32-bit entries of the palette returned by get_texture_palette for `texture`.
static uint64_t hash_palette_data(const SceGxmTexture &texture, size_t count, const MemState &mem) {
    const size_t palette_size_in_bytes = count * sizeof(uint32_t);
    const uint32_t *const palette_entries = get_texture_palette(texture, mem);
    return hash_data(palette_entries, palette_size_in_bytes);
}
uint64_t hash_texture_data(const SceGxmTexture &texture, uint32_t texture_size, const MemState &mem) {
const SceGxmTextureFormat format = gxm::get_format(texture);
const SceGxmTextureBaseFormat base_format = gxm::get_base_format(format);
const Ptr<const void> data(texture.data_addr << 2);
uint64_t data_hash = 0;
if (data.address()) {
data_hash = hash_data(data.get(mem), texture_size);
}
switch (base_format) {
case SCE_GXM_TEXTURE_BASE_FORMAT_P4:
return data_hash ^ hash_palette_data(texture, 16, mem);
case SCE_GXM_TEXTURE_BASE_FORMAT_P8:
return data_hash ^ hash_palette_data(texture, 256, mem);
default:
return data_hash;
}
}
// Hash an arbitrarily-sized swizzled texture while only reading the blocks it actually covers.
// The function recurses on the (up to) four sub-blocks making up the current swizzle block;
// as soon as a block is entirely covered by the texture, it is hashed in one go and the recursion stops.
// The pixels are hashed in the exact order in which they appear in memory.
//
// data:                          start of the current block
// width, height:                 extent of the texture inside the current block
// texture_width, texture_height: dimensions of the current block
// texture_size:                  size in bytes of the current block
static void hash_arbitrary_swizzled(const uint8_t *data, uint32_t width, uint32_t height, uint32_t texture_width, uint32_t texture_height, uint32_t texture_size, XXH3_state_t *hash_state) {
    const bool block_fully_covered = (width >= texture_width) && (height >= texture_height);
    if (block_fully_covered) {
        // the whole block is part of the texture, hash it at once
        XXH3_64bits_update(hash_state, data, texture_size);
        return;
    }

    // split the current block in 4 sub-blocks, each one being a quarter of the memory
    const uint32_t half_width = texture_width / 2;
    const uint32_t half_height = texture_height / 2;
    const uint32_t quarter_size = texture_size / 4;

    const bool spills_below = height > half_height;
    const bool spills_right = width > half_width;

    // the first sub-block always contains something
    hash_arbitrary_swizzled(data, width, height, half_width, half_height, quarter_size, hash_state);

    // the remaining sub-blocks are visited in memory order
    if (spills_below)
        hash_arbitrary_swizzled(data + quarter_size, width, height - half_height, half_width, half_height, quarter_size, hash_state);

    if (spills_right)
        hash_arbitrary_swizzled(data + 2 * quarter_size, width - half_width, height, half_width, half_height, quarter_size, hash_state);

    if (spills_below && spills_right)
        hash_arbitrary_swizzled(data + 3 * quarter_size, width - half_width, height - half_height, half_width, half_height, quarter_size, hash_state);
}
// Hash only the visible pixels of a tiled texture whose width and/or height is not a multiple
// of the tile size (32x32). The data is fed to hash_state in memory order:
//   1. the rows of tiles that are complete vertically,
//   2. the top part of the tiles of the last (partial) row of tiles,
//   3. the top left part of the bottom right tile.
// block_width is currently not used by this function.
static void hash_unaligned_tiled(const uint8_t *data, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height, uint32_t bpp, XXH3_state_t *hash_state) {
    // a tile is 32x32, the low 5 bits of a dimension are what spills into a partial tile
    constexpr uint32_t tile_mask = 0x1F;

    const uint32_t full_tiles_width = align_down(width, 32);
    const uint32_t full_tiles_height = align_down(height, 32);
    const bool width_is_tile_aligned = (width == full_tiles_width);
    const bool height_is_tile_aligned = (height == full_tiles_height);

    // to take blocks into account, a block line is considered to be
    // a 32 x block_height rectangle of pixels
    const uint32_t block_lines_per_tile = 32 / block_height;
    const uint32_t block_line_size = (32 * block_height * bpp) / 8;
    // part of a block line that is used in the tiles of the rightmost (partial) column
    const uint32_t partial_block_line_size = ((width & tile_mask) * block_height * bpp) / 8;

    if (width_is_tile_aligned) {
        // every tile is complete horizontally: hash everything except the bottom in one go
        const uint32_t full_rows_size = (width * full_tiles_height * bpp) / 8;
        XXH3_64bits_update(hash_state, data, full_rows_size);
        data += full_rows_size;
    } else {
        // rows of tiles must be processed one by one
        const uint32_t tile_rows = full_tiles_height / 32;
        const uint32_t full_tiles_row_size = (full_tiles_width * 32 * bpp) / 8;

        for (uint32_t tile_row = 0; tile_row < tile_rows; ++tile_row) {
            // complete tiles of this row
            XXH3_64bits_update(hash_state, data, full_tiles_row_size);
            data += full_tiles_row_size;

            // only the left part of the last tile is used
            for (uint32_t line = 0; line < block_lines_per_tile; ++line) {
                XXH3_64bits_update(hash_state, data, partial_block_line_size);
                data += block_line_size;
            }
        }
    }

    if (height_is_tile_aligned)
        // no partial row of tiles at the bottom
        return;

    // only the top of the tiles of the last row is used
    const uint32_t tile_size = (32 * 32 * bpp) / 8;
    const uint32_t used_tile_size = (32 * (height & tile_mask) * bpp) / 8;
    const uint32_t full_tiles_x = full_tiles_width / 32;
    for (uint32_t tile = 0; tile < full_tiles_x; ++tile) {
        XXH3_64bits_update(hash_state, data, used_tile_size);
        data += tile_size;
    }

    if (width_is_tile_aligned)
        // no partial tile in the bottom right corner
        return;

    // only the top left part of the bottom right tile is used
    const uint32_t used_block_lines = (height & tile_mask) / block_height;
    for (uint32_t line = 0; line < used_block_lines; ++line) {
        XXH3_64bits_update(hash_state, data, partial_block_line_size);
        data += block_line_size;
    }
}
// Compute a hash of the first mip of a texture that only depends on the pixels that are part
// of the texture, i.e. the bytes living in the stride / padding are skipped.
// The dimensions, the gamma mode, the swizzle and (for paletted formats) the palette
// are also part of the hash. Returns 0 when the texture has a null data pointer.
uint64_t hash_texture_nostride(const SceGxmTexture &texture, const MemState &mem) {
    const SceGxmTextureFormat format = gxm::get_format(texture);
    const SceGxmTextureBaseFormat base_format = gxm::get_base_format(format);

    const Ptr<const uint8_t> data(texture.data_addr << 2);
    if (!data)
        return 0;

    // dimensions, rounded up to a whole number of blocks
    const uint32_t raw_width = gxm::get_width(texture);
    const uint32_t raw_height = gxm::get_height(texture);
    auto [block_width, block_height] = gxm::get_block_size(base_format);
    const uint32_t width = align(raw_width, block_width);
    const uint32_t height = align(raw_height, block_height);

    // seed the hash with the width and height of the texture,
    // the gamma mode (the same texture can be used with and without srgb)
    // and the swizzle (although it only matters for textures with 3 or more components)
    uint64_t hash = width << 16;
    hash |= height;
    hash |= static_cast<uint64_t>(texture.gamma_mode != 0) << 32;
    hash |= static_cast<uint64_t>(texture.swizzle_format) << 33;

    // paletted textures: one hash for each palette variant
    if (base_format == SCE_GXM_TEXTURE_BASE_FORMAT_P4) {
        hash ^= hash_palette_data(texture, 16, mem);
        // the block dimensions are overridden for P4, they are used for the hashing below
        block_width = 2;
        block_height = 1;
    } else if (base_format == SCE_GXM_TEXTURE_BASE_FORMAT_P8) {
        hash ^= hash_palette_data(texture, 256, mem);
    }

    // special case: yuv textures are hashed as one contiguous range
    if (gxm::is_yuv_format(base_format))
        return hash ^ hash_data(data.get(mem), gxm::texture_size_first_mip(texture));

    const uint32_t bpp = gxm::bits_per_pixel(base_format);
    const auto texture_type = texture.texture_type();
    const auto is_power_of_two = [](uint32_t value) { return (value & (value - 1)) == 0; };

    // when no pixel lives in the stride, the whole texture can be hashed at once
    bool has_no_stride = true;
    uint32_t stride_in_pixels = width;
    switch (texture_type) {
    case SCE_GXM_TEXTURE_LINEAR_STRIDED:
        stride_in_pixels = (gxm::get_stride_in_bytes(texture) * 8) / bpp;
        has_no_stride = stride_in_pixels == width;
        break;

    case SCE_GXM_TEXTURE_LINEAR:
        // lines are padded to a multiple of 8 pixels
        stride_in_pixels = align(width, 8);
        has_no_stride = stride_in_pixels == width;
        break;

    case SCE_GXM_TEXTURE_SWIZZLED_ARBITRARY:
    case SCE_GXM_TEXTURE_CUBE_ARBITRARY:
        // no padding if both the width and height are powers of 2
        has_no_stride = is_power_of_two(width) && is_power_of_two(height);
        break;

    case SCE_GXM_TEXTURE_TILED:
        // no padding if both the width and height are multiples of the tile size (32)
        stride_in_pixels = align(width, 32);
        has_no_stride = (width & 0x1F) == 0 && (height & 0x1F) == 0;
        break;

    default:
        break;
    }

    if (has_no_stride) {
        // hash the whole first mip and we are done
        // (the original author checked that width * height * bpp always fits in a 32-bit unsigned integer)
        const uint32_t texture_size = (width * height * bpp) / 8;
        return hash ^ hash_data(data.get(mem), texture_size);
    }

    // the pixels are not in one contiguous memory range, a streaming hash is needed
    // the state is created once and reused by every call
    // NOTE(review): being shared, it would be raced on by concurrent callers - confirm this is only called from one thread
    static XXH3_state_t *hash_state = XXH3_createState();
    XXH3_64bits_reset(hash_state);

    if (texture_type == SCE_GXM_TEXTURE_LINEAR || texture_type == SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        // hash line by line (block line by block line for block compressed textures)
        const uint32_t line_stride_in_bytes = (stride_in_pixels * block_height * bpp) / 8;
        const uint32_t line_used_bytes = (width * block_height * bpp) / 8;
        const uint32_t nb_block_lines = height / block_height;

        const uint8_t *line_start = data.get(mem);
        for (uint32_t line = 0; line < nb_block_lines; ++line) {
            XXH3_64bits_update(hash_state, line_start, line_used_bytes);
            line_start += line_stride_in_bytes;
        }
        return hash ^ XXH3_64bits_digest(hash_state);
    }

    if (texture_type == SCE_GXM_TEXTURE_TILED) {
        // some of the tiles on the sides contain unused pixels
        hash_unaligned_tiled(data.get(mem), width, height, block_width, block_height, bpp, hash_state);
        return hash ^ XXH3_64bits_digest(hash_state);
    }

    // the texture is arbitrarily swizzled:
    // hash it one completely used block after the other
    const uint32_t swizzle_width = next_power_of_two(width);
    const uint32_t swizzle_height = next_power_of_two(height);
    const uint32_t swizzle_size = (swizzle_width * swizzle_height * bpp) / 8;
    hash_arbitrary_swizzled(data.get(mem), width, height, swizzle_width, swizzle_height, swizzle_size, hash_state);
    return hash ^ XXH3_64bits_digest(hash_state);
}
// Clamp the mip count of a texture to the number of mips that can be uploaded:
// at most bit_width(min(width, height)) levels, i.e. halving stops once the smallest side reaches 1.
uint16_t get_upload_mip(const uint16_t true_mip, const uint16_t width, const uint16_t height) {
    const uint16_t smallest_side = std::min(width, height);
    const uint16_t mip_limit = static_cast<uint16_t>(std::bit_width(smallest_side));
    return std::min(true_mip, mip_limit);
}
} // namespace texture
using namespace texture;
// Set up the texture cache (and the sampler cache when sampler_cache_size is not 0)
// and the folders used for texture export / import.
// hashless_texture_cache: when set, memory protection can be used instead of hashing to detect texture changes
// texture_folder, game_id: the textures are exported to / imported from <texture_folder>/{export,import}/<game_id>
// Always returns true.
bool TextureCache::init(const bool hashless_texture_cache, const fs::path &texture_folder, std::string_view game_id, const size_t sampler_cache_size) {
    use_protect = hashless_texture_cache;

    // texture queue: each entry must know its own index
    texture_queue.init(TextureCacheSize);
    for (size_t slot = 0; slot < TextureCacheSize; ++slot)
        texture_queue.items[slot].content.index = static_cast<int>(slot);

    // reserve right now to prevent stutter caused by the hashmap resizing
    texture_lookup.reserve(TextureCacheSize);

    // sampler queue, same principle
    use_sampler_cache = sampler_cache_size > 0;
    if (use_sampler_cache) {
        sampler_queue.init(sampler_cache_size);
        for (size_t slot = 0; slot < sampler_cache_size; ++slot)
            sampler_queue.items[slot].content.index = static_cast<int>(slot);

        sampler_lookup.reserve(sampler_cache_size);
    }

    const std::string game_folder(game_id);
    export_folder = texture_folder / "export" / game_folder;
    import_folder = texture_folder / "import" / game_folder;

    refresh_available_textures();

    return true;
}
// Upload every mip of every face of a GXM texture through upload_texture_impl
// (and export_texture_impl when export_textures is set).
// For each mip, the data is first converted to a format handled by the backend when needed,
// then converted to a linear layout if it is swizzled or tiled.
// Nothing is done for YUV422 textures (unimplemented) and for textures with a null data pointer.
void TextureCache::upload_texture(const SceGxmTexture &gxm_texture, MemState &mem) {
    R_PROFILE(__func__);

    bool is_vulkan = (backend == renderer::Backend::Vulkan);

    const SceGxmTextureFormat fmt = gxm::get_format(gxm_texture);
    const SceGxmTextureBaseFormat base_format = gxm::get_base_format(fmt);
    if (base_format == SCE_GXM_TEXTURE_BASE_FORMAT_YUV422) {
        LOG_ERROR_ONCE("Unimplemented YUV format {}, please report it to the developers.", log_hex(fmt::underlying(base_format)));
        return;
    }

    uint32_t width = gxm::get_width(gxm_texture);
    uint32_t height = gxm::get_height(gxm_texture);

    const Ptr<uint8_t> data(gxm_texture.data_addr << 2);
    uint8_t *texture_data = data.get(mem);
    if (!texture_data) {
        return;
    }

    // scratch buffers, reused for every mip and face
    std::vector<uint8_t> texture_data_decompressed;
    std::vector<uint8_t> texture_pixels_lineared;

    const void *pixels = nullptr;
    uint32_t pixels_per_stride = 0;

    // bpp and bytes_per_pixel describe the source data until a conversion happens,
    // after which they must both describe the converted data
    uint32_t bpp = gxm::bits_per_pixel(base_format);
    uint32_t bytes_per_pixel = (bpp + 7) >> 3;

    const auto texture_type = gxm_texture.texture_type();
    const bool is_swizzled = (texture_type == SCE_GXM_TEXTURE_SWIZZLED) || (texture_type == SCE_GXM_TEXTURE_CUBE) || (texture_type == SCE_GXM_TEXTURE_SWIZZLED_ARBITRARY) || (texture_type == SCE_GXM_TEXTURE_CUBE_ARBITRARY);

    uint32_t mip_index = 0;
    uint32_t total_mip = get_upload_mip(gxm_texture.true_mip_count(), width, height);
    uint32_t face_uploaded_count = 0;
    uint32_t face_total_count;
    // number of source bytes consumed so far, used to align the start of each face
    uint32_t total_source_so_far = 0;

    // Modified during decompression
    const uint32_t org_width = width;
    const uint32_t org_height = height;

    uint32_t face_align_bytes = 4;

    // > 0 means texture cube
    int upload_type = 0;
    face_total_count = 1;

    if (texture_type == SCE_GXM_TEXTURE_CUBE || texture_type == SCE_GXM_TEXTURE_CUBE_ARBITRARY) {
        upload_type = 1;
        face_total_count = 6;

        if (gxm_texture.mip_count != 0xF) {
            // faces of big enough mipmapped cubes are aligned to 2048 bytes
            const bool twok_align_cond1 = width >= 32 && height >= 32 && (bpp <= 8 || gxm::is_block_compressed_format(base_format));
            const bool twok_align_cond2 = width >= 16 && height >= 16 && (bpp == 16 || bpp == 32);
            const bool twok_align_cond3 = width >= 8 && height >= 8 && bpp == 64;

            if (twok_align_cond1 || twok_align_cond2 || twok_align_cond3) {
                face_align_bytes = 2048;
            }
        }
    }

    // dimensions used to compute the size a mip takes in memory
    uint32_t layout_width;
    uint32_t layout_height;
    if (gxm_texture.mip_count == 0xF && texture_type == SCE_GXM_TEXTURE_LINEAR) {
        // a mipcount of 0xF means no mips, so for cube and planes, they follow each other directly without padding
        layout_width = width;
        layout_height = height;
    } else {
        layout_width = next_power_of_two(width);
        layout_height = next_power_of_two(height);
    }

    auto [block_width, block_height] = gxm::get_block_size(base_format);
    // block size in bytes
    const uint32_t block_size = (block_width * block_height * bpp) / 8;
    // from the number of pixels in a mipmap, we can get the number of blocks by shifting to the right by block_shift
    const uint32_t block_shift = std::bit_width(block_width * block_height) - 1;

    uint32_t align_width = block_width;
    uint32_t align_height = block_height;
    if (texture_type == SCE_GXM_TEXTURE_LINEAR) {
        align_width = std::max(align_width, 8U);
    } else if (texture_type == SCE_GXM_TEXTURE_TILED) {
        align_width = std::max(align_width, 32U);
        align_height = std::max(align_height, 32U);
    }

    const uint32_t org_layout_width = layout_width;
    const uint32_t org_layout_height = layout_height;

    // each iteration uploads one mip of one face, nothing is uploaded for a texture with a null dimension
    while (face_uploaded_count < face_total_count && org_width > 0 && org_height > 0) {
        pixels = texture_data;
        SceGxmTextureBaseFormat upload_format = base_format;

        uint32_t memory_height = height;

        // Get pixels per stride
        pixels_per_stride = width;
        switch (texture_type) {
        case SCE_GXM_TEXTURE_SWIZZLED_ARBITRARY:
        case SCE_GXM_TEXTURE_CUBE_ARBITRARY:
            pixels_per_stride = next_power_of_two(width);
            memory_height = next_power_of_two(height);
            break;
        case SCE_GXM_TEXTURE_LINEAR_STRIDED:
            pixels_per_stride = gxm::get_stride_in_bytes(gxm_texture) / bytes_per_pixel;
            if (base_format == SCE_GXM_TEXTURE_BASE_FORMAT_P4) // P4 textures are the only one not byte aligned, therefore bytes_per_pixel should be 0.5 and not 1, correct it here
                pixels_per_stride *= 2;
            break;
        default:
            break;
        }

        pixels_per_stride = align(pixels_per_stride, align_width);
        memory_height = align(memory_height, align_height);

        // perform all needed conversions (formats not supported by modern GPUs)
        switch (base_format) {
        case SCE_GXM_TEXTURE_BASE_FORMAT_P4:
        case SCE_GXM_TEXTURE_BASE_FORMAT_P8:
            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            if (base_format == SCE_GXM_TEXTURE_BASE_FORMAT_P8) {
                palette_texture_to_rgba_8(reinterpret_cast<uint32_t *>(texture_data_decompressed.data()),
                    static_cast<const uint8_t *>(pixels), pixels_per_stride, memory_height, get_texture_palette(gxm_texture, mem));
            } else {
                palette_texture_to_rgba_4(reinterpret_cast<uint32_t *>(texture_data_decompressed.data()),
                    static_cast<const uint8_t *>(pixels), pixels_per_stride, memory_height, get_texture_palette(gxm_texture, mem));
            }
            pixels = texture_data_decompressed.data();
            bytes_per_pixel = 4;
            bpp = 32;
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_U8U8U8U8;
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_PVRT2BPP:
        case SCE_GXM_TEXTURE_BASE_FORMAT_PVRT4BPP:
        case SCE_GXM_TEXTURE_BASE_FORMAT_PVRTII2BPP:
        case SCE_GXM_TEXTURE_BASE_FORMAT_PVRTII4BPP:
            if (!is_swizzled)
                LOG_ERROR_ONCE("Unhandled non-swizzled PVRT format, please report it to the developers");

            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            // this actually also unswizzles the texture
            decompress_compressed_texture(base_format, texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height);
            bytes_per_pixel = 4;
            bpp = 32;
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_U8U8U8U8;
            pixels = texture_data_decompressed.data();
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_U8U3U3U2:
            // Convert U8U3U3U2 to U8U8U8U8
            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            convert_U8U3U3U2_to_U8U8U8U8(texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height);
            pixels = texture_data_decompressed.data();
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_U8U8U8U8;
            // bytes_per_pixel must follow bpp: the linearization buffer below is sized with bytes_per_pixel
            // while the unswizzle / untile functions are given bpp, so it would otherwise be too small
            bytes_per_pixel = 4;
            bpp = 32;
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_SE5M9M9M9:
            // this format is supported on all GPUs with vulkan
            if (is_vulkan)
                break;

            texture_data_decompressed.resize(pixels_per_stride * memory_height * 6);
            // NOTE(review): this is the only conversion given width instead of pixels_per_stride,
            // and bpp / bytes_per_pixel are not updated to the 6 bytes output - confirm this is intended
            decompress_packed_float_e5m9m9m9(base_format, texture_data_decompressed.data(), pixels, width, memory_height);
            pixels = texture_data_decompressed.data();
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_U2F10F10F10:
            // don't change what openGL is doing (which is completely wrong)
            if (!is_vulkan)
                break;

            texture_data_decompressed.resize(pixels_per_stride * memory_height * 8);
            // NOTE(review): bpp / bytes_per_pixel are not updated to the 8 bytes output,
            // which looks wrong if the texture then goes through the linearization below - confirm
            convert_u2f10f10f10_to_f16f16f16f16(texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height, fmt);
            pixels = texture_data_decompressed.data();
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_F16F16F16F16;
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_X8U24:
            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            if (is_vulkan) {
                // d24_u8 or x8_d24 is not supported on all GPUs (thanks AMD)
                convert_x8u24_to_f32(texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height, fmt);
                upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_F32;
            } else {
                // X8 = [24-31], D24 = [0-23], technically this is GL_UNSIGNED_INT_24_8_REV which does not exist
                // TODO: Requires shader to convert the normalized value read by GL to unsigned int. Just multiply by 2^24-1 when reading and you're done.
                // TODO: this is wrong, the depth is in the upper or lower 24 bits according to the swizzle
                convert_x8u24_to_u24x8(texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height);
            }
            pixels = texture_data_decompressed.data();
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_F32M:
            // Convert F32M to F32
            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            convert_f32m_to_f32(texture_data_decompressed.data(), pixels, pixels_per_stride, memory_height);
            pixels = texture_data_decompressed.data();
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_F32;
            break;

        case SCE_GXM_TEXTURE_BASE_FORMAT_YUV420P2:
        case SCE_GXM_TEXTURE_BASE_FORMAT_YUV420P3:
            texture_data_decompressed.resize(pixels_per_stride * memory_height * 4);
            yuv420_texture_to_rgb(texture_data_decompressed.data(),
                static_cast<const uint8_t *>(pixels), pixels_per_stride, memory_height, layout_width, layout_height,
                base_format == SCE_GXM_TEXTURE_BASE_FORMAT_YUV420P3);
            pixels = texture_data_decompressed.data();
            // same as for the other conversions to U8U8U8U8: bytes_per_pixel must follow bpp
            bytes_per_pixel = 4;
            bpp = 32;
            upload_format = SCE_GXM_TEXTURE_BASE_FORMAT_U8U8U8U8;
            break;

        default:
            break;
        }

        if (texture_type != SCE_GXM_TEXTURE_LINEAR && texture_type != SCE_GXM_TEXTURE_LINEAR_STRIDED && !gxm::is_pvrt_format(base_format)) {
            // Convert data to linear layout
            texture_pixels_lineared.resize(pixels_per_stride * memory_height * bytes_per_pixel);

            if (is_swizzled && gxm::is_bcn_format(base_format))
                // just unswizzle the blocks
                resolve_z_order_compressed_texture(base_format, texture_pixels_lineared.data(), pixels, pixels_per_stride, memory_height);
            else if (is_swizzled)
                swizzled_texture_to_linear_texture(texture_pixels_lineared.data(), static_cast<const uint8_t *>(pixels), pixels_per_stride, memory_height,
                    static_cast<std::uint8_t>(bpp));
            else
                tiled_texture_to_linear_texture(texture_pixels_lineared.data(), static_cast<const uint8_t *>(pixels), pixels_per_stride, memory_height,
                    static_cast<std::uint8_t>(bpp));

            pixels = texture_pixels_lineared.data();
        }

        upload_texture_impl(upload_format, width, height, mip_index, pixels, upload_type, pixels_per_stride);
        if (export_textures)
            export_texture_impl(upload_format, width, height, mip_index, pixels, upload_type, pixels_per_stride);

        // skip the source data of the mip that was just uploaded
        const uint32_t nb_pixels = align(layout_width, align_width) * align(layout_height, align_height);
        const uint32_t mip_size = (nb_pixels >> block_shift) * block_size;

        texture_data += mip_size;
        total_source_so_far += mip_size;

        // go to the next mip
        mip_index++;

        width /= 2;
        height /= 2;
        layout_width /= 2;
        layout_height /= 2;

        if (mip_index == total_mip) {
            // all the mips of this face were uploaded, go to the next face
            if ((texture_type == SCE_GXM_TEXTURE_CUBE || texture_type == SCE_GXM_TEXTURE_CUBE_ARBITRARY) && gxm_texture.mip_count != 0xF) {
                // we must do as if all possible mips are here
                while (layout_width > 0 && layout_height > 0) {
                    const uint32_t skipped_pixels = align(layout_width, align_width) * align(layout_height, align_height);
                    const uint32_t skipped_mip_size = (skipped_pixels >> block_shift) * block_size;

                    texture_data += skipped_mip_size;
                    total_source_so_far += skipped_mip_size;

                    layout_width /= 2;
                    layout_height /= 2;
                }
            }

            mip_index = 0;
            face_uploaded_count++;

            layout_width = org_layout_width;
            layout_height = org_layout_height;
            width = org_width;
            height = org_height;

            upload_type++;

            // the next face starts at an offset aligned to face_align_bytes
            uint32_t source_unaligned_size = total_source_so_far;
            total_source_so_far = align(total_source_so_far, face_align_bytes);

            texture_data += total_source_so_far - source_unaligned_size;
        }
    }
}
// Bit masks applied to the four 32-bit words of the raw representation of a texture
// (TextureGxmDataRepr) to remove everything related to the sampler state.
// When the sampler cache is used, cache_and_bind_texture ANDs the representation with one of
// these masks word by word before looking it up, so only the bits set to 1 here take part in the
// texture lookup key.

// mask used for every texture type except SCE_GXM_TEXTURE_LINEAR_STRIDED
static constexpr TextureGxmDataRepr default_texture_mask = {
0x981E0000,
0xFFFFFFFF,
0xFFFFFFFC,
0xF3FFFFFF
};
// mask used for SCE_GXM_TEXTURE_LINEAR_STRIDED textures, it only differs from the default mask by its first word
// NOTE(review): presumably because the first control word of strided textures also encodes the stride - confirm against the SceGxmTexture layout
static constexpr TextureGxmDataRepr strided_texture_mask = {
0x9FFE0E06,
0xFFFFFFFF,
0xFFFFFFFC,
0xF3FFFFFF
};
// Bind the cache entry matching a GXM texture, creating it (by replacing the least recently
// used entry) if it is not in the cache yet, and (re)upload the texture content when it is new
// or was detected as modified.
// Modifications are detected either by hashing the texture content or, for big enough textures
// when use_protect is set, by write-protecting the memory range of the texture.
// A replacement texture is loaded instead of the guest one when import_textures is set and a
// texture with a matching hash is available.
// When the sampler cache is used, the matching sampler is also retrieved at the end.
void TextureCache::cache_and_bind_texture(const SceGxmTexture &gxm_texture, MemState &mem) {
    R_PROFILE(__func__);

    size_t index = 0;
    bool configure = false;
    bool upload = false;
    // Try to find GXM texture in cache.
    int cached_gxm_texture_index = -1;

    TextureGxmDataRepr texture_repr = std::bit_cast<TextureGxmDataRepr>(gxm_texture);
    if (use_sampler_cache) {
        // remove the sampler state from the representation
        const TextureGxmDataRepr &mask = (gxm_texture.texture_type() == SCE_GXM_TEXTURE_LINEAR_STRIDED) ? strided_texture_mask : default_texture_mask;
        for (int i = 0; i < 4; i++)
            texture_repr[i] &= mask[i];
    }

    auto gxm_it = texture_lookup.find(texture_repr);
    if (gxm_it != texture_lookup.end())
        // we found the texture in the cache
        cached_gxm_texture_index = gxm_it->second->index;

    Address range_protect_begin = 0;
    Address range_protect_end = 0;

    TextureCacheInfo *info = nullptr;

    // the protected range only contains the pages entirely covered by the texture
    // it only depends on the texture address and size, so it can be recomputed at any time once info is set
    const auto compute_protect_range = [&]() {
        range_protect_begin = align(gxm_texture.data_addr << 2, mem.page_size);
        range_protect_end = align_down((gxm_texture.data_addr << 2) + info->texture_size, mem.page_size);
    };

    if (cached_gxm_texture_index == -1) {
        // Texture not found in cache.
        // get the least recently used texture, which info_list_head points to
        info = texture_queue.get_lru();
        index = info->index;
        if (info->texture_size > 0) {
            // Cache is full.
            LOG_WARN_ONCE("Texture cache is full. Starting to replace textures");
            texture_lookup.erase(std::bit_cast<TextureGxmDataRepr>(info->texture));
        }
        texture_lookup[texture_repr] = info;

        configure = true;
        upload = true;
        // only hash the first mips, assume no game would modify other mips (and faces) without modifying the first one
        info->texture_size = gxm::texture_size_first_mip(gxm_texture);
        // use the texture_repr representation, it contains everything we need and we can use it to erase the key
        // from texture_lookup later
        info->texture = std::bit_cast<SceGxmTexture>(texture_repr);

        // To prevent protecting too commonly accessed data that belongs to the page where the texture also resides
        // (for example, uniform buffer value and texture data got mixed, so page faults are triggered too many, it's not always good).
        // This works under the assumption that once this big enough texture decided to modify. It will have to modify either all of its data,
        // or replace with an entire new texture.
        bool should_use_hash = true;
        if (use_protect && info->texture_size >= mem.page_size * 4) {
            compute_protect_range();

            if (range_protect_end - range_protect_begin >= mem.page_size * 4) {
                should_use_hash = false;
            }
        }

        info->use_hash = should_use_hash;
        if (info->use_hash) {
            if (import_textures || export_textures)
                info->hash = hash_texture_nostride(gxm_texture, mem);
            else
                // the xor 1 is to make sure it won't be the same as hash_texture_nostride
                info->hash = hash_texture_data(gxm_texture, info->texture_size, mem) ^ 1;
        }
    } else {
        // Texture is cached.
        index = cached_gxm_texture_index;
        info = gxm_it->second;
        configure = false;

        if (info->use_hash) {
            // re-upload if the content hash changed
            const uint64_t previous_hash = info->hash;
            if (import_textures || export_textures)
                info->hash = hash_texture_nostride(gxm_texture, mem);
            else
                info->hash = hash_texture_data(gxm_texture, info->texture_size, mem) ^ 1;

            upload = previous_hash != info->hash;
        } else {
            // re-upload if the protection callback flagged a write
            upload = info->dirty;
        }
    }

    current_info = info;

    if (gxm_texture.data_addr == 0) {
        upload = false;
    }

    if (upload && !info->use_hash && (import_textures || export_textures)) {
        // we still need to get a hash of the texture
        info->hash = hash_texture_nostride(gxm_texture, mem);
    }

    importing_texture = false;
    if (upload && import_textures) {
        auto it = available_textures_hash.find(info->hash);
        if (it != available_textures_hash.end()) {
            importing_texture = true;
            loading_texture = it->second;
            // always configure for replacement texture (although it may have no effect)
            // the reason being that we may have two replacement textures for the same gxm identifier
            // with different dimensions, so we can't assume
            configure = true;
        }
    }

    // going from a replacement texture back to the guest one, the entry must be configured again
    if (upload && !importing_texture && info->is_imported)
        configure = true;

    select(index, gxm_texture);

    if (configure) {
        bool need_configure = true;
        if (importing_texture)
            need_configure = !import_configure_texture();

        if (need_configure) {
            // no replacement texture or it could not be configured, use the guest texture
            configure_texture(gxm_texture);
            importing_texture = false;
            info->is_imported = false;
        }
    }

    if (upload) {
        if (export_textures && !importing_texture)
            export_select(gxm_texture);

        if (importing_texture)
            import_upload_texture();
        else
            upload_texture(gxm_texture, mem);

        if (!info->use_hash) {
            // the range is only computed above when the entry is created; recompute it so that
            // the protection is also put back in place when a cached texture gets re-uploaded
            // (it would otherwise be requested with an address and a size of 0)
            compute_protect_range();

            info->dirty = false;
            // compare against the representation stored in the entry (which has the sampler state removed
            // when the sampler cache is used) to know if the entry still holds this texture when the callback fires
            add_protect(mem, range_protect_begin, range_protect_end - range_protect_begin, MemPerm::ReadOnly, [info, cached_texture = info->texture](Address, bool) {
                if (memcmp(&info->texture, &cached_texture, sizeof(SceGxmTexture)) == 0) {
                    info->dirty = true;
                }
                return true;
            });
        }

        upload_done();
        if (export_textures && !importing_texture)
            export_done();
        if (importing_texture)
            import_done();
    }

    importing_texture = false;

    // set the texture as the mru
    texture_queue.set_as_mru(info);

    // retrieve the appropriate sampler if needed
    if (use_sampler_cache)
        cache_and_bind_sampler(gxm_texture);
}
// Retrieve (or create) the cached sampler matching the sampler state of a GXM texture.
// The sampler state is packed in a 32-bit key which is never 0 (its lowest bit is always set),
// so a value of 0 in a cache entry means the entry was never used.
// Returns the index of the sampler, which is also stored in last_bound_sampler_index.
int TextureCache::cache_and_bind_sampler(const SceGxmTexture &gxm_texture) {
    const bool is_strided = gxm_texture.texture_type() == SCE_GXM_TEXTURE_LINEAR_STRIDED;

    // the addressing modes are at the same place in both representations
    uint32_t compact_repr = (gxm_texture.vaddr_mode << 2)
        | (gxm_texture.uaddr_mode << 5);

    if (is_strided) {
        // strided textures have a special representation
        compact_repr |= 0b11
            | (gxm_texture.mag_filter << 8);
    } else {
        compact_repr |= 0b01
            | (gxm_texture.mip_filter << 8)
            | (gxm_texture.min_filter << 9)
            | (gxm_texture.mag_filter << 11)
            | (gxm_texture.lod_bias << 13)
            | (gxm_texture.lod_min0 << 19)
            | (gxm_texture.lod_min1 << 21);
    }

    if (const auto found = sampler_lookup.find(compact_repr); found != sampler_lookup.end()) {
        // a matching sampler already exists
        const auto cached = found->second;
        sampler_queue.set_as_mru(cached);
        last_bound_sampler_index = cached->index;
        return last_bound_sampler_index;
    }

    // no matching sampler, recycle the least recently used entry
    SamplerCacheInfo *info = sampler_queue.get_lru();
    if (info->value != 0) {
        // the entry was in use (a key can never be 0), remove its previous key from the lookup
        sampler_lookup.erase(info->value);
    }

    sampler_queue.set_as_mru(info);
    sampler_lookup[compact_repr] = info;
    info->value = compact_repr;

    configure_sampler(info->index, gxm_texture);

    last_bound_sampler_index = info->index;
    return last_bound_sampler_index;
}
} // namespace renderer