mirror of
https://github.com/decaf-emu/decaf-emu.git
synced 2024-05-23 14:47:23 -04:00
350 lines
13 KiB
C++
350 lines
13 KiB
C++
#ifdef DECAF_VULKAN
|
|
|
|
#include "tiling_tests.h"
|
|
#include "addrlib_helpers.h"
|
|
#include "test_helpers.h"
|
|
|
|
#include <common/align.h>
|
|
#include <libgpu/gpu7_tiling_vulkan.h>
|
|
#include <vector>
|
|
|
|
#include "vulkan_helpers.h"
|
|
|
|
// Retiler instance shared by the Vulkan tiling tests in this file. It is
// initialised against gDevice in vulkanBeforeStart() and used by the test
// cases to record tile / untile dispatches.
static gpu7::tiling::vulkan::Retiler gVkRetiler;
|
|
|
|
static inline void
|
|
compareTilingToAddrLib(const gpu7::tiling::SurfaceDescription& desc,
|
|
std::vector<uint8_t>& input,
|
|
uint32_t firstSlice, uint32_t numSlices)
|
|
{
|
|
// Set up AddrLib to generate data to test against
|
|
auto addrLib = AddrLib { };
|
|
|
|
auto alibUntiled = std::vector<uint8_t> { };
|
|
auto gpu7Untiled = std::vector<uint8_t> { };
|
|
auto alibTiled = std::vector<uint8_t> { };
|
|
auto gpu7Tiled = std::vector<uint8_t> { };
|
|
|
|
// Compute surface info
|
|
auto alibInfo = addrLib.computeSurfaceInfo(desc, 0, 0);
|
|
auto gpu7Info = gpu7::tiling::computeSurfaceInfo(desc, 0);
|
|
|
|
REQUIRE(gpu7Info.surfSize == alibInfo.surfSize);
|
|
REQUIRE(input.size() >= gpu7Info.surfSize);
|
|
|
|
alibUntiled.resize(alibInfo.surfSize);
|
|
alibTiled.resize(alibInfo.surfSize);
|
|
gpu7Untiled.resize(gpu7Info.surfSize);
|
|
gpu7Tiled.resize(gpu7Info.surfSize);
|
|
|
|
// AddrLib
|
|
{
|
|
addrLib.untileSlices(desc, 0, input.data(), alibUntiled.data(), firstSlice, numSlices);
|
|
addrLib.tileSlices(desc, 0, alibUntiled.data(), alibTiled.data(), firstSlice, numSlices);
|
|
}
|
|
|
|
// Get the sizes we will use for our buffers. We oversize the work
|
|
// buffers to avoid errors causing buffer overruns which crash my GPU.
|
|
auto surfSize = gpu7Info.surfSize;
|
|
auto uploadSize = gpu7Info.surfSize;
|
|
auto workSize = gpu7Info.surfSize * 2;
|
|
|
|
// Create input/output buffers
|
|
auto uploadBuffer = allocateSsboBuffer(uploadSize, SsboBufferUsage::CpuToGpu);
|
|
auto inputBuffer = allocateSsboBuffer(workSize, SsboBufferUsage::Gpu);
|
|
auto untiledOutputBuffer = allocateSsboBuffer(workSize, SsboBufferUsage::Gpu);
|
|
auto tiledOutputBuffer = allocateSsboBuffer(workSize, SsboBufferUsage::Gpu);
|
|
auto untiledDownloadBuffer = allocateSsboBuffer(surfSize, SsboBufferUsage::CpuToGpu);
|
|
auto tiledDownloadBuffer = allocateSsboBuffer(surfSize, SsboBufferUsage::CpuToGpu);
|
|
|
|
// Upload to the upload buffer
|
|
uploadSsboBuffer(uploadBuffer, input.data(), uploadSize);
|
|
|
|
{
|
|
// Allocate a command buffer and fence
|
|
auto cmdBuffer = allocSyncCmdBuffer();
|
|
beginSyncCmdBuffer(cmdBuffer);
|
|
|
|
// Barrier our host writes the transfer reads
|
|
globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eHostWrite, vk::AccessFlagBits::eTransferRead);
|
|
|
|
// Clear the input/output buffers to a known value so its obvious if something goes wrong
|
|
cmdBuffer.cmds.fillBuffer(inputBuffer.buffer, 0, surfSize, 0xffffffff);
|
|
cmdBuffer.cmds.fillBuffer(inputBuffer.buffer, surfSize, workSize - surfSize, 0xfefefefe);
|
|
cmdBuffer.cmds.fillBuffer(untiledOutputBuffer.buffer, 0, surfSize, 0x00000000);
|
|
cmdBuffer.cmds.fillBuffer(untiledOutputBuffer.buffer, surfSize, workSize - surfSize, 0x01010101);
|
|
cmdBuffer.cmds.fillBuffer(tiledOutputBuffer.buffer, 0, surfSize, 0x00000000);
|
|
cmdBuffer.cmds.fillBuffer(tiledOutputBuffer.buffer, surfSize, workSize - surfSize, 0x01010101);
|
|
|
|
// Copy the data
|
|
cmdBuffer.cmds.copyBuffer(uploadBuffer.buffer, inputBuffer.buffer, { vk::BufferCopy(0, 0, uploadSize) });
|
|
|
|
// Barrier our transfers to the shader reads
|
|
globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead);
|
|
|
|
// Dispatch the actual retile
|
|
auto retileInfo = gpu7::tiling::computeRetileInfo(gpu7Info);
|
|
|
|
auto tiledFirstSliceIndex = align_down(firstSlice, retileInfo.microTileThickness);
|
|
auto tiledSliceOffset = tiledFirstSliceIndex * retileInfo.thinSliceBytes;
|
|
auto untiledSliceOffset = firstSlice * retileInfo.thinSliceBytes;
|
|
|
|
auto untileHandle = gVkRetiler.untile(retileInfo,
|
|
cmdBuffer.cmds,
|
|
untiledOutputBuffer.buffer, untiledSliceOffset,
|
|
inputBuffer.buffer, tiledSliceOffset,
|
|
firstSlice, numSlices);
|
|
|
|
// Barrier between these to force the pipeline flush
|
|
globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
|
|
|
|
auto tileHandle = gVkRetiler.tile(retileInfo,
|
|
cmdBuffer.cmds,
|
|
tiledOutputBuffer.buffer, tiledSliceOffset,
|
|
untiledOutputBuffer.buffer, untiledSliceOffset,
|
|
firstSlice, numSlices);
|
|
|
|
// Put a barrier from the shader writes to the transfers
|
|
globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead);
|
|
|
|
// Copy the output buffer to the download buffer
|
|
cmdBuffer.cmds.copyBuffer(untiledOutputBuffer.buffer, untiledDownloadBuffer.buffer, { vk::BufferCopy(0, 0, surfSize) });
|
|
cmdBuffer.cmds.copyBuffer(tiledOutputBuffer.buffer, tiledDownloadBuffer.buffer, { vk::BufferCopy(0, 0, surfSize) });
|
|
|
|
// Put a barrier from the transfer writes to the host reads
|
|
globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eHostRead);
|
|
|
|
// End, execute and free this buffer
|
|
endSyncCmdBuffer(cmdBuffer);
|
|
execSyncCmdBuffer(cmdBuffer);
|
|
freeSyncCmdBuffer(cmdBuffer);
|
|
|
|
// Free the retiler resources we used
|
|
gVkRetiler.releaseHandle(untileHandle);
|
|
gVkRetiler.releaseHandle(tileHandle);
|
|
}
|
|
|
|
// Capture the retiled data from the GPU
|
|
downloadSsboBuffer(untiledDownloadBuffer, gpu7Untiled.data(), surfSize);
|
|
downloadSsboBuffer(tiledDownloadBuffer, gpu7Tiled.data(), surfSize);
|
|
|
|
// Free the buffers associated with this
|
|
freeSsboBuffer(uploadBuffer);
|
|
freeSsboBuffer(inputBuffer);
|
|
freeSsboBuffer(untiledOutputBuffer);
|
|
freeSsboBuffer(tiledOutputBuffer);
|
|
freeSsboBuffer(untiledDownloadBuffer);
|
|
freeSsboBuffer(tiledDownloadBuffer);
|
|
|
|
// Compare that the images match
|
|
CHECK(compareImages(gpu7Untiled, alibUntiled));
|
|
CHECK(compareImages(gpu7Tiled, alibTiled));
|
|
}
|
|
|
|
// Correctness test: for every combination of test layout, tiling mode and
// format, retile sRandomData on the GPU and compare against AddrLib. Each
// combination gets its own nested SECTION so failures are easy to locate.
TEST_CASE("vkTiling")
{
   // Builds the surface description for one layout / tile mode / format combination.
   const auto describeSurface = [](const auto& layoutEntry, const auto& modeEntry, const auto& formatEntry) {
      auto desc = gpu7::tiling::SurfaceDescription { };

      // Dimensions come from the layout
      desc.dim = gpu7::tiling::SurfaceDim::Texture2DArray;
      desc.width = layoutEntry.width;
      desc.height = layoutEntry.height;
      desc.numSlices = layoutEntry.depth;

      // Tiling and pixel format
      desc.tileMode = modeEntry.tileMode;
      desc.format = formatEntry.format;
      desc.bpp = formatEntry.bpp;

      // Single sample, single level, no swizzle
      desc.numSamples = 1u;
      desc.numLevels = 1u;
      desc.bankSwizzle = 0u;
      desc.pipeSwizzle = 0u;

      if (formatEntry.depth) {
         desc.use = gpu7::tiling::SurfaceUse::DepthBuffer;
      } else {
         desc.use = gpu7::tiling::SurfaceUse::None;
      }

      return desc;
   };

   for (const auto& layoutEntry : sTestLayout) {
      SECTION(fmt::format("{}x{}x{} s{}n{}",
                          layoutEntry.width, layoutEntry.height, layoutEntry.depth,
                          layoutEntry.testFirstSlice, layoutEntry.testNumSlices))
      {
         for (const auto& modeEntry : sTestTilingMode) {
            SECTION(fmt::format("{}", tileModeToString(modeEntry.tileMode)))
            {
               for (const auto& formatEntry : sTestFormats) {
                  SECTION(fmt::format("{}bpp{}", formatEntry.bpp, formatEntry.depth ? " depth" : ""))
                  {
                     auto surfaceDesc = describeSurface(layoutEntry, modeEntry, formatEntry);

                     compareTilingToAddrLib(surfaceDesc,
                                            sRandomData,
                                            layoutEntry.testFirstSlice,
                                            layoutEntry.testNumSlices);
                  }
               }
            }
         }
      }
   }
}
|
|
|
|
struct PendingVkPerfEntry
|
|
{
|
|
gpu7::tiling::SurfaceDescription desc;
|
|
uint32_t firstSlice;
|
|
uint32_t numSlices;
|
|
|
|
gpu7::tiling::SurfaceInfo info;
|
|
SsboBuffer uploadBuffer;
|
|
SsboBuffer inputBuffer;
|
|
SsboBuffer outputBuffer;
|
|
|
|
gpu7::tiling::vulkan::RetileHandle handle;
|
|
};
|
|
|
|
// Benchmark: records one untile dispatch per tiling mode / format
// combination for the perf layout into a single command buffer, then times
// the execution of that command buffer.
TEST_CASE("vkTilingPerf", "[!benchmark]")
{
   // Get some random data to use as the source surface contents
   auto& untiled = sRandomData;

   // Some place to store pending tests
   std::vector<PendingVkPerfEntry> pendingTests;

   // Generate all the test cases to run
   auto& layout = sPerfTestLayout;
   for (auto& mode : sTestTilingMode) {
      for (auto& format : sTestFormats) {
         auto surface = gpu7::tiling::SurfaceDescription { };
         surface.tileMode = mode.tileMode;
         surface.format = format.format;
         surface.bpp = format.bpp;
         surface.width = layout.width;
         surface.height = layout.height;
         surface.numSlices = layout.depth;
         surface.numSamples = 1u;
         surface.numLevels = 1u;
         surface.bankSwizzle = 0u;
         surface.pipeSwizzle = 0u;
         surface.dim = gpu7::tiling::SurfaceDim::Texture2DArray;
         surface.use = format.depth ?
            gpu7::tiling::SurfaceUse::DepthBuffer :
            gpu7::tiling::SurfaceUse::None;

         // Value-initialise so the copy into the vector never reads
         // indeterminate members.
         auto test = PendingVkPerfEntry { };
         test.desc = surface;
         test.firstSlice = layout.testFirstSlice;
         test.numSlices = layout.testNumSlices;
         test.info = gpu7::tiling::computeSurfaceInfo(surface, 0);

         // Make sure our test data is big enough. This is checked before any
         // GPU buffer exists so a failure here cannot leak allocations.
         REQUIRE(untiled.size() >= test.info.surfSize);

         pendingTests.push_back(test);
      }
   }

   // Set up all the tests
   for (auto& test : pendingTests) {
      auto surfSize = static_cast<uint32_t>(test.info.surfSize);

      // Create input/output buffers
      test.uploadBuffer = allocateSsboBuffer(surfSize, SsboBufferUsage::CpuToGpu);
      test.inputBuffer = allocateSsboBuffer(surfSize, SsboBufferUsage::Gpu);
      test.outputBuffer = allocateSsboBuffer(surfSize, SsboBufferUsage::Gpu);

      // Upload to the upload buffer
      uploadSsboBuffer(test.uploadBuffer, untiled.data(), surfSize);
   }

   // Copy our uploaded data to the input buffers
   {
      auto cmdBuffer = allocSyncCmdBuffer();
      beginSyncCmdBuffer(cmdBuffer);

      // Barrier our host writes to the transfer reads. The barrier is global,
      // so a single one covers every copy recorded below.
      globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eHostWrite, vk::AccessFlagBits::eTransferRead);

      for (auto& test : pendingTests) {
         auto surfSize = static_cast<uint32_t>(test.info.surfSize);

         // Set up the input buffer on the GPU
         cmdBuffer.cmds.copyBuffer(test.uploadBuffer.buffer,
                                   test.inputBuffer.buffer,
                                   { vk::BufferCopy(0, 0, surfSize) });
      }

      // Barrier our transfers to the shader reads, once for all the copies
      globalVkMemoryBarrier(cmdBuffer.cmds, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead);

      endSyncCmdBuffer(cmdBuffer);
      execSyncCmdBuffer(cmdBuffer);
      freeSyncCmdBuffer(cmdBuffer);
   }

   // Run the retiles
   {
      auto cmdBuffer = allocSyncCmdBuffer();
      beginSyncCmdBuffer(cmdBuffer);

      for (auto& test : pendingTests) {
         // Calculate data on how to retile
         auto retileInfo = gpu7::tiling::computeRetileInfo(test.info);

         auto tiledFirstSliceIndex = align_down(test.firstSlice, retileInfo.microTileThickness);
         auto tiledSliceOffset = tiledFirstSliceIndex * retileInfo.thinSliceBytes;
         auto untiledSliceOffset = test.firstSlice * retileInfo.thinSliceBytes;

         // Dispatch the actual retile, keeping the handle so it can be
         // released after execution
         test.handle = gVkRetiler.untile(retileInfo,
                                         cmdBuffer.cmds,
                                         test.outputBuffer.buffer, untiledSliceOffset,
                                         test.inputBuffer.buffer, tiledSliceOffset,
                                         test.firstSlice, test.numSlices);
      }

      endSyncCmdBuffer(cmdBuffer);

      // Only the execution of the pre-recorded command buffer is timed
      BENCHMARK(fmt::format("processing ({} retiles)", pendingTests.size()))
      {
         execSyncCmdBuffer(cmdBuffer);
      };

      freeSyncCmdBuffer(cmdBuffer);
   }

   // Clean up all our resources used...
   for (auto& test : pendingTests) {
      // Free the retiler resources we used
      gVkRetiler.releaseHandle(test.handle);

      // Free the buffers we allocated
      freeSsboBuffer(test.uploadBuffer);
      freeSsboBuffer(test.inputBuffer);
      freeSsboBuffer(test.outputBuffer);
   }
}
|
|
|
|
bool vulkanBeforeStart()
|
|
{
|
|
if (!initialiseVulkan()) {
|
|
return false;
|
|
}
|
|
|
|
// Initialize our retiler
|
|
gVkRetiler.initialise(gDevice);
|
|
return true;
|
|
}
|
|
|
|
bool vulkanAfterComplete()
|
|
{
|
|
return shutdownVulkan();
|
|
}
|
|
|
|
#endif // DECAF_VULKAN
|