mirror of
https://github.com/xenia-project/xenia.git
synced 2024-06-11 17:17:30 -04:00
Compare commits
8 commits
11f017991a
...
d81a38f618
Author | SHA1 | Date | |
---|---|---|---|
d81a38f618 | |||
3189a0e259 | |||
a3304d252f | |||
f964290ea8 | |||
376bad5056 | |||
f0ad4f4587 | |||
add1b008e2 | |||
cec5fcfd06 |
|
@ -921,7 +921,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
|
|||
auto in = reinterpret_cast<const float*>(samples[j]);
|
||||
|
||||
// Raw samples sometimes aren't within [-1, 1]
|
||||
float scaled_sample = xe::saturate_signed(in[i]) * scale;
|
||||
float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale;
|
||||
|
||||
// Convert the sample and output it in big endian.
|
||||
auto sample = static_cast<int16_t>(scaled_sample);
|
||||
|
|
|
@ -56,6 +56,9 @@ class PosixMappedMemory : public MappedMemory {
|
|||
map_length = size_t(file_stat.st_size);
|
||||
}
|
||||
|
||||
// Ensure that the file is large enough.
|
||||
ftruncate(file_descriptor, map_length);
|
||||
|
||||
void* data =
|
||||
mmap(0, map_length, protection, MAP_SHARED, file_descriptor, offset);
|
||||
if (!data) {
|
||||
|
|
|
@ -60,20 +60,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) {
|
|||
return (value + multiple - 1) / multiple * multiple;
|
||||
}
|
||||
|
||||
// Using the same conventions as in shading languages, returning 0 for NaN.
|
||||
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is
|
||||
// always returned. Also -0 is not < +0, so +0 is also chosen for it.
|
||||
// For NaN, returns min_value (or, if it's NaN too, max_value).
|
||||
// If either of the boundaries is zero, and if the value is at that boundary or
|
||||
// exceeds it, the result will have the sign of that boundary. If both
|
||||
// boundaries are zero, which sign is selected among the argument signs is not
|
||||
// explicitly defined.
|
||||
template <typename T>
|
||||
constexpr T saturate_unsigned(T value) {
|
||||
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(0.0f), value));
|
||||
T clamp_float(T value, T min_value, T max_value) {
|
||||
float clamped_to_min = std::isgreater(value, min_value) ? value : min_value;
|
||||
return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value;
|
||||
}
|
||||
|
||||
// This diverges from the GPU NaN rules for signed normalized formats (NaN
|
||||
// should be converted to 0, not to -1), but this expectation is not needed most
|
||||
// of time, and cannot be met for free (unlike for 0...1 clamping).
|
||||
// Using the same conventions as in shading languages, returning 0 for NaN.
|
||||
// 0 is always returned as positive.
|
||||
template <typename T>
|
||||
constexpr T saturate_signed(T value) {
|
||||
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(-1.0f), value));
|
||||
T saturate(T value) {
|
||||
return clamp_float(value, static_cast<T>(0.0f), static_cast<T>(1.0f));
|
||||
}
|
||||
|
||||
// Gets the next power of two value that is greater than or equal to the given
|
||||
|
@ -330,12 +332,6 @@ inline uint64_t rotate_left(uint64_t v, uint8_t sh) {
|
|||
}
|
||||
#endif // XE_PLATFORM_WIN32
|
||||
|
||||
template <typename T>
|
||||
T clamp(T value, T min_value, T max_value) {
|
||||
const T t = value < min_value ? min_value : value;
|
||||
return t > max_value ? max_value : t;
|
||||
}
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
// Utilities for SSE values.
|
||||
template <int N>
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <functional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/byte_order.h"
|
||||
|
@ -24,6 +25,30 @@
|
|||
namespace xe {
|
||||
namespace memory {
|
||||
|
||||
// For variable declarations (not return values or `this` pointer).
|
||||
// Not propagated.
|
||||
#define XE_RESTRICT_VAR __restrict
|
||||
|
||||
// Aliasing-safe bit reinterpretation.
|
||||
// For more complex cases such as non-trivially-copyable types, write copying
|
||||
// code respecting the requirements for them externally instead of using these
|
||||
// functions.
|
||||
|
||||
template <typename Dst, typename Src>
|
||||
void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) {
|
||||
static_assert(sizeof(Dst) == sizeof(Src));
|
||||
static_assert(std::is_trivially_copyable_v<Dst>);
|
||||
static_assert(std::is_trivially_copyable_v<Src>);
|
||||
std::memcpy(&dst, &src, sizeof(Dst));
|
||||
}
|
||||
|
||||
template <typename Dst, typename Src>
|
||||
Dst Reinterpret(const Src& XE_RESTRICT_VAR src) {
|
||||
Dst dst;
|
||||
Reinterpret(dst, src);
|
||||
return dst;
|
||||
}
|
||||
|
||||
#if XE_PLATFORM_ANDROID
|
||||
void AndroidInitialize();
|
||||
void AndroidShutdown();
|
||||
|
|
|
@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") {
|
|||
auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch();
|
||||
auto error3 = wsys - wxsys;
|
||||
|
||||
REQUIRE(error1 < 10ms);
|
||||
REQUIRE(error1 > -10ms);
|
||||
REQUIRE(error2 < 10ms);
|
||||
REQUIRE(error2 > -10ms);
|
||||
// In AppVeyor, the difference often can be as large as roughly 16ms.
|
||||
REQUIRE(error1 < 20ms);
|
||||
REQUIRE(error1 > -20ms);
|
||||
REQUIRE(error2 < 20ms);
|
||||
REQUIRE(error2 > -20ms);
|
||||
REQUIRE(error3 < duration);
|
||||
REQUIRE(error3 > -duration);
|
||||
}
|
||||
|
|
|
@ -418,6 +418,7 @@ X64ThunkEmitter::X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator)
|
|||
X64ThunkEmitter::~X64ThunkEmitter() {}
|
||||
|
||||
HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
||||
#if XE_PLATFORM_WIN32
|
||||
// rcx = target
|
||||
// rdx = arg0 (context)
|
||||
// r8 = arg1 (guest return address)
|
||||
|
@ -460,6 +461,53 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
|||
mov(rdx, qword[rsp + 8 * 2]);
|
||||
mov(r8, qword[rsp + 8 * 3]);
|
||||
ret();
|
||||
#elif XE_PLATFORM_LINUX || XE_PLATFORM_MAC
|
||||
// System-V ABI args:
|
||||
// rdi = target
|
||||
// rsi = arg0 (context)
|
||||
// rdx = arg1 (guest return address)
|
||||
|
||||
struct _code_offsets {
|
||||
size_t prolog;
|
||||
size_t prolog_stack_alloc;
|
||||
size_t body;
|
||||
size_t epilog;
|
||||
size_t tail;
|
||||
} code_offsets = {};
|
||||
|
||||
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
|
||||
|
||||
code_offsets.prolog = getSize();
|
||||
|
||||
// rsp + 0 = return address
|
||||
// mov(qword[rsp + 8 * 3], rdx);
|
||||
// mov(qword[rsp + 8 * 2], rsi);
|
||||
// mov(qword[rsp + 8 * 1], rdi);
|
||||
sub(rsp, stack_size);
|
||||
|
||||
code_offsets.prolog_stack_alloc = getSize();
|
||||
code_offsets.body = getSize();
|
||||
|
||||
// Save nonvolatile registers.
|
||||
EmitSaveNonvolatileRegs();
|
||||
|
||||
mov(rax, rdi);
|
||||
// mov(rsi, rsi); // context
|
||||
mov(rcx, rdx); // return address
|
||||
call(rax);
|
||||
|
||||
EmitLoadNonvolatileRegs();
|
||||
|
||||
code_offsets.epilog = getSize();
|
||||
|
||||
add(rsp, stack_size);
|
||||
// mov(rdi, qword[rsp + 8 * 1]);
|
||||
// mov(rsi, qword[rsp + 8 * 2]);
|
||||
// mov(rdx, qword[rsp + 8 * 3]);
|
||||
ret();
|
||||
#else
|
||||
assert_always("Unknown platform ABI in host to guest thunk!");
|
||||
#endif
|
||||
|
||||
code_offsets.tail = getSize();
|
||||
|
||||
|
@ -479,6 +527,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
|||
}
|
||||
|
||||
GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
||||
#if XE_PLATFORM_WIN32
|
||||
// rcx = target function
|
||||
// rdx = arg0
|
||||
// r8 = arg1
|
||||
|
@ -515,6 +564,57 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
|
||||
add(rsp, stack_size);
|
||||
ret();
|
||||
#elif XE_PLATFORM_LINUX || XE_PLATFORM_MAC
|
||||
// This function is being called using the Microsoft ABI from CallNative
|
||||
// rcx = target function
|
||||
// rdx = arg0
|
||||
// r8 = arg1
|
||||
// r9 = arg2
|
||||
|
||||
// Must be translated to System-V ABI:
|
||||
// rdi = target function
|
||||
// rsi = arg0
|
||||
// rdx = arg1
|
||||
// rcx = arg2
|
||||
// r8, r9 - unused argument registers
|
||||
|
||||
struct _code_offsets {
|
||||
size_t prolog;
|
||||
size_t prolog_stack_alloc;
|
||||
size_t body;
|
||||
size_t epilog;
|
||||
size_t tail;
|
||||
} code_offsets = {};
|
||||
|
||||
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
|
||||
|
||||
code_offsets.prolog = getSize();
|
||||
|
||||
// rsp + 0 = return address
|
||||
sub(rsp, stack_size);
|
||||
|
||||
code_offsets.prolog_stack_alloc = getSize();
|
||||
code_offsets.body = getSize();
|
||||
|
||||
// Save off volatile registers.
|
||||
EmitSaveVolatileRegs();
|
||||
|
||||
mov(rax, rcx); // function
|
||||
mov(rdi, GetContextReg()); // context
|
||||
mov(rsi, rdx); // arg0
|
||||
mov(rdx, r8); // arg1
|
||||
mov(rcx, r9); // arg2
|
||||
call(rax);
|
||||
|
||||
EmitLoadVolatileRegs();
|
||||
|
||||
code_offsets.epilog = getSize();
|
||||
|
||||
add(rsp, stack_size);
|
||||
ret();
|
||||
#else
|
||||
assert_always("Unknown platform ABI in guest to host thunk!")
|
||||
#endif
|
||||
|
||||
code_offsets.tail = getSize();
|
||||
|
||||
|
@ -537,6 +637,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
uint64_t ResolveFunction(void* raw_context, uint64_t target_address);
|
||||
|
||||
ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
|
||||
#if XE_PLATFORM_WIN32
|
||||
// ebx = target PPC address
|
||||
// rcx = context
|
||||
|
||||
|
@ -572,6 +673,49 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
|
|||
|
||||
add(rsp, stack_size);
|
||||
jmp(rax);
|
||||
#elif XE_PLATFORM_LINUX || XE_PLATFORM_MAC
|
||||
// Function is called with the following params:
|
||||
// ebx = target PPC address
|
||||
// rsi = context
|
||||
|
||||
// System-V ABI args:
|
||||
// rdi = context
|
||||
// rsi = target PPC address
|
||||
|
||||
struct _code_offsets {
|
||||
size_t prolog;
|
||||
size_t prolog_stack_alloc;
|
||||
size_t body;
|
||||
size_t epilog;
|
||||
size_t tail;
|
||||
} code_offsets = {};
|
||||
|
||||
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
|
||||
|
||||
code_offsets.prolog = getSize();
|
||||
|
||||
// rsp + 0 = return address
|
||||
sub(rsp, stack_size);
|
||||
|
||||
code_offsets.prolog_stack_alloc = getSize();
|
||||
code_offsets.body = getSize();
|
||||
|
||||
// Save volatile registers
|
||||
EmitSaveVolatileRegs();
|
||||
mov(rdi, rsi); // context
|
||||
mov(rsi, rbx); // target PPC address
|
||||
mov(rax, reinterpret_cast<uint64_t>(&ResolveFunction));
|
||||
call(rax);
|
||||
|
||||
EmitLoadVolatileRegs();
|
||||
|
||||
code_offsets.epilog = getSize();
|
||||
|
||||
add(rsp, stack_size);
|
||||
jmp(rax);
|
||||
#else
|
||||
assert_always("Unknown platform ABI in resolve function!");
|
||||
#endif
|
||||
|
||||
code_offsets.tail = getSize();
|
||||
|
||||
|
|
|
@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
|
|||
ImVec2(kSplitterWidth, top_panes_height));
|
||||
if (ImGui::IsItemActive()) {
|
||||
function_pane_width += io.MouseDelta.x;
|
||||
function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX);
|
||||
function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX);
|
||||
}
|
||||
ImGui::SameLine();
|
||||
ImGui::BeginChild("##source_pane",
|
||||
|
@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
|
|||
ImVec2(kSplitterWidth, top_panes_height));
|
||||
if (ImGui::IsItemActive()) {
|
||||
source_pane_width += io.MouseDelta.x;
|
||||
source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX);
|
||||
source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX);
|
||||
}
|
||||
ImGui::SameLine();
|
||||
ImGui::BeginChild("##registers_pane",
|
||||
|
@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
|
|||
ImVec2(kSplitterWidth, top_panes_height));
|
||||
if (ImGui::IsItemActive()) {
|
||||
registers_pane_width += io.MouseDelta.x;
|
||||
registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX);
|
||||
registers_pane_width =
|
||||
xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX);
|
||||
}
|
||||
ImGui::SameLine();
|
||||
ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true);
|
||||
|
@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
|
|||
ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth));
|
||||
if (ImGui::IsItemActive()) {
|
||||
bottom_panes_height -= io.MouseDelta.y;
|
||||
bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX);
|
||||
bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX);
|
||||
}
|
||||
ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height),
|
||||
true);
|
||||
|
@ -245,7 +246,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
|
|||
ImVec2(kSplitterWidth, bottom_panes_height));
|
||||
if (ImGui::IsItemActive()) {
|
||||
breakpoints_pane_width -= io.MouseDelta.x;
|
||||
breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX);
|
||||
breakpoints_pane_width =
|
||||
xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX);
|
||||
}
|
||||
ImGui::SameLine();
|
||||
ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "xenia/base/byte_stream.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/base/ring_buffer.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
|
@ -334,7 +335,8 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
return;
|
||||
}
|
||||
|
||||
regs.values[index].u32 = value;
|
||||
// Volatile for the WAIT_REG_MEM loop.
|
||||
const_cast<volatile uint32_t&>(regs.values[index]) = value;
|
||||
if (!regs.GetRegisterInfo(index)) {
|
||||
XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
|
||||
}
|
||||
|
@ -342,19 +344,20 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
// Scratch register writeback.
|
||||
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
|
||||
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
|
||||
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
|
||||
// Enabled - write to address.
|
||||
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
|
||||
}
|
||||
} else {
|
||||
switch (index) {
|
||||
// If this is a COHER register, set the dirty flag.
|
||||
// This will block the command processor the next time it WAIT_MEM_REGs
|
||||
// This will block the command processor the next time it WAIT_REG_MEMs
|
||||
// and allow us to synchronize the memory.
|
||||
case XE_GPU_REG_COHER_STATUS_HOST: {
|
||||
regs.values[index].u32 |= UINT32_C(0x80000000);
|
||||
const_cast<volatile uint32_t&>(regs.values[index]) |=
|
||||
UINT32_C(0x80000000);
|
||||
} break;
|
||||
|
||||
case XE_GPU_REG_DC_LUT_RW_INDEX: {
|
||||
|
@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
|
||||
case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
|
||||
// Should be in the 256-entry table writing mode.
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
|
||||
// enable mask is blue, green, red.
|
||||
bool write_gamma_ramp_component =
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
|
||||
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
|
||||
if (write_gamma_ramp_component) {
|
||||
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
|
||||
|
@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
|
||||
case XE_GPU_REG_DC_LUT_PWL_DATA: {
|
||||
// Should be in the PWL writing mode.
|
||||
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
// Bit 7 of the index is ignored for PWL.
|
||||
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
|
||||
// DC_LUT_PWL_DATA is likely in the red, green, blue order because
|
||||
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
|
||||
bool write_gamma_ramp_component =
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
|
||||
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
|
||||
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
|
||||
if (write_gamma_ramp_component) {
|
||||
reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
|
||||
|
@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
|
||||
case XE_GPU_REG_DC_LUT_30_COLOR: {
|
||||
// Should be in the 256-entry table writing mode.
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
|
||||
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
|
||||
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
|
||||
uint32_t gamma_ramp_write_enable_mask =
|
||||
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111;
|
||||
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
|
||||
if (gamma_ramp_write_enable_mask) {
|
||||
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
|
||||
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
|
||||
|
@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() {
|
|||
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
|
||||
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
|
||||
|
||||
RegisterFile* regs = register_file_;
|
||||
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
|
||||
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
|
||||
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
|
||||
// Volatile because this may be called from the WAIT_REG_MEM loop.
|
||||
volatile uint32_t* regs_volatile = register_file_->values;
|
||||
auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
|
||||
uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
|
||||
uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
|
||||
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
|
||||
|
||||
if (!status_host.status) {
|
||||
return;
|
||||
|
@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() {
|
|||
base_host + size_host, size_host, action);
|
||||
|
||||
// Mark coherent.
|
||||
status_host.status = 0;
|
||||
regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
|
||||
}
|
||||
|
||||
void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); }
|
||||
|
@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// wait until a register or memory location is a specific value
|
||||
|
||||
uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t ref = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t mask = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t wait = reader->ReadAndSwap<uint32_t>();
|
||||
|
||||
bool is_memory = (wait_info & 0x10) != 0;
|
||||
|
||||
assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
const volatile uint32_t& value_ref =
|
||||
is_memory ? *reinterpret_cast<uint32_t*>(memory_->TranslatePhysical(
|
||||
poll_reg_addr & ~uint32_t(0x3)))
|
||||
: register_file_->values[poll_reg_addr];
|
||||
|
||||
bool matched = false;
|
||||
do {
|
||||
uint32_t value;
|
||||
if (wait_info & 0x10) {
|
||||
// Memory.
|
||||
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
|
||||
uint32_t value = value_ref;
|
||||
if (is_memory) {
|
||||
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)),
|
||||
sizeof(uint32_t));
|
||||
value = xenos::GpuSwap(value,
|
||||
static_cast<xenos::Endian>(poll_reg_addr & 0x3));
|
||||
} else {
|
||||
// Register.
|
||||
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
value = register_file_->values[poll_reg_addr].u32;
|
||||
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
|
||||
MakeCoherent();
|
||||
value = register_file_->values[poll_reg_addr].u32;
|
||||
value = value_ref;
|
||||
}
|
||||
}
|
||||
switch (wait_info & 0x7) {
|
||||
|
@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
|
|||
uint32_t rmw_info = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t and_mask = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t or_mask = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32;
|
||||
uint32_t value = register_file_->values[rmw_info & 0x1FFF];
|
||||
if ((rmw_info >> 31) & 0x1) {
|
||||
// & reg
|
||||
value &= register_file_->values[and_mask & 0x1FFF].u32;
|
||||
value &= register_file_->values[and_mask & 0x1FFF];
|
||||
} else {
|
||||
// & imm
|
||||
value &= and_mask;
|
||||
}
|
||||
if ((rmw_info >> 30) & 0x1) {
|
||||
// | reg
|
||||
value |= register_file_->values[or_mask & 0x1FFF].u32;
|
||||
value |= register_file_->values[or_mask & 0x1FFF];
|
||||
} else {
|
||||
// | imm
|
||||
value |= or_mask;
|
||||
|
@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
|
|||
uint32_t reg_val;
|
||||
|
||||
assert_true(reg_addr < RegisterFile::kRegisterCount);
|
||||
reg_val = register_file_->values[reg_addr].u32;
|
||||
reg_val = register_file_->values[reg_addr];
|
||||
|
||||
auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
|
||||
mem_addr &= ~0x3;
|
||||
|
@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
|
|||
} else {
|
||||
// Register.
|
||||
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
value = register_file_->values[poll_reg_addr].u32;
|
||||
value = register_file_->values[poll_reg_addr];
|
||||
}
|
||||
bool matched = false;
|
||||
switch (wait_info & 0x7) {
|
||||
|
@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
|
|||
if (fake_sample_count >= 0) {
|
||||
auto* pSampleCounts =
|
||||
memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
|
||||
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32);
|
||||
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
|
||||
// 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END
|
||||
// and used to detect a finished query.
|
||||
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
|
||||
|
@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
|
|||
// The scan converter writes the internal result back to the register here.
|
||||
// We just fake it and say it was visible in case it is read back.
|
||||
if (id < 32) {
|
||||
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |=
|
||||
uint32_t(1) << id;
|
||||
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1)
|
||||
<< id;
|
||||
} else {
|
||||
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |=
|
||||
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
|
||||
uint32_t(1) << (id - 32);
|
||||
}
|
||||
}
|
||||
|
@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() {
|
|||
// Write the initial register values, to be loaded directly into the
|
||||
// RegisterFile since all registers, including those that may have side
|
||||
// effects on setting, will be saved.
|
||||
trace_writer_.WriteRegisters(
|
||||
0, reinterpret_cast<const uint32_t*>(register_file_->values),
|
||||
RegisterFile::kRegisterCount, false);
|
||||
trace_writer_.WriteRegisters(0, register_file_->values,
|
||||
RegisterFile::kRegisterCount, false);
|
||||
|
||||
trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(),
|
||||
gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
||||
|
@ -2306,8 +2307,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
|
||||
vfetch_bits_remaining &= ~(uint32_t(1) << j);
|
||||
uint32_t vfetch_index = i * 32 + j;
|
||||
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
|
||||
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
|
||||
regs.GetVertexFetch(vfetch_index);
|
||||
switch (vfetch_constant.type) {
|
||||
case xenos::FetchConstantType::kVertex:
|
||||
break;
|
||||
|
@ -3050,10 +3051,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
|
|||
|
||||
// Blend factor.
|
||||
float blend_factor[] = {
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
|
||||
};
|
||||
// std::memcmp instead of != so in case of NaN, every draw won't be
|
||||
// invalidating it.
|
||||
|
@ -3100,7 +3101,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
||||
|
@ -3241,9 +3242,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// Tessellation factor range, plus 1.0 according to the images in
|
||||
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
|
||||
float tessellation_factor_min =
|
||||
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
|
||||
regs.Get<float>(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f;
|
||||
float tessellation_factor_max =
|
||||
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
|
||||
regs.Get<float>(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f;
|
||||
dirty |= system_constants_.tessellation_factor_range_min !=
|
||||
tessellation_factor_min;
|
||||
system_constants_.tessellation_factor_range_min = tessellation_factor_min;
|
||||
|
@ -3280,12 +3281,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
while (xe::bit_scan_forward(user_clip_planes_remaining,
|
||||
&user_clip_plane_index)) {
|
||||
user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index);
|
||||
const float* user_clip_plane =
|
||||
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32;
|
||||
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane,
|
||||
const void* user_clip_plane_regs =
|
||||
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4];
|
||||
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs,
|
||||
4 * sizeof(float))) {
|
||||
dirty = true;
|
||||
std::memcpy(user_clip_plane_write_ptr, user_clip_plane,
|
||||
std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs,
|
||||
4 * sizeof(float));
|
||||
}
|
||||
user_clip_plane_write_ptr += 4;
|
||||
|
@ -3423,9 +3424,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
color_exp_bias -= 5;
|
||||
}
|
||||
}
|
||||
float color_exp_bias_scale;
|
||||
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
|
||||
0x3F800000 + (color_exp_bias << 23);
|
||||
auto color_exp_bias_scale = xe::memory::Reinterpret<float>(
|
||||
int32_t(0x3F800000 + (color_exp_bias << 23)));
|
||||
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
|
||||
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
|
||||
if (edram_rov_used) {
|
||||
|
@ -3454,7 +3454,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
||||
4 * sizeof(float));
|
||||
uint32_t blend_factors_ops =
|
||||
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
|
||||
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
|
||||
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
|
||||
blend_factors_ops;
|
||||
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
|
||||
|
@ -3477,22 +3477,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
if (primitive_polygonal) {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
poly_offset_front_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
}
|
||||
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
|
||||
poly_offset_back_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
|
||||
poly_offset_back_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
|
||||
}
|
||||
} else {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
poly_offset_front_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
poly_offset_back_scale = poly_offset_front_scale;
|
||||
poly_offset_back_offset = poly_offset_front_offset;
|
||||
}
|
||||
|
@ -3567,21 +3567,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
}
|
||||
|
||||
dirty |= system_constants_.edram_blend_constant[0] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
|
||||
system_constants_.edram_blend_constant[0] =
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
|
||||
dirty |= system_constants_.edram_blend_constant[1] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
|
||||
system_constants_.edram_blend_constant[1] =
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
|
||||
dirty |= system_constants_.edram_blend_constant[2] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
|
||||
system_constants_.edram_blend_constant[2] =
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
|
||||
dirty |= system_constants_.edram_blend_constant[3] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
|
||||
system_constants_.edram_blend_constant[3] =
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
|
||||
}
|
||||
|
||||
cbuffer_binding_system_.up_to_date &= !dirty;
|
||||
|
@ -3638,10 +3638,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
|
|||
// These are the constant base addresses/ranges for shaders.
|
||||
// We have these hardcoded right now cause nothing seems to differ on the Xbox
|
||||
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
|
||||
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
|
||||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
|
||||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
|
||||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
|
||||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
|
||||
// Check if the float constant layout is still the same and get the counts.
|
||||
const Shader::ConstantRegisterMap& float_constant_map_vertex =
|
||||
vertex_shader->constant_register_map();
|
||||
|
@ -3715,8 +3715,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
|
|||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(float_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
(float_constant_index << 2)],
|
||||
4 * sizeof(float));
|
||||
float_constants += 4 * sizeof(float);
|
||||
}
|
||||
|
@ -3746,8 +3745,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
|
|||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(float_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
(float_constant_index << 2)],
|
||||
4 * sizeof(float));
|
||||
float_constants += 4 * sizeof(float);
|
||||
}
|
||||
|
@ -3767,7 +3765,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
|
|||
return false;
|
||||
}
|
||||
std::memcpy(bool_loop_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
|
||||
kBoolLoopConstantsSize);
|
||||
cbuffer_binding_bool_loop_.up_to_date = true;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
|
@ -3782,8 +3780,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
|
|||
if (fetch_constants == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::memcpy(fetch_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
||||
std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
|
||||
kFetchConstantsSize);
|
||||
cbuffer_binding_fetch_.up_to_date = true;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
|
|
|
@ -960,8 +960,8 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
|
|||
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const {
|
||||
const auto& regs = register_file();
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
|
||||
xenos::xe_gpu_texture_fetch_t fetch =
|
||||
regs.GetTextureFetch(binding.fetch_constant);
|
||||
|
||||
SamplerParameters parameters;
|
||||
|
||||
|
@ -1441,8 +1441,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture(
|
|||
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
|
||||
xenos::TextureFormat& format_out) {
|
||||
const auto& regs = register_file();
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
|
||||
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
|
||||
TextureKey key;
|
||||
BindingInfoFromFetchConstant(fetch, key, nullptr);
|
||||
if (!key.is_valid || key.base_page == 0 ||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/ucode.h"
|
||||
|
@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export(
|
|||
point_size_ = value[0];
|
||||
}
|
||||
if (value_mask & 0b0100) {
|
||||
vertex_kill_ = *reinterpret_cast<const uint32_t*>(&value[2]);
|
||||
vertex_kill_ = xe::memory::Reinterpret<uint32_t>(value[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
|
|||
xenos::Endian index_endian = vgt_dma_size.swap_mode;
|
||||
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) {
|
||||
xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
|
||||
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32;
|
||||
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE];
|
||||
uint32_t index_buffer_read_count =
|
||||
std::min(uint32_t(vgt_draw_initiator.num_indices),
|
||||
uint32_t(vgt_dma_size.num_words));
|
||||
|
@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
|
|||
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
|
||||
: 1.0f;
|
||||
float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
: 0.0f;
|
||||
float viewport_y_offset =
|
||||
pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
|
||||
: 0.0f;
|
||||
|
||||
int32_t point_vertex_min_diameter_float = 0;
|
||||
int32_t point_vertex_max_diameter_float = 0;
|
||||
float point_constant_radius_y = 0.0f;
|
||||
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
*reinterpret_cast<float*>(&point_vertex_min_diameter_float) =
|
||||
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
|
||||
*reinterpret_cast<float*>(&point_vertex_max_diameter_float) =
|
||||
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
|
||||
point_vertex_min_diameter_float = xe::memory::Reinterpret<int32_t>(
|
||||
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f));
|
||||
point_vertex_max_diameter_float = xe::memory::Reinterpret<int32_t>(
|
||||
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f));
|
||||
point_constant_radius_y =
|
||||
float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f);
|
||||
}
|
||||
|
@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
|
|||
// Vertex-specified diameter. Clamped effectively as a signed integer in
|
||||
// the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN
|
||||
// to the maximum.
|
||||
point_radius_y = position_y_export_sink.point_size().value();
|
||||
*reinterpret_cast<int32_t*>(&point_radius_y) = std::min(
|
||||
point_vertex_max_diameter_float,
|
||||
std::max(point_vertex_min_diameter_float,
|
||||
*reinterpret_cast<const int32_t*>(&point_radius_y)));
|
||||
point_radius_y *= 0.5f;
|
||||
point_radius_y =
|
||||
0.5f *
|
||||
xe::memory::Reinterpret<float>(std::min(
|
||||
point_vertex_max_diameter_float,
|
||||
std::max(point_vertex_min_diameter_float,
|
||||
xe::memory::Reinterpret<int32_t>(
|
||||
position_y_export_sink.point_size().value()))));
|
||||
} else {
|
||||
// Constant radius.
|
||||
point_radius_y = point_constant_radius_y;
|
||||
|
@ -331,11 +334,12 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
|
|||
}
|
||||
// Then apply the floating-point viewport offset.
|
||||
if (pa_cl_vte_cntl.vport_y_offset_ena) {
|
||||
viewport_bottom += regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32;
|
||||
viewport_bottom += regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
|
||||
}
|
||||
viewport_bottom += pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
|
||||
: 1.0f;
|
||||
viewport_bottom +=
|
||||
pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? std::abs(regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE))
|
||||
: 1.0f;
|
||||
// Using floor, or, rather, truncation (because maxing with zero anyway)
|
||||
// similar to how viewport scissoring behaves on real AMD, Intel and Nvidia
|
||||
// GPUs on Direct3D 12 (but not WARP), also like in
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
|
@ -100,20 +99,20 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs,
|
|||
// ones that are rendered (except for shadow volumes).
|
||||
if (pa_su_sc_mode_cntl.poly_offset_front_enable &&
|
||||
!pa_su_sc_mode_cntl.cull_front) {
|
||||
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
}
|
||||
if (pa_su_sc_mode_cntl.poly_offset_back_enable &&
|
||||
!pa_su_sc_mode_cntl.cull_back && !scale && !offset) {
|
||||
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
|
||||
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
|
||||
}
|
||||
} else {
|
||||
// Non-triangle primitives use the front offset, but it's toggled via
|
||||
// poly_offset_para_enable.
|
||||
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
}
|
||||
}
|
||||
scale_out = scale;
|
||||
|
@ -148,7 +147,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
|
|||
}
|
||||
|
||||
// Check if a color target is actually written.
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
|
||||
uint32_t rts_remaining = shader.writes_color_targets();
|
||||
uint32_t rt_index;
|
||||
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
|
||||
|
@ -311,24 +310,26 @@ void GetHostViewportInfo(const RegisterFile& regs,
|
|||
|
||||
// Obtain the original viewport values in a normalized way.
|
||||
float scale_xy[] = {
|
||||
pa_cl_vte_cntl.vport_x_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32
|
||||
: 1.0f,
|
||||
pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||
: 1.0f,
|
||||
pa_cl_vte_cntl.vport_x_scale_ena
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
|
||||
: 1.0f,
|
||||
pa_cl_vte_cntl.vport_y_scale_ena
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
|
||||
: 1.0f,
|
||||
};
|
||||
float scale_z = pa_cl_vte_cntl.vport_z_scale_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
|
||||
: 1.0f;
|
||||
float offset_base_xy[] = {
|
||||
pa_cl_vte_cntl.vport_x_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
|
||||
: 0.0f,
|
||||
pa_cl_vte_cntl.vport_y_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
|
||||
: 0.0f,
|
||||
};
|
||||
float offset_z = pa_cl_vte_cntl.vport_z_offset_ena
|
||||
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
||||
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
|
||||
: 0.0f;
|
||||
// Calculate all the integer.0 or integer.5 offsetting exactly at full
|
||||
// precision, separately so it can be used in other integer calculations
|
||||
|
@ -398,16 +399,11 @@ void GetHostViewportInfo(const RegisterFile& regs,
|
|||
float offset_axis = offset_base_xy[i] + offset_add_xy[i];
|
||||
float scale_axis = scale_xy[i];
|
||||
float scale_axis_abs = std::abs(scale_xy[i]);
|
||||
float axis_0 = offset_axis - scale_axis_abs;
|
||||
float axis_1 = offset_axis + scale_axis_abs;
|
||||
float axis_max_unscaled_float = float(xy_max_unscaled[i]);
|
||||
// max(0.0f, xy) drops NaN and < 0 - max picks the first argument in the
|
||||
// !(a < b) case (always for NaN), min as float (axis_max_unscaled_float
|
||||
// is well below 2^24) to safely drop very large values.
|
||||
uint32_t axis_0_int =
|
||||
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_0)));
|
||||
uint32_t axis_1_int =
|
||||
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_1)));
|
||||
uint32_t axis_0_int = uint32_t(xe::clamp_float(
|
||||
offset_axis - scale_axis_abs, 0.0f, axis_max_unscaled_float));
|
||||
uint32_t axis_1_int = uint32_t(xe::clamp_float(
|
||||
offset_axis + scale_axis_abs, 0.0f, axis_max_unscaled_float));
|
||||
uint32_t axis_extent_int = axis_1_int - axis_0_int;
|
||||
viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale;
|
||||
viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale;
|
||||
|
@ -510,8 +506,8 @@ void GetHostViewportInfo(const RegisterFile& regs,
|
|||
// extension. But cases when this really matters are yet to be found -
|
||||
// trying to fix this will result in more correct depth values, but
|
||||
// incorrect clipping.
|
||||
z_min = xe::saturate_unsigned(host_clip_offset_z);
|
||||
z_max = xe::saturate_unsigned(host_clip_offset_z + host_clip_scale_z);
|
||||
z_min = xe::saturate(host_clip_offset_z);
|
||||
z_max = xe::saturate(host_clip_offset_z + host_clip_scale_z);
|
||||
// Direct3D 12 doesn't allow reverse depth range - on some drivers it
|
||||
// works, on some drivers it doesn't, actually, but it was never
|
||||
// explicitly allowed by the specification.
|
||||
|
@ -615,7 +611,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
|||
return 0;
|
||||
}
|
||||
uint32_t normalized_color_mask = 0;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
|
||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||
// Exclude the render targets not statically written to by the pixel shader.
|
||||
// If the shader doesn't write to a render target, it shouldn't be written
|
||||
|
@ -661,10 +657,16 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
|
|||
? regs.Get<reg::SQ_VS_CONST>().base
|
||||
: regs.Get<reg::SQ_PS_CONST>().base;
|
||||
for (uint32_t constant_index : shader.memexport_stream_constants()) {
|
||||
const auto& stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_000_X +
|
||||
(float_constants_base + constant_index) * 4);
|
||||
if (!stream.index_count) {
|
||||
xenos::xe_gpu_memexport_stream_t stream =
|
||||
regs.GetMemExportStream(float_constants_base + constant_index);
|
||||
// Safety checks for stream constants potentially not set up if the export
|
||||
// isn't done on the control flow path taken by the shader (not checking the
|
||||
// Y component because the index is more likely to be constructed
|
||||
// arbitrarily).
|
||||
// The hardware validates the upper bits of eA according to the
|
||||
// IPR2015-00325 sequencer specification.
|
||||
if (stream.const_0x1 != 0x1 || stream.const_0x4b0 != 0x4B0 ||
|
||||
stream.const_0x96 != 0x96 || !stream.index_count) {
|
||||
continue;
|
||||
}
|
||||
const FormatInfo& format_info =
|
||||
|
@ -705,7 +707,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
|
|||
}
|
||||
// Add a new range if haven't expanded an existing one.
|
||||
if (!range_reused) {
|
||||
ranges_out.emplace_back(stream.base_address, stream_size_bytes);
|
||||
ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -824,8 +826,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
// Get the extent of pixels covered by the resolve rectangle, according to the
|
||||
// top-left rasterization rule.
|
||||
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
|
||||
auto fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
|
||||
xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0);
|
||||
if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) {
|
||||
XELOGE("Unsupported resolve vertex buffer format");
|
||||
assert_always();
|
||||
|
@ -878,10 +879,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
GetScissor(regs, scissor, false);
|
||||
int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]);
|
||||
int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]);
|
||||
x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right);
|
||||
y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom);
|
||||
x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right);
|
||||
y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom);
|
||||
x0 = std::clamp(x0, int32_t(scissor.offset[0]), scissor_right);
|
||||
y0 = std::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom);
|
||||
x1 = std::clamp(x1, int32_t(scissor.offset[0]), scissor_right);
|
||||
y1 = std::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom);
|
||||
|
||||
assert_true(x0 <= x1 && y0 <= y1);
|
||||
|
||||
|
@ -994,7 +995,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
}
|
||||
|
||||
// Calculate the destination memory extent.
|
||||
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
|
||||
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
|
||||
uint32_t copy_dest_base_adjusted = rb_copy_dest_base;
|
||||
uint32_t copy_dest_extent_start, copy_dest_extent_end;
|
||||
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
|
||||
|
@ -1164,9 +1165,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
info_out.copy_dest_info.copy_dest_swap = false;
|
||||
}
|
||||
|
||||
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
|
||||
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
|
||||
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
|
||||
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR];
|
||||
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR];
|
||||
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO];
|
||||
|
||||
XELOGD(
|
||||
"Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially "
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -1102,10 +1103,10 @@ struct Src : OperandAddress {
|
|||
}
|
||||
static Src LI(int32_t x) { return LI(x, x, x, x); }
|
||||
static Src LF(float x, float y, float z, float w) {
|
||||
return LU(*reinterpret_cast<const uint32_t*>(&x),
|
||||
*reinterpret_cast<const uint32_t*>(&y),
|
||||
*reinterpret_cast<const uint32_t*>(&z),
|
||||
*reinterpret_cast<const uint32_t*>(&w));
|
||||
return LU(xe::memory::Reinterpret<uint32_t>(x),
|
||||
xe::memory::Reinterpret<uint32_t>(y),
|
||||
xe::memory::Reinterpret<uint32_t>(z),
|
||||
xe::memory::Reinterpret<uint32_t>(w));
|
||||
}
|
||||
static Src LF(float x) { return LF(x, x, x, x); }
|
||||
static Src LP(const uint32_t* xyzw) {
|
||||
|
@ -1222,12 +1223,10 @@ struct Src : OperandAddress {
|
|||
bool negate) {
|
||||
if (is_integer) {
|
||||
if (absolute) {
|
||||
*reinterpret_cast<int32_t*>(&value) =
|
||||
std::abs(*reinterpret_cast<const int32_t*>(&value));
|
||||
value = uint32_t(std::abs(int32_t(value)));
|
||||
}
|
||||
if (negate) {
|
||||
*reinterpret_cast<int32_t*>(&value) =
|
||||
-*reinterpret_cast<const int32_t*>(&value);
|
||||
value = uint32_t(-int32_t(value));
|
||||
}
|
||||
} else {
|
||||
if (absolute) {
|
||||
|
|
|
@ -201,7 +201,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
|
|||
}
|
||||
|
||||
assert_true(r < RegisterFile::kRegisterCount);
|
||||
return register_file_.values[r].u32;
|
||||
return register_file_.values[r];
|
||||
}
|
||||
|
||||
void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
|
||||
|
@ -219,7 +219,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
|
|||
}
|
||||
|
||||
assert_true(r < RegisterFile::kRegisterCount);
|
||||
register_file_.values[r].u32 = value;
|
||||
register_file_.values[r] = value;
|
||||
}
|
||||
|
||||
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {
|
||||
|
|
|
@ -42,7 +42,7 @@ struct PacketAction {
|
|||
union {
|
||||
struct {
|
||||
uint32_t index;
|
||||
RegisterFile::RegisterValue value;
|
||||
uint32_t value;
|
||||
} register_write;
|
||||
struct {
|
||||
uint64_t value;
|
||||
|
@ -56,7 +56,7 @@ struct PacketAction {
|
|||
PacketAction action;
|
||||
action.type = Type::kRegisterWrite;
|
||||
action.register_write.index = index;
|
||||
action.register_write.value.u32 = value;
|
||||
action.register_write.value = value;
|
||||
return action;
|
||||
}
|
||||
|
||||
|
|
|
@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
uint32_t index_size_log2 =
|
||||
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
|
||||
// The base should already be aligned, but aligning here too for safety.
|
||||
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
|
||||
~uint32_t((1 << index_size_log2) - 1);
|
||||
guest_index_base =
|
||||
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
|
||||
guest_index_buffer_needed_bytes = guest_draw_vertex_count
|
||||
<< index_size_log2;
|
||||
if (guest_index_base > SharedMemory::kBufferSize ||
|
||||
|
@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
|||
uint32_t index_size_log2 =
|
||||
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
|
||||
// The base should already be aligned, but aligning here too for safety.
|
||||
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
|
||||
~uint32_t((1 << index_size_log2) - 1);
|
||||
guest_index_base =
|
||||
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
|
||||
guest_index_buffer_needed_bytes = guest_draw_vertex_count
|
||||
<< index_size_log2;
|
||||
if (guest_index_base > SharedMemory::kBufferSize ||
|
||||
|
|
|
@ -12,8 +12,12 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -34,39 +38,53 @@ class RegisterFile {
|
|||
static const RegisterInfo* GetRegisterInfo(uint32_t index);
|
||||
|
||||
static constexpr size_t kRegisterCount = 0x5003;
|
||||
union RegisterValue {
|
||||
uint32_t u32;
|
||||
float f32;
|
||||
};
|
||||
RegisterValue values[kRegisterCount];
|
||||
uint32_t values[kRegisterCount];
|
||||
|
||||
const uint32_t& operator[](uint32_t reg) const { return values[reg]; }
|
||||
uint32_t& operator[](uint32_t reg) { return values[reg]; }
|
||||
|
||||
const RegisterValue& operator[](uint32_t reg) const { return values[reg]; }
|
||||
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
|
||||
const RegisterValue& operator[](Register reg) const { return values[reg]; }
|
||||
RegisterValue& operator[](Register reg) { return values[reg]; }
|
||||
template <typename T>
|
||||
const T& Get(uint32_t reg) const {
|
||||
return *reinterpret_cast<const T*>(&values[reg]);
|
||||
T Get(uint32_t reg) const {
|
||||
return xe::memory::Reinterpret<T>(values[reg]);
|
||||
}
|
||||
template <typename T>
|
||||
T& Get(uint32_t reg) {
|
||||
return *reinterpret_cast<T*>(&values[reg]);
|
||||
T Get(Register reg) const {
|
||||
return Get<T>(static_cast<uint32_t>(reg));
|
||||
}
|
||||
template <typename T>
|
||||
const T& Get(Register reg) const {
|
||||
return *reinterpret_cast<const T*>(&values[reg]);
|
||||
T Get() const {
|
||||
return Get<T>(T::register_index);
|
||||
}
|
||||
template <typename T>
|
||||
T& Get(Register reg) {
|
||||
return *reinterpret_cast<T*>(&values[reg]);
|
||||
|
||||
xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const {
|
||||
assert_true(index < 96);
|
||||
xenos::xe_gpu_vertex_fetch_t fetch;
|
||||
std::memcpy(&fetch,
|
||||
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
(sizeof(fetch) / sizeof(uint32_t)) * index],
|
||||
sizeof(fetch));
|
||||
return fetch;
|
||||
}
|
||||
template <typename T>
|
||||
const T& Get() const {
|
||||
return *reinterpret_cast<const T*>(&values[T::register_index]);
|
||||
|
||||
xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const {
|
||||
assert_true(index < 32);
|
||||
xenos::xe_gpu_texture_fetch_t fetch;
|
||||
std::memcpy(&fetch,
|
||||
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
(sizeof(fetch) / sizeof(uint32_t)) * index],
|
||||
sizeof(fetch));
|
||||
return fetch;
|
||||
}
|
||||
template <typename T>
|
||||
T& Get() {
|
||||
return *reinterpret_cast<T*>(&values[T::register_index]);
|
||||
|
||||
xenos::xe_gpu_memexport_stream_t GetMemExportStream(
|
||||
uint32_t float_constant_index) const {
|
||||
assert_true(float_constant_index < 512);
|
||||
xenos::xe_gpu_memexport_stream_t stream;
|
||||
std::memcpy(
|
||||
&stream,
|
||||
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index],
|
||||
sizeof(stream));
|
||||
return stream;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -28,10 +28,7 @@ void ShaderInterpreter::Execute() {
|
|||
state_.Reset();
|
||||
|
||||
const uint32_t* bool_constants =
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
|
||||
const xenos::LoopConstant* loop_constants =
|
||||
reinterpret_cast<const xenos::LoopConstant*>(
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32);
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031];
|
||||
|
||||
bool exec_ended = false;
|
||||
uint32_t cf_index_next = 1;
|
||||
|
@ -140,8 +137,8 @@ void ShaderInterpreter::Execute() {
|
|||
cf_index_next = cf_loop_start.address();
|
||||
continue;
|
||||
}
|
||||
xenos::LoopConstant loop_constant =
|
||||
loop_constants[cf_loop_start.loop_id()];
|
||||
auto loop_constant = register_file_.Get<xenos::LoopConstant>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id());
|
||||
state_.loop_constants[state_.loop_stack_depth] = loop_constant;
|
||||
uint32_t& loop_iterator_ref =
|
||||
state_.loop_iterators[state_.loop_stack_depth];
|
||||
|
@ -170,8 +167,11 @@ void ShaderInterpreter::Execute() {
|
|||
&cf_instr);
|
||||
xenos::LoopConstant loop_constant =
|
||||
state_.loop_constants[state_.loop_stack_depth - 1];
|
||||
assert_true(loop_constant.value ==
|
||||
loop_constants[cf_loop_end.loop_id()].value);
|
||||
assert_zero(
|
||||
std::memcmp(&loop_constant,
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 +
|
||||
cf_loop_end.loop_id()],
|
||||
sizeof(loop_constant)));
|
||||
uint32_t loop_iterator =
|
||||
++state_.loop_iterators[state_.loop_stack_depth - 1];
|
||||
if (loop_iterator < loop_constant.count &&
|
||||
|
@ -257,28 +257,31 @@ void ShaderInterpreter::Execute() {
|
|||
}
|
||||
}
|
||||
|
||||
const float* ShaderInterpreter::GetFloatConstant(
|
||||
const std::array<float, 4> ShaderInterpreter::GetFloatConstant(
|
||||
uint32_t address, bool is_relative, bool relative_address_is_a0) const {
|
||||
static const float zero[4] = {};
|
||||
int32_t index = int32_t(address);
|
||||
if (is_relative) {
|
||||
index += relative_address_is_a0 ? state_.address_register
|
||||
: state_.GetLoopAddress();
|
||||
}
|
||||
if (index < 0) {
|
||||
return zero;
|
||||
return std::array<float, 4>();
|
||||
}
|
||||
auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>(
|
||||
shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST
|
||||
: XE_GPU_REG_SQ_PS_CONST);
|
||||
if (uint32_t(index) > base_and_size_minus_1.size) {
|
||||
return zero;
|
||||
return std::array<float, 4>();
|
||||
}
|
||||
index += base_and_size_minus_1.base;
|
||||
if (index >= 512) {
|
||||
return zero;
|
||||
return std::array<float, 4>();
|
||||
}
|
||||
return &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32;
|
||||
std::array<float, 4> value;
|
||||
std::memcpy(value.data(),
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index],
|
||||
sizeof(float) * 4);
|
||||
return value;
|
||||
}
|
||||
|
||||
void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
||||
|
@ -297,6 +300,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
const float* vector_src_ptr;
|
||||
uint32_t vector_src_register = instr.src_reg(1 + i);
|
||||
bool vector_src_absolute = false;
|
||||
std::array<float, 4> vector_src_float_constant;
|
||||
if (instr.src_is_temp(1 + i)) {
|
||||
vector_src_ptr = GetTempRegister(
|
||||
ucode::AluInstruction::src_temp_reg(vector_src_register),
|
||||
|
@ -304,9 +308,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
|
||||
vector_src_register);
|
||||
} else {
|
||||
vector_src_ptr = GetFloatConstant(
|
||||
vector_src_float_constant = GetFloatConstant(
|
||||
vector_src_register, instr.src_const_is_addressed(1 + i),
|
||||
instr.is_const_address_register_relative());
|
||||
vector_src_ptr = vector_src_float_constant.data();
|
||||
}
|
||||
uint32_t vector_src_absolute_mask =
|
||||
~(uint32_t(vector_src_absolute) << 31);
|
||||
|
@ -341,16 +346,18 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
} break;
|
||||
case ucode::AluVectorOpcode::kMax: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i]
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
vector_result[i] =
|
||||
std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
}
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kMin: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = vector_operands[0][i] < vector_operands[1][i]
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
vector_result[i] =
|
||||
std::isless(vector_operands[0][i], vector_operands[1][i])
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
}
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kSeq: {
|
||||
|
@ -361,14 +368,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
} break;
|
||||
case ucode::AluVectorOpcode::kSgt: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] =
|
||||
float(vector_operands[0][i] > vector_operands[1][i]);
|
||||
vector_result[i] = float(
|
||||
std::isgreater(vector_operands[0][i], vector_operands[1][i]));
|
||||
}
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kSge: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] =
|
||||
float(vector_operands[0][i] >= vector_operands[1][i]);
|
||||
vector_result[i] = float(std::isgreaterequal(vector_operands[0][i],
|
||||
vector_operands[1][i]));
|
||||
}
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kSne: {
|
||||
|
@ -414,14 +421,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
} break;
|
||||
case ucode::AluVectorOpcode::kCndGe: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = vector_operands[0][i] >= 0.0f
|
||||
vector_result[i] = std::isgreaterequal(vector_operands[0][i], 0.0f)
|
||||
? vector_operands[1][i]
|
||||
: vector_operands[2][i];
|
||||
}
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kCndGt: {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = vector_operands[0][i] > 0.0f
|
||||
vector_result[i] = std::isgreater(vector_operands[0][i], 0.0f)
|
||||
? vector_operands[1][i]
|
||||
: vector_operands[2][i];
|
||||
}
|
||||
|
@ -473,32 +480,38 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z);
|
||||
// Result is T coordinate, S coordinate, 2 * major axis, face ID.
|
||||
if (z_abs >= x_abs && z_abs >= y_abs) {
|
||||
bool z_negative = std::isless(z, 0.0f);
|
||||
vector_result[0] = -y;
|
||||
vector_result[1] = z < 0.0f ? -x : x;
|
||||
vector_result[1] = z_negative ? -x : x;
|
||||
vector_result[2] = z;
|
||||
vector_result[3] = z < 0.0f ? 5.0f : 4.0f;
|
||||
vector_result[3] = z_negative ? 5.0f : 4.0f;
|
||||
} else if (y_abs >= x_abs) {
|
||||
vector_result[0] = y < 0.0f ? -z : z;
|
||||
bool y_negative = std::isless(y, 0.0f);
|
||||
vector_result[0] = y_negative ? -z : z;
|
||||
vector_result[1] = x;
|
||||
vector_result[2] = y;
|
||||
vector_result[3] = y < 0.0f ? 3.0f : 2.0f;
|
||||
vector_result[3] = y_negative ? 3.0f : 2.0f;
|
||||
} else {
|
||||
bool x_negative = std::isless(x, 0.0f);
|
||||
vector_result[0] = -y;
|
||||
vector_result[1] = x < 0.0f ? z : -z;
|
||||
vector_result[1] = x_negative ? z : -z;
|
||||
vector_result[2] = x;
|
||||
vector_result[3] = x < 0.0f ? 1.0f : 0.0f;
|
||||
vector_result[3] = x_negative ? 1.0f : 0.0f;
|
||||
}
|
||||
vector_result[2] *= 2.0f;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kMax4: {
|
||||
if (vector_operands[0][0] >= vector_operands[0][1] &&
|
||||
vector_operands[0][0] >= vector_operands[0][2] &&
|
||||
vector_operands[0][0] >= vector_operands[0][3]) {
|
||||
if (std::isgreaterequal(vector_operands[0][0], vector_operands[0][1]) &&
|
||||
std::isgreaterequal(vector_operands[0][0], vector_operands[0][2]) &&
|
||||
std::isgreaterequal(vector_operands[0][0], vector_operands[0][3])) {
|
||||
vector_result[0] = vector_operands[0][0];
|
||||
} else if (vector_operands[0][1] >= vector_operands[0][2] &&
|
||||
vector_operands[0][1] >= vector_operands[0][3]) {
|
||||
} else if (std::isgreaterequal(vector_operands[0][1],
|
||||
vector_operands[0][2]) &&
|
||||
std::isgreaterequal(vector_operands[0][1],
|
||||
vector_operands[0][3])) {
|
||||
vector_result[0] = vector_operands[0][1];
|
||||
} else if (vector_operands[0][2] >= vector_operands[0][3]) {
|
||||
} else if (std::isgreaterequal(vector_operands[0][2],
|
||||
vector_operands[0][3])) {
|
||||
vector_result[0] = vector_operands[0][2];
|
||||
} else {
|
||||
vector_result[0] = vector_operands[0][3];
|
||||
|
@ -524,21 +537,21 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kSetpGtPush: {
|
||||
state_.predicate =
|
||||
vector_operands[0][3] == 0.0f && vector_operands[1][3] > 0.0f;
|
||||
vector_result[0] =
|
||||
(vector_operands[0][0] == 0.0f && vector_operands[1][0] > 0.0f)
|
||||
? 0.0f
|
||||
: vector_operands[0][0] + 1.0f;
|
||||
state_.predicate = vector_operands[0][3] == 0.0f &&
|
||||
std::isgreater(vector_operands[1][3], 0.0f);
|
||||
vector_result[0] = (vector_operands[0][0] == 0.0f &&
|
||||
std::isgreater(vector_operands[1][0], 0.0f))
|
||||
? 0.0f
|
||||
: vector_operands[0][0] + 1.0f;
|
||||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kSetpGePush: {
|
||||
state_.predicate =
|
||||
vector_operands[0][3] == 0.0f && vector_operands[1][3] >= 0.0f;
|
||||
vector_result[0] =
|
||||
(vector_operands[0][0] == 0.0f && vector_operands[1][0] >= 0.0f)
|
||||
? 0.0f
|
||||
: vector_operands[0][0] + 1.0f;
|
||||
state_.predicate = vector_operands[0][3] == 0.0f &&
|
||||
std::isgreaterequal(vector_operands[1][3], 0.0f);
|
||||
vector_result[0] = (vector_operands[0][0] == 0.0f &&
|
||||
std::isgreaterequal(vector_operands[1][0], 0.0f))
|
||||
? 0.0f
|
||||
: vector_operands[0][0] + 1.0f;
|
||||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
// Not implementing pixel kill currently, the interpreter is currently
|
||||
|
@ -552,19 +565,19 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kKillGt: {
|
||||
vector_result[0] =
|
||||
float(vector_operands[0][0] > vector_operands[1][0] ||
|
||||
vector_operands[0][1] > vector_operands[1][1] ||
|
||||
vector_operands[0][2] > vector_operands[1][2] ||
|
||||
vector_operands[0][3] > vector_operands[1][3]);
|
||||
vector_result[0] = float(
|
||||
std::isgreater(vector_operands[0][0], vector_operands[1][0]) ||
|
||||
std::isgreater(vector_operands[0][1], vector_operands[1][1]) ||
|
||||
std::isgreater(vector_operands[0][2], vector_operands[1][2]) ||
|
||||
std::isgreater(vector_operands[0][3], vector_operands[1][3]));
|
||||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kKillGe: {
|
||||
vector_result[0] =
|
||||
float(vector_operands[0][0] >= vector_operands[1][0] ||
|
||||
vector_operands[0][1] >= vector_operands[1][1] ||
|
||||
vector_operands[0][2] >= vector_operands[1][2] ||
|
||||
vector_operands[0][3] >= vector_operands[1][3]);
|
||||
vector_result[0] = float(
|
||||
std::isgreaterequal(vector_operands[0][0], vector_operands[1][0]) ||
|
||||
std::isgreaterequal(vector_operands[0][1], vector_operands[1][1]) ||
|
||||
std::isgreaterequal(vector_operands[0][2], vector_operands[1][2]) ||
|
||||
std::isgreaterequal(vector_operands[0][3], vector_operands[1][3]));
|
||||
replicate_vector_result_x = true;
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kKillNe: {
|
||||
|
@ -585,14 +598,13 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
vector_result[3] = vector_operands[1][3];
|
||||
} break;
|
||||
case ucode::AluVectorOpcode::kMaxA: {
|
||||
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
|
||||
// (-256.0f) is always the result.
|
||||
state_.address_register = int32_t(std::floor(
|
||||
std::min(255.0f, std::max(-256.0f, vector_operands[0][3])) + 0.5f));
|
||||
xe::clamp_float(vector_operands[0][3], -256.0f, 255.0f) + 0.5f));
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i]
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
vector_result[i] =
|
||||
std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
|
||||
? vector_operands[0][i]
|
||||
: vector_operands[1][i];
|
||||
}
|
||||
} break;
|
||||
default: {
|
||||
|
@ -618,6 +630,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
// r#/c#.w or r#/c#.wx.
|
||||
const float* scalar_src_ptr;
|
||||
uint32_t scalar_src_register = instr.src_reg(3);
|
||||
std::array<float, 4> scalar_src_float_constant;
|
||||
if (instr.src_is_temp(3)) {
|
||||
scalar_src_ptr = GetTempRegister(
|
||||
ucode::AluInstruction::src_temp_reg(scalar_src_register),
|
||||
|
@ -625,9 +638,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
|
||||
scalar_src_register);
|
||||
} else {
|
||||
scalar_src_ptr = GetFloatConstant(
|
||||
scalar_src_float_constant = GetFloatConstant(
|
||||
scalar_src_register, instr.src_const_is_addressed(3),
|
||||
instr.is_const_address_register_relative());
|
||||
scalar_src_ptr = scalar_src_float_constant.data();
|
||||
}
|
||||
uint32_t scalar_src_swizzle = instr.src_swizzle(3);
|
||||
scalar_operand_component_count =
|
||||
|
@ -695,7 +709,8 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
case ucode::AluScalarOpcode::kMulsPrev2: {
|
||||
if (state_.previous_scalar == -FLT_MAX ||
|
||||
!std::isfinite(state_.previous_scalar) ||
|
||||
!std::isfinite(scalar_operands[1]) || scalar_operands[1] <= 0.0f) {
|
||||
!std::isfinite(scalar_operands[1]) ||
|
||||
std::islessequal(scalar_operands[1], 0.0f)) {
|
||||
state_.previous_scalar = -FLT_MAX;
|
||||
} else {
|
||||
// Direct3D 9 behavior (0 or denormal * anything = +0).
|
||||
|
@ -706,23 +721,26 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
}
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kMaxs: {
|
||||
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
state_.previous_scalar =
|
||||
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kMins: {
|
||||
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
state_.previous_scalar =
|
||||
std::isless(scalar_operands[0], scalar_operands[1])
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSeqs: {
|
||||
state_.previous_scalar = float(scalar_operands[0] == 0.0f);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSgts: {
|
||||
state_.previous_scalar = float(scalar_operands[0] > 0.0f);
|
||||
state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSges: {
|
||||
state_.previous_scalar = float(scalar_operands[0] >= 0.0f);
|
||||
state_.previous_scalar =
|
||||
float(std::isgreaterequal(scalar_operands[0], 0.0f));
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSnes: {
|
||||
state_.previous_scalar = float(scalar_operands[0] != 0.0f);
|
||||
|
@ -788,22 +806,20 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kMaxAs: {
|
||||
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
|
||||
// (-256.0f) is always the result.
|
||||
state_.address_register = int32_t(std::floor(
|
||||
std::min(255.0f, std::max(-256.0f, scalar_operands[0])) + 0.5f));
|
||||
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
xe::clamp_float(scalar_operands[0], -256.0f, 255.0f) + 0.5f));
|
||||
state_.previous_scalar =
|
||||
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kMaxAsf: {
|
||||
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
|
||||
// (-256.0f) is always the result.
|
||||
state_.address_register = int32_t(
|
||||
std::floor(std::min(255.0f, std::max(-256.0f, scalar_operands[0]))));
|
||||
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
std::floor(xe::clamp_float(scalar_operands[0], -256.0f, 255.0f)));
|
||||
state_.previous_scalar =
|
||||
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
|
||||
? scalar_operands[0]
|
||||
: scalar_operands[1];
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSubs:
|
||||
case ucode::AluScalarOpcode::kSubsc0:
|
||||
|
@ -822,11 +838,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
state_.previous_scalar = float(!state_.predicate);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSetpGt: {
|
||||
state_.predicate = scalar_operands[0] > 0.0f;
|
||||
state_.predicate = std::isgreater(scalar_operands[0], 0.0f);
|
||||
state_.previous_scalar = float(!state_.predicate);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSetpGe: {
|
||||
state_.predicate = scalar_operands[0] >= 0.0f;
|
||||
state_.predicate = std::isgreaterequal(scalar_operands[0], 0.0f);
|
||||
state_.previous_scalar = float(!state_.predicate);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSetpInv: {
|
||||
|
@ -838,7 +854,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
} break;
|
||||
case ucode::AluScalarOpcode::kSetpPop: {
|
||||
float new_counter = scalar_operands[0] - 1.0f;
|
||||
state_.predicate = new_counter <= 0.0f;
|
||||
state_.predicate = std::islessequal(new_counter, 0.0f);
|
||||
state_.previous_scalar = state_.predicate ? 0.0f : new_counter;
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kSetpClr: {
|
||||
|
@ -855,10 +871,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
state_.previous_scalar = float(scalar_operands[0] == 0.0f);
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kKillsGt: {
|
||||
state_.previous_scalar = float(scalar_operands[0] > 0.0f);
|
||||
state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kKillsGe: {
|
||||
state_.previous_scalar = float(scalar_operands[0] >= 0.0f);
|
||||
state_.previous_scalar =
|
||||
float(std::isgreaterequal(scalar_operands[0], 0.0f));
|
||||
} break;
|
||||
case ucode::AluScalarOpcode::kKillsNe: {
|
||||
state_.previous_scalar = float(scalar_operands[0] != 0.0f);
|
||||
|
@ -884,11 +901,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
|
|||
|
||||
if (instr.vector_clamp()) {
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
vector_result[i] = xe::saturate_unsigned(vector_result[i]);
|
||||
vector_result[i] = xe::saturate(vector_result[i]);
|
||||
}
|
||||
}
|
||||
float scalar_result = instr.scalar_clamp()
|
||||
? xe::saturate_unsigned(state_.previous_scalar)
|
||||
? xe::saturate(state_.previous_scalar)
|
||||
: state_.previous_scalar;
|
||||
|
||||
uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask();
|
||||
|
@ -984,10 +1001,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction(
|
|||
state_.vfetch_full_last = instr;
|
||||
}
|
||||
|
||||
xenos::xe_gpu_vertex_fetch_t fetch_constant =
|
||||
*reinterpret_cast<const xenos::xe_gpu_vertex_fetch_t*>(
|
||||
&register_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
state_.vfetch_full_last.fetch_constant_index()]);
|
||||
xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch(
|
||||
state_.vfetch_full_last.fetch_constant_index());
|
||||
|
||||
if (!instr.is_mini_fetch()) {
|
||||
// Get the part of the address that depends on vfetch_full data.
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define XENIA_GPU_SHADER_INTERPRETER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
@ -120,8 +121,8 @@ class ShaderInterpreter {
|
|||
float* GetTempRegister(uint32_t address, bool is_relative) {
|
||||
return temp_registers_[GetTempRegisterIndex(address, is_relative)];
|
||||
}
|
||||
const float* GetFloatConstant(uint32_t address, bool is_relative,
|
||||
bool relative_address_is_a0) const;
|
||||
const std::array<float, 4> GetFloatConstant(
|
||||
uint32_t address, bool is_relative, bool relative_address_is_a0) const;
|
||||
|
||||
void ExecuteAluInstruction(ucode::AluInstruction instr);
|
||||
void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle,
|
||||
|
|
|
@ -333,8 +333,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
uint32_t index_bit = UINT32_C(1) << index;
|
||||
textures_remaining &= ~index_bit;
|
||||
TextureBinding& binding = texture_bindings_[index];
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
|
||||
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
|
||||
TextureKey old_key = binding.key;
|
||||
uint8_t old_swizzled_signs = binding.swizzled_signs;
|
||||
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "xenia/base/filesystem.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/base/system.h"
|
||||
|
@ -357,9 +358,10 @@ void TraceViewer::DrawPacketDisassemblerUI() {
|
|||
ImGui::NextColumn();
|
||||
if (!register_info ||
|
||||
register_info->type == RegisterInfo::Type::kDword) {
|
||||
ImGui::Text("%.8X", action.register_write.value.u32);
|
||||
ImGui::Text("%.8X", action.register_write.value);
|
||||
} else {
|
||||
ImGui::Text("%8f", action.register_write.value.f32);
|
||||
ImGui::Text("%8f", xe::memory::Reinterpret<float>(
|
||||
action.register_write.value));
|
||||
}
|
||||
ImGui::Columns(1);
|
||||
break;
|
||||
|
@ -709,10 +711,8 @@ void TraceViewer::DrawTextureInfo(
|
|||
const Shader::TextureBinding& texture_binding) {
|
||||
auto& regs = *graphics_system_->register_file();
|
||||
|
||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
texture_binding.fetch_constant * 6;
|
||||
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
|
||||
auto& fetch = group->texture_fetch;
|
||||
xenos::xe_gpu_texture_fetch_t fetch =
|
||||
regs.GetTextureFetch(texture_binding.fetch_constant);
|
||||
if (fetch.type != xenos::FetchConstantType::kTexture &&
|
||||
(!cvars::gpu_allow_invalid_fetch_constants ||
|
||||
fetch.type != xenos::FetchConstantType::kInvalidTexture)) {
|
||||
|
@ -780,9 +780,9 @@ void TraceViewer::DrawFailedTextureInfo(
|
|||
|
||||
void TraceViewer::DrawVertexFetcher(Shader* shader,
|
||||
const Shader::VertexBinding& vertex_binding,
|
||||
const xe_gpu_vertex_fetch_t* fetch) {
|
||||
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2);
|
||||
uint32_t vertex_count = fetch->size / vertex_binding.stride_words;
|
||||
const xe_gpu_vertex_fetch_t& fetch) {
|
||||
const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2);
|
||||
uint32_t vertex_count = fetch.size / vertex_binding.stride_words;
|
||||
int column_count = 0;
|
||||
for (const auto& attrib : vertex_binding.attributes) {
|
||||
switch (attrib.fetch_instr.attributes.data_format) {
|
||||
|
@ -883,7 +883,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
|
|||
#define LOADEL(type, wo) \
|
||||
GpuSwap(xe::load<type>(vstart + \
|
||||
(attrib.fetch_instr.attributes.offset + wo) * 4), \
|
||||
fetch->endian)
|
||||
fetch.endian)
|
||||
switch (attrib.fetch_instr.attributes.data_format) {
|
||||
case xenos::VertexFormat::k_32:
|
||||
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
|
||||
|
@ -1066,7 +1066,7 @@ void ProgressBar(float frac, float width, float height = 0,
|
|||
if (height == 0) {
|
||||
height = ImGui::GetTextLineHeightWithSpacing();
|
||||
}
|
||||
frac = xe::saturate_unsigned(frac);
|
||||
frac = xe::saturate(frac);
|
||||
|
||||
const auto fontAtlas = ImGui::GetIO().Fonts;
|
||||
|
||||
|
@ -1187,7 +1187,7 @@ void TraceViewer::DrawStateUI() {
|
|||
}
|
||||
|
||||
auto enable_mode =
|
||||
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
|
||||
|
||||
const char* mode_name = "Unknown";
|
||||
switch (enable_mode) {
|
||||
|
@ -1210,7 +1210,7 @@ void TraceViewer::DrawStateUI() {
|
|||
break;
|
||||
}
|
||||
case ModeControl::kCopy: {
|
||||
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
|
||||
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
|
||||
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
|
||||
copy_dest_base);
|
||||
break;
|
||||
|
@ -1221,9 +1221,9 @@ void TraceViewer::DrawStateUI() {
|
|||
ImGui::BulletText("Viewport State:");
|
||||
if (true) {
|
||||
ImGui::TreePush((const void*)0);
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
|
||||
if ((pa_su_sc_mode_cntl >> 16) & 1) {
|
||||
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
|
||||
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET];
|
||||
int16_t window_offset_x = window_offset & 0x7FFF;
|
||||
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
|
||||
if (window_offset_x & 0x4000) {
|
||||
|
@ -1237,8 +1237,8 @@ void TraceViewer::DrawStateUI() {
|
|||
} else {
|
||||
ImGui::BulletText("Window Offset: disabled");
|
||||
}
|
||||
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
|
||||
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
|
||||
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL];
|
||||
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR];
|
||||
ImGui::BulletText(
|
||||
"Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF,
|
||||
(window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF,
|
||||
|
@ -1246,7 +1246,7 @@ void TraceViewer::DrawStateUI() {
|
|||
(window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF),
|
||||
((window_scissor_br >> 16) & 0x7FFF) -
|
||||
((window_scissor_tl >> 16) & 0x7FFF));
|
||||
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
|
||||
uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF;
|
||||
uint32_t surface_pitch = surface_info & 0x3FFF;
|
||||
auto surface_msaa = (surface_info >> 16) & 0x3;
|
||||
|
@ -1258,7 +1258,7 @@ void TraceViewer::DrawStateUI() {
|
|||
ImGui::BulletText("Surface Pitch: %d", surface_pitch);
|
||||
ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz);
|
||||
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
|
||||
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL];
|
||||
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
|
||||
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
|
||||
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
|
||||
|
@ -1273,14 +1273,20 @@ void TraceViewer::DrawStateUI() {
|
|||
}
|
||||
ImGui::BulletText(
|
||||
"Viewport Offset: %f, %f, %f",
|
||||
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0,
|
||||
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0,
|
||||
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0);
|
||||
vport_xoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
|
||||
: 0.0f,
|
||||
vport_yoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
|
||||
: 0.0f,
|
||||
vport_zoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
|
||||
: 0.0f);
|
||||
ImGui::BulletText(
|
||||
"Viewport Scale: %f, %f, %f",
|
||||
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1,
|
||||
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1,
|
||||
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1);
|
||||
vport_xscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
|
||||
: 1.0f,
|
||||
vport_yscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
|
||||
: 1.0f,
|
||||
vport_zscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
|
||||
: 1.0f);
|
||||
if (!vport_xscale_enable) {
|
||||
ImGui::PopStyleColor();
|
||||
}
|
||||
|
@ -1290,7 +1296,7 @@ void TraceViewer::DrawStateUI() {
|
|||
((vte_control >> 8) & 0x1) ? "y/w0" : "y",
|
||||
((vte_control >> 9) & 0x1) ? "z/w0" : "z",
|
||||
((vte_control >> 10) & 0x1) ? "w0" : "1/w0");
|
||||
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
|
||||
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL];
|
||||
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
|
||||
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
|
||||
ImGui::BulletText("Clip Enabled: %s, DX Clip: %s",
|
||||
|
@ -1302,11 +1308,9 @@ void TraceViewer::DrawStateUI() {
|
|||
ImGui::BulletText("Rasterizer State:");
|
||||
if (true) {
|
||||
ImGui::TreePush((const void*)0);
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
|
||||
uint32_t pa_sc_screen_scissor_tl =
|
||||
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
|
||||
uint32_t pa_sc_screen_scissor_br =
|
||||
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
|
||||
uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL];
|
||||
uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR];
|
||||
if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) {
|
||||
int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF;
|
||||
int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
|
||||
|
@ -1361,7 +1365,7 @@ void TraceViewer::DrawStateUI() {
|
|||
}
|
||||
ImGui::Columns(1);
|
||||
|
||||
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
|
||||
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
|
||||
auto surface_msaa =
|
||||
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
|
||||
|
@ -1370,39 +1374,39 @@ void TraceViewer::DrawStateUI() {
|
|||
if (enable_mode != ModeControl::kDepth) {
|
||||
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
|
||||
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
|
||||
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];
|
||||
if ((color_control & 0x8) != 0) {
|
||||
ImGui::BulletText("Alpha Test: %s %.2f",
|
||||
kCompareFuncNames[color_control & 0x7],
|
||||
regs[XE_GPU_REG_RB_ALPHA_REF].f32);
|
||||
regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF));
|
||||
} else {
|
||||
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
|
||||
ImGui::BulletText("Alpha Test: disabled");
|
||||
ImGui::PopStyleColor();
|
||||
}
|
||||
|
||||
auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32);
|
||||
auto blend_color = ImVec4(regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
|
||||
ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x,
|
||||
blend_color.y, blend_color.z, blend_color.w);
|
||||
ImGui::SameLine();
|
||||
// TODO small_height (was true) parameter was removed
|
||||
ImGui::ColorButton(nullptr, blend_color);
|
||||
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
|
||||
uint32_t color_info[4] = {
|
||||
regs[XE_GPU_REG_RB_COLOR_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
|
||||
regs[XE_GPU_REG_RB_COLOR_INFO],
|
||||
regs[XE_GPU_REG_RB_COLOR1_INFO],
|
||||
regs[XE_GPU_REG_RB_COLOR2_INFO],
|
||||
regs[XE_GPU_REG_RB_COLOR3_INFO],
|
||||
};
|
||||
uint32_t rb_blendcontrol[4] = {
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32,
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL0],
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL1],
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL2],
|
||||
regs[XE_GPU_REG_RB_BLENDCONTROL3],
|
||||
};
|
||||
ImGui::Columns(2);
|
||||
for (int i = 0; i < xe::countof(color_info); ++i) {
|
||||
|
@ -1511,9 +1515,9 @@ void TraceViewer::DrawStateUI() {
|
|||
}
|
||||
|
||||
if (ImGui::CollapsingHeader("Depth/Stencil Target")) {
|
||||
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
||||
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL];
|
||||
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK];
|
||||
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO];
|
||||
bool uses_depth =
|
||||
(rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004);
|
||||
uint32_t stencil_ref = (rb_stencilrefmask & 0xFF);
|
||||
|
@ -1697,10 +1701,9 @@ void TraceViewer::DrawStateUI() {
|
|||
draw_info.index_buffer_size,
|
||||
kIndexFormatNames[int(draw_info.index_format)],
|
||||
kEndiannessNames[int(draw_info.index_endianness)]);
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
|
||||
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
|
||||
if (pa_su_sc_mode_cntl & (1 << 21)) {
|
||||
uint32_t reset_index =
|
||||
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
|
||||
uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX];
|
||||
if (draw_info.index_format == xenos::IndexFormat::kInt16) {
|
||||
ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF);
|
||||
} else {
|
||||
|
@ -1760,30 +1763,16 @@ void TraceViewer::DrawStateUI() {
|
|||
auto shader = command_processor->active_vertex_shader();
|
||||
if (shader) {
|
||||
for (const auto& vertex_binding : shader->vertex_bindings()) {
|
||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||
(vertex_binding.fetch_constant / 3) * 6;
|
||||
const auto group =
|
||||
reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
|
||||
const xe_gpu_vertex_fetch_t* fetch = nullptr;
|
||||
switch (vertex_binding.fetch_constant % 3) {
|
||||
case 0:
|
||||
fetch = &group->vertex_fetch_0;
|
||||
break;
|
||||
case 1:
|
||||
fetch = &group->vertex_fetch_1;
|
||||
break;
|
||||
case 2:
|
||||
fetch = &group->vertex_fetch_2;
|
||||
break;
|
||||
}
|
||||
assert_true(fetch->endian == xenos::Endian::k8in32);
|
||||
xe_gpu_vertex_fetch_t fetch =
|
||||
regs.GetVertexFetch(vertex_binding.fetch_constant);
|
||||
assert_true(fetch.endian == xenos::Endian::k8in32);
|
||||
char tree_root_id[32];
|
||||
sprintf(tree_root_id, "#vertices_root_%d",
|
||||
vertex_binding.fetch_constant);
|
||||
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
|
||||
vertex_binding.fetch_constant, fetch->address << 2,
|
||||
fetch->size * 4,
|
||||
kEndiannessNames[int(fetch->endian)])) {
|
||||
vertex_binding.fetch_constant, fetch.address << 2,
|
||||
fetch.size * 4,
|
||||
kEndiannessNames[int(fetch.endian)])) {
|
||||
ImGui::BeginChild("#vertices", ImVec2(0, 300));
|
||||
DrawVertexFetcher(shader, vertex_binding, fetch);
|
||||
ImGui::EndChild();
|
||||
|
@ -1831,7 +1820,7 @@ void TraceViewer::DrawStateUI() {
|
|||
ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6,
|
||||
(i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6);
|
||||
ImGui::NextColumn();
|
||||
ImGui::Text("%.8X", regs[i].u32);
|
||||
ImGui::Text("%.8X", regs[i]);
|
||||
ImGui::NextColumn();
|
||||
}
|
||||
ImGui::Columns(1);
|
||||
|
@ -1842,8 +1831,9 @@ void TraceViewer::DrawStateUI() {
|
|||
i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) {
|
||||
ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4);
|
||||
ImGui::NextColumn();
|
||||
ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32,
|
||||
regs[i + 2].f32, regs[i + 3].f32);
|
||||
ImGui::Text("%f, %f, %f, %f", regs.Get<float>(i + 0),
|
||||
regs.Get<float>(i + 1), regs.Get<float>(i + 2),
|
||||
regs.Get<float>(i + 3));
|
||||
ImGui::NextColumn();
|
||||
}
|
||||
ImGui::Columns(1);
|
||||
|
@ -1856,7 +1846,7 @@ void TraceViewer::DrawStateUI() {
|
|||
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32,
|
||||
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31);
|
||||
ImGui::NextColumn();
|
||||
ImGui::Text("%.8X", regs[i].u32);
|
||||
ImGui::Text("%.8X", regs[i]);
|
||||
ImGui::NextColumn();
|
||||
}
|
||||
ImGui::Columns(1);
|
||||
|
@ -1867,7 +1857,7 @@ void TraceViewer::DrawStateUI() {
|
|||
i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) {
|
||||
ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00);
|
||||
ImGui::NextColumn();
|
||||
ImGui::Text("%.8X", regs[i].u32);
|
||||
ImGui::Text("%.8X", regs[i]);
|
||||
ImGui::NextColumn();
|
||||
}
|
||||
ImGui::Columns(1);
|
||||
|
|
|
@ -122,7 +122,7 @@ class TraceViewer : public xe::ui::WindowedApp {
|
|||
|
||||
void DrawVertexFetcher(Shader* shader,
|
||||
const Shader::VertexBinding& vertex_binding,
|
||||
const xenos::xe_gpu_vertex_fetch_t* fetch);
|
||||
const xenos::xe_gpu_vertex_fetch_t& fetch);
|
||||
|
||||
TraceViewerWindowListener window_listener_;
|
||||
|
||||
|
|
|
@ -2486,8 +2486,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
(uint64_t(1) << (vfetch_index & 63))) {
|
||||
continue;
|
||||
}
|
||||
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
|
||||
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
|
||||
regs.GetVertexFetch(vfetch_index);
|
||||
switch (vfetch_constant.type) {
|
||||
case xenos::FetchConstantType::kVertex:
|
||||
break;
|
||||
|
@ -3285,10 +3285,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
|||
|
||||
// Blend constants.
|
||||
float blend_constants[] = {
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
|
||||
};
|
||||
dynamic_blend_constants_update_needed_ |=
|
||||
std::memcmp(dynamic_blend_constants_, blend_constants,
|
||||
|
@ -3434,7 +3434,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
const RegisterFile& regs = *register_file_;
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
|
||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
||||
|
@ -3442,7 +3442,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
|
||||
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
||||
auto vgt_indx_offset = regs.Get<int32_t>(XE_GPU_REG_VGT_INDX_OFFSET);
|
||||
|
||||
bool edram_fragment_shader_interlock =
|
||||
render_target_cache_->GetPath() ==
|
||||
|
@ -3755,7 +3755,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
|
||||
system_constants_.edram_rt_format_flags[i] = format_flags;
|
||||
uint32_t blend_factors_ops =
|
||||
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
|
||||
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
|
||||
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
|
||||
blend_factors_ops;
|
||||
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
|
||||
|
@ -3784,22 +3784,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
if (primitive_polygonal) {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
poly_offset_front_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
}
|
||||
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
|
||||
poly_offset_back_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
|
||||
poly_offset_back_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
|
||||
}
|
||||
} else {
|
||||
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||
poly_offset_front_scale =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
|
||||
poly_offset_front_offset =
|
||||
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
|
||||
poly_offset_back_scale = poly_offset_front_scale;
|
||||
poly_offset_back_offset = poly_offset_front_offset;
|
||||
}
|
||||
|
@ -3862,21 +3862,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
|||
}
|
||||
|
||||
dirty |= system_constants_.edram_blend_constant[0] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
|
||||
system_constants_.edram_blend_constant[0] =
|
||||
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
|
||||
dirty |= system_constants_.edram_blend_constant[1] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
|
||||
system_constants_.edram_blend_constant[1] =
|
||||
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
|
||||
dirty |= system_constants_.edram_blend_constant[2] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
|
||||
system_constants_.edram_blend_constant[2] =
|
||||
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
|
||||
dirty |= system_constants_.edram_blend_constant[3] !=
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
|
||||
system_constants_.edram_blend_constant[3] =
|
||||
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
|
@ -3903,10 +3903,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
|
|||
// These are the constant base addresses/ranges for shaders.
|
||||
// We have these hardcoded right now cause nothing seems to differ on the Xbox
|
||||
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
|
||||
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
|
||||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
|
||||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
|
||||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
|
||||
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
|
||||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
|
||||
// Check if the float constant layout is still the same and get the counts.
|
||||
const Shader::ConstantRegisterMap& float_constant_map_vertex =
|
||||
vertex_shader->constant_register_map();
|
||||
|
@ -4001,8 +4001,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
|
|||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(mapping,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
(float_constant_index << 2)],
|
||||
sizeof(float) * 4);
|
||||
mapping += sizeof(float) * 4;
|
||||
}
|
||||
|
@ -4033,8 +4032,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
|
|||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(mapping,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
(float_constant_index << 2)],
|
||||
sizeof(float) * 4);
|
||||
mapping += sizeof(float) * 4;
|
||||
}
|
||||
|
@ -4055,7 +4053,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
|
|||
return false;
|
||||
}
|
||||
buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize);
|
||||
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
||||
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
|
||||
kBoolLoopConstantsSize);
|
||||
current_constant_buffers_up_to_date_ |=
|
||||
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop;
|
||||
|
@ -4073,7 +4071,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
|
|||
return false;
|
||||
}
|
||||
buffer_info.range = VkDeviceSize(kFetchConstantsSize);
|
||||
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
||||
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
|
||||
kFetchConstantsSize);
|
||||
current_constant_buffers_up_to_date_ |=
|
||||
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch;
|
||||
|
|
|
@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
|||
[common_blend_rt_index]),
|
||||
(((normalized_color_mask &
|
||||
~(uint32_t(0b1111) << (4 * common_blend_rt_index)))
|
||||
? regs[XE_GPU_REG_RB_COLOR_MASK].u32
|
||||
? regs[XE_GPU_REG_RB_COLOR_MASK]
|
||||
: normalized_color_mask) >>
|
||||
(4 * common_blend_rt_index)) &
|
||||
0b1111,
|
||||
|
|
|
@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
|
|||
VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
|
||||
const VulkanShader::SamplerBinding& binding) const {
|
||||
const auto& regs = register_file();
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
|
||||
xenos::xe_gpu_texture_fetch_t fetch =
|
||||
regs.GetTextureFetch(binding.fetch_constant);
|
||||
|
||||
SamplerParameters parameters;
|
||||
|
||||
|
@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
|
|||
uint32_t& width_scaled_out, uint32_t& height_scaled_out,
|
||||
xenos::TextureFormat& format_out) {
|
||||
const auto& regs = register_file();
|
||||
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
|
||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
|
||||
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
|
||||
TextureKey key;
|
||||
BindingInfoFromFetchConstant(fetch, key, nullptr);
|
||||
if (!key.is_valid || key.base_page == 0 ||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <cmath>
|
||||
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -26,7 +27,7 @@ namespace xenos {
|
|||
float PWLGammaToLinear(float gamma) {
|
||||
// Not found in game executables, so just using the logic similar to that in
|
||||
// the Source Engine.
|
||||
gamma = xe::saturate_unsigned(gamma);
|
||||
gamma = xe::saturate(gamma);
|
||||
float scale, offset;
|
||||
// While the compiled code for linear to gamma conversion uses `vcmpgtfp
|
||||
// constant, value` comparison (constant > value, or value < constant), it's
|
||||
|
@ -67,7 +68,7 @@ float PWLGammaToLinear(float gamma) {
|
|||
}
|
||||
|
||||
float LinearToPWLGamma(float linear) {
|
||||
linear = xe::saturate_unsigned(linear);
|
||||
linear = xe::saturate(linear);
|
||||
float scale, offset;
|
||||
// While the compiled code uses `vcmpgtfp constant, value` comparison
|
||||
// (constant > value, or value < constant), it's preferable to use `value >=
|
||||
|
@ -118,8 +119,8 @@ float Float7e3To32(uint32_t f10) {
|
|||
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
|
||||
mantissa = (mantissa << mantissa_lzcnt) & 0x7F;
|
||||
}
|
||||
uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3);
|
||||
return *reinterpret_cast<const float*>(&f32);
|
||||
return xe::memory::Reinterpret<float>(
|
||||
uint32_t(((exponent + 124) << 23) | (mantissa << 3)));
|
||||
}
|
||||
|
||||
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
|
||||
|
@ -131,7 +132,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) {
|
|||
// Positive only, and not -0 or NaN.
|
||||
return 0;
|
||||
}
|
||||
uint32_t f32u32 = *reinterpret_cast<const uint32_t*>(&f32);
|
||||
auto f32u32 = xe::memory::Reinterpret<uint32_t>(f32);
|
||||
if (f32u32 >= 0x3FFFFFF8) {
|
||||
// Saturate.
|
||||
return 0xFFFFFF;
|
||||
|
@ -165,8 +166,8 @@ float Float20e4To32(uint32_t f24) {
|
|||
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
|
||||
mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF;
|
||||
}
|
||||
uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3);
|
||||
return *reinterpret_cast<const float*>(&f32);
|
||||
return xe::memory::Reinterpret<float>(
|
||||
uint32_t(((exponent + 112) << 23) | (mantissa << 3)));
|
||||
}
|
||||
|
||||
const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/graphics_util.h"
|
||||
#include "xenia/ui/presenter.h"
|
||||
|
||||
|
@ -67,24 +68,19 @@ bool ImmediateDrawer::ScissorToRenderTarget(const ImmediateDraw& immediate_draw,
|
|||
}
|
||||
float render_target_width_float = float(render_target_width);
|
||||
float render_target_height_float = float(render_target_height);
|
||||
// Scale to render target coordinates, drop NaNs (by doing
|
||||
// std::max(0.0f, variable) in this argument order), and clamp to the render
|
||||
// Scale to render target coordinates, drop NaNs, and clamp to the render
|
||||
// target size, below which the values are representable as 16p8 fixed-point.
|
||||
float scale_x = render_target_width / coordinate_space_width();
|
||||
float scale_y = render_target_height / coordinate_space_height();
|
||||
float x0_float =
|
||||
std::min(render_target_width_float,
|
||||
std::max(0.0f, immediate_draw.scissor_left * scale_x));
|
||||
float y0_float =
|
||||
std::min(render_target_height_float,
|
||||
std::max(0.0f, immediate_draw.scissor_top * scale_y));
|
||||
float x0_float = xe::clamp_float(immediate_draw.scissor_left * scale_x, 0.0f,
|
||||
render_target_width_float);
|
||||
float y0_float = xe::clamp_float(immediate_draw.scissor_top * scale_y, 0.0f,
|
||||
render_target_height_float);
|
||||
// Also make sure the size is non-negative.
|
||||
float x1_float =
|
||||
std::min(render_target_width_float,
|
||||
std::max(x0_float, immediate_draw.scissor_right * scale_x));
|
||||
float y1_float =
|
||||
std::min(render_target_height_float,
|
||||
std::max(y0_float, immediate_draw.scissor_bottom * scale_y));
|
||||
float x1_float = xe::clamp_float(immediate_draw.scissor_right * scale_x,
|
||||
x0_float, render_target_width_float);
|
||||
float y1_float = xe::clamp_float(immediate_draw.scissor_bottom * scale_y,
|
||||
y0_float, render_target_height_float);
|
||||
// Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered).
|
||||
int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8;
|
||||
int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8;
|
||||
|
|
|
@ -153,16 +153,16 @@ bool AndroidWindow::OnActivitySurfaceMotionEvent(jobject event) {
|
|||
// with out-of-bounds coordinates), when moving the mouse outside the
|
||||
// View, or when starting moving the mouse when the pointer was previously
|
||||
// outside the View in some cases.
|
||||
int32_t mouse_x = int32_t(
|
||||
std::min(float(GetActualPhysicalWidth()),
|
||||
std::max(0.0f, jni_env->CallFloatMethod(
|
||||
event, jni_ids.motion_event_get_x, 0))) +
|
||||
0.5f);
|
||||
int32_t mouse_y = int32_t(
|
||||
std::min(float(GetActualPhysicalHeight()),
|
||||
std::max(0.0f, jni_env->CallFloatMethod(
|
||||
event, jni_ids.motion_event_get_y, 0))) +
|
||||
0.5f);
|
||||
int32_t mouse_x =
|
||||
int32_t(xe::clamp_float(jni_env->CallFloatMethod(
|
||||
event, jni_ids.motion_event_get_x, 0),
|
||||
0.0f, float(GetActualPhysicalWidth())) +
|
||||
0.5f);
|
||||
int32_t mouse_y =
|
||||
int32_t(xe::clamp_float(jni_env->CallFloatMethod(
|
||||
event, jni_ids.motion_event_get_y, 0),
|
||||
0.0f, float(GetActualPhysicalHeight())) +
|
||||
0.5f);
|
||||
static const MouseEvent::Button kMouseEventButtons[] = {
|
||||
MouseEvent::Button::kLeft, MouseEvent::Button::kRight,
|
||||
MouseEvent::Button::kMiddle, MouseEvent::Button::kX1,
|
||||
|
|
Loading…
Reference in a new issue