Compare commits

...

7 commits

Author SHA1 Message Date
none e1dc25840d
Merge cfdfb600da into 3189a0e259 2024-05-12 22:04:12 +02:00
Triang3l 3189a0e259 [GPU] Check memexport stream constant upper bits in range gathering 2024-05-12 20:26:14 +03:00
Triang3l a3304d252f [Base/GPU] Cleanup float comparisons and NaN and -0 in clamping
C++ relational operators are supposed to raise FE_INVALID if an argument is
NaN, so use std::isless/greater[equal] instead wherever such comparisons were
easy to locate (other places may still remain; mostly min/max and clamp usage
was checked).

Also fixes a copy-paste error making the CPU shader interpreter execute
MINs as MAXs instead.
2024-05-12 19:21:37 +03:00
Triang3l f964290ea8 [Base] Relax the system clock difference allowance in the test
This should hopefully reduce the CI failure rate, although this testing
approach is fundamentally flawed because it depends on OS scheduling.
2024-05-12 17:44:52 +03:00
Triang3l 376bad5056 [GPU] Remove register reinterpret_casts + WAIT_REG_MEM volatility
Hopefully prevents some potential #1971-like situations.

WAIT_REG_MEM's implementation also allowed the compiler to load the value
only once, which caused an infinite loop with the other changes in the
commit (even in debug builds), so it's now accessed as volatile. Possibly
it would be even better to replace it with some (acquire/release?) atomic
load/store some day at least for the registers actually seen as
participating in those waits.

Also fixes the endianness being handled only on the first wait iteration in
WAIT_REG_MEM.
2024-05-12 17:28:17 +03:00
Triang3l f0ad4f4587 [Base] Add aliasing-safe xe::memory::Reinterpret
Accessing the same memory as different types (other than char) using
reinterpret_cast or a union is undefined behavior that has already caused
issues like #1971.

Also adds a XE_RESTRICT_VAR definition for declaring non-aliasing pointers
in performance-critical areas in the future.
2024-05-12 17:28:16 +03:00
guccigang420 cfdfb600da Added a translation for GTK virtual keys to ui::VirtualKey 2024-02-05 15:17:23 +01:00
28 changed files with 669 additions and 468 deletions

View file

@ -921,7 +921,7 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
auto in = reinterpret_cast<const float*>(samples[j]);
// Raw samples sometimes aren't within [-1, 1]
float scaled_sample = xe::saturate_signed(in[i]) * scale;
float scaled_sample = xe::clamp_float(in[i], -1.0f, 1.0f) * scale;
// Convert the sample and output it in big endian.
auto sample = static_cast<int16_t>(scaled_sample);

View file

@ -60,20 +60,22 @@ constexpr T round_up(T value, V multiple, bool force_non_zero = true) {
return (value + multiple - 1) / multiple * multiple;
}
// Using the same conventions as in shading languages, returning 0 for NaN.
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument is
// always returned. Also -0 is not < +0, so +0 is also chosen for it.
// For NaN, returns min_value (or, if it's NaN too, max_value).
// If either of the boundaries is zero, and if the value is at that boundary or
// exceeds it, the result will have the sign of that boundary. If both
// boundaries are zero, which sign is selected among the argument signs is not
// explicitly defined.
template <typename T>
constexpr T saturate_unsigned(T value) {
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(0.0f), value));
T clamp_float(T value, T min_value, T max_value) {
  // std::isgreater / std::isless are the quiet comparison functions: unlike
  // the built-in `>` and `<`, they do not raise FE_INVALID when an argument is
  // NaN, and they evaluate to false in that case. As a result a NaN `value`
  // falls through to `min_value` here.
  //
  // The intermediate is kept as T (not float) so that instantiations with a
  // wider type, such as double, are not narrowed to float precision before the
  // upper-bound comparison.
  T clamped_to_min = std::isgreater(value, min_value) ? value : min_value;
  // If `clamped_to_min` is NaN at this point (both `value` and `min_value`
  // were NaN), the comparison is false and `max_value` is returned.
  return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value;
}
// This diverges from the GPU NaN rules for signed normalized formats (NaN
// should be converted to 0, not to -1), but this expectation is not needed most
// of time, and cannot be met for free (unlike for 0...1 clamping).
// Using the same conventions as in shading languages, returning 0 for NaN.
// 0 is always returned as positive.
template <typename T>
constexpr T saturate_signed(T value) {
return std::min(static_cast<T>(1.0f), std::max(static_cast<T>(-1.0f), value));
T saturate(T value) {
  // Clamp to the [0, 1] range by delegating to clamp_float with the bounds
  // converted to T.
  const T lower_bound = static_cast<T>(0.0f);
  const T upper_bound = static_cast<T>(1.0f);
  return clamp_float(value, lower_bound, upper_bound);
}
// Gets the next power of two value that is greater than or equal to the given
@ -330,12 +332,6 @@ inline uint64_t rotate_left(uint64_t v, uint8_t sh) {
}
#endif // XE_PLATFORM_WIN32
// Restricts `value` to the range [min_value, max_value] using the ordinary
// `<` and `>` operators. The lower bound is applied first, then the upper
// bound is applied to that intermediate result.
template <typename T>
T clamp(T value, T min_value, T max_value) {
  if (value < min_value) {
    // The lower bound was selected; it still has to respect the upper bound.
    return min_value > max_value ? max_value : min_value;
  }
  return value > max_value ? max_value : value;
}
#if XE_ARCH_AMD64
// Utilities for SSE values.
template <int N>

View file

@ -16,6 +16,7 @@
#include <functional>
#include <string>
#include <string_view>
#include <type_traits>
#include "xenia/base/assert.h"
#include "xenia/base/byte_order.h"
@ -24,6 +25,30 @@
namespace xe {
namespace memory {
// Non-aliasing qualifier for variable declarations (not for return values or
// the `this` pointer).
// Not propagated.
#define XE_RESTRICT_VAR __restrict

// Aliasing-safe bit reinterpretation: copies the bytes of `src` into `dst`
// with std::memcpy instead of accessing the same memory through two unrelated
// types. Both types must be trivially copyable and of equal size; this is
// checked at compile time.
// For more complex cases such as non-trivially-copyable types, write copying
// code respecting the requirements for them externally instead of using these
// functions.
template <typename Dst, typename Src>
void Reinterpret(Dst& XE_RESTRICT_VAR dst, const Src& XE_RESTRICT_VAR src) {
  static_assert(std::is_trivially_copyable_v<Src>);
  static_assert(std::is_trivially_copyable_v<Dst>);
  static_assert(sizeof(Src) == sizeof(Dst));
  std::memcpy(&dst, &src, sizeof(Src));
}

// Value-returning form: default-initializes a local Dst, fills it from the
// bytes of `src` via the two-argument overload, and returns it by value.
template <typename Dst, typename Src>
Dst Reinterpret(const Src& XE_RESTRICT_VAR src) {
  Dst result;
  Reinterpret(result, src);
  return result;
}
#if XE_PLATFORM_ANDROID
void AndroidInitialize();
void AndroidShutdown();

View file

@ -107,10 +107,11 @@ TEST_CASE("WinSystemClock <-> XSystemClock", "[clock_cast]") {
auto error2 = xsys.time_since_epoch() - wxsys.time_since_epoch();
auto error3 = wsys - wxsys;
REQUIRE(error1 < 10ms);
REQUIRE(error1 > -10ms);
REQUIRE(error2 < 10ms);
REQUIRE(error2 > -10ms);
// In AppVeyor, the difference often can be as large as roughly 16ms.
REQUIRE(error1 < 20ms);
REQUIRE(error1 > -20ms);
REQUIRE(error2 < 20ms);
REQUIRE(error2 > -20ms);
REQUIRE(error3 < duration);
REQUIRE(error3 > -duration);
}

View file

@ -182,7 +182,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
function_pane_width += io.MouseDelta.x;
function_pane_width = xe::clamp(function_pane_width, 30.0f, FLT_MAX);
function_pane_width = xe::clamp_float(function_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##source_pane",
@ -194,7 +194,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
source_pane_width += io.MouseDelta.x;
source_pane_width = xe::clamp(source_pane_width, 30.0f, FLT_MAX);
source_pane_width = xe::clamp_float(source_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##registers_pane",
@ -206,7 +206,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, top_panes_height));
if (ImGui::IsItemActive()) {
registers_pane_width += io.MouseDelta.x;
registers_pane_width = xe::clamp(registers_pane_width, 30.0f, FLT_MAX);
registers_pane_width =
xe::clamp_float(registers_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##right_pane", ImVec2(0, top_panes_height), true);
@ -234,7 +235,7 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImGui::InvisibleButton("##hsplitter0", ImVec2(-1, kSplitterWidth));
if (ImGui::IsItemActive()) {
bottom_panes_height -= io.MouseDelta.y;
bottom_panes_height = xe::clamp(bottom_panes_height, 30.0f, FLT_MAX);
bottom_panes_height = xe::clamp_float(bottom_panes_height, 30.0f, FLT_MAX);
}
ImGui::BeginChild("##log_pane", ImVec2(log_pane_width, bottom_panes_height),
true);
@ -245,7 +246,8 @@ void DebugWindow::DrawFrame(ImGuiIO& io) {
ImVec2(kSplitterWidth, bottom_panes_height));
if (ImGui::IsItemActive()) {
breakpoints_pane_width -= io.MouseDelta.x;
breakpoints_pane_width = xe::clamp(breakpoints_pane_width, 30.0f, FLT_MAX);
breakpoints_pane_width =
xe::clamp_float(breakpoints_pane_width, 30.0f, FLT_MAX);
}
ImGui::SameLine();
ImGui::BeginChild("##breakpoints_pane", ImVec2(0, 0), true);

View file

@ -18,6 +18,7 @@
#include "xenia/base/byte_stream.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/ring_buffer.h"
#include "xenia/gpu/gpu_flags.h"
@ -334,7 +335,8 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
return;
}
regs.values[index].u32 = value;
// Volatile for the WAIT_REG_MEM loop.
const_cast<volatile uint32_t&>(regs.values[index]) = value;
if (!regs.GetRegisterInfo(index)) {
XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
}
@ -342,19 +344,20 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
// Scratch register writeback.
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK].u32) {
if ((1 << scratch_reg) & regs.values[XE_GPU_REG_SCRATCH_UMSK]) {
// Enabled - write to address.
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR].u32;
uint32_t scratch_addr = regs.values[XE_GPU_REG_SCRATCH_ADDR];
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
xe::store_and_swap<uint32_t>(memory_->TranslatePhysical(mem_addr), value);
}
} else {
switch (index) {
// If this is a COHER register, set the dirty flag.
// This will block the command processor the next time it WAIT_MEM_REGs
// This will block the command processor the next time it WAIT_REG_MEMs
// and allow us to synchronize the memory.
case XE_GPU_REG_COHER_STATUS_HOST: {
regs.values[index].u32 |= UINT32_C(0x80000000);
const_cast<volatile uint32_t&>(regs.values[index]) |=
UINT32_C(0x80000000);
} break;
case XE_GPU_REG_DC_LUT_RW_INDEX: {
@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
// enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_PWL_DATA: {
// Should be in the PWL writing mode.
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_not_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
// Bit 7 of the index is ignored for PWL.
uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F;
// DC_LUT_PWL_DATA is likely in the red, green, blue order because
// DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
bool write_gamma_ramp_component =
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 &
(regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
(UINT32_C(1) << (2 - gamma_ramp_rw_component_))) != 0;
if (write_gamma_ramp_component) {
reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
case XE_GPU_REG_DC_LUT_30_COLOR: {
// Should be in the 256-entry table writing mode.
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE].u32 & 0b1);
assert_zero(regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1);
auto& gamma_ramp_rw_index = regs.Get<reg::DC_LUT_RW_INDEX>();
uint32_t gamma_ramp_write_enable_mask =
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK].u32 & 0b111;
regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111;
if (gamma_ramp_write_enable_mask) {
reg::DC_LUT_30_COLOR& gamma_ramp_entry =
gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index];
@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() {
// https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
// https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
RegisterFile* regs = register_file_;
auto& status_host = regs->Get<reg::COHER_STATUS_HOST>();
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
// Volatile because this may be called from the WAIT_REG_MEM loop.
volatile uint32_t* regs_volatile = register_file_->values;
auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
uint32_t(regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
if (!status_host.status) {
return;
@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() {
base_host + size_host, size_host, action);
// Mark coherent.
status_host.status = 0;
regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0;
}
// Flushes the trace writer.
void CommandProcessor::PrepareForWait() {
  trace_writer_.Flush();
}
@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
SCOPE_profile_cpu_f("gpu");
// wait until a register or memory location is a specific value
uint32_t wait_info = reader->ReadAndSwap<uint32_t>();
uint32_t poll_reg_addr = reader->ReadAndSwap<uint32_t>();
uint32_t ref = reader->ReadAndSwap<uint32_t>();
uint32_t mask = reader->ReadAndSwap<uint32_t>();
uint32_t wait = reader->ReadAndSwap<uint32_t>();
bool is_memory = (wait_info & 0x10) != 0;
assert_true(is_memory || poll_reg_addr < RegisterFile::kRegisterCount);
const volatile uint32_t& value_ref =
is_memory ? *reinterpret_cast<uint32_t*>(memory_->TranslatePhysical(
poll_reg_addr & ~uint32_t(0x3)))
: register_file_->values[poll_reg_addr];
bool matched = false;
do {
uint32_t value;
if (wait_info & 0x10) {
// Memory.
auto endianness = static_cast<xenos::Endian>(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = xe::load<uint32_t>(memory_->TranslatePhysical(poll_reg_addr));
value = GpuSwap(value, endianness);
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4);
uint32_t value = value_ref;
if (is_memory) {
trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr & ~uint32_t(0x3)),
sizeof(uint32_t));
value = xenos::GpuSwap(value,
static_cast<xenos::Endian>(poll_reg_addr & 0x3));
} else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
MakeCoherent();
value = register_file_->values[poll_reg_addr].u32;
value = value_ref;
}
}
switch (wait_info & 0x7) {
@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
uint32_t rmw_info = reader->ReadAndSwap<uint32_t>();
uint32_t and_mask = reader->ReadAndSwap<uint32_t>();
uint32_t or_mask = reader->ReadAndSwap<uint32_t>();
uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32;
uint32_t value = register_file_->values[rmw_info & 0x1FFF];
if ((rmw_info >> 31) & 0x1) {
// & reg
value &= register_file_->values[and_mask & 0x1FFF].u32;
value &= register_file_->values[and_mask & 0x1FFF];
} else {
// & imm
value &= and_mask;
}
if ((rmw_info >> 30) & 0x1) {
// | reg
value |= register_file_->values[or_mask & 0x1FFF].u32;
value |= register_file_->values[or_mask & 0x1FFF];
} else {
// | imm
value |= or_mask;
@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
uint32_t reg_val;
assert_true(reg_addr < RegisterFile::kRegisterCount);
reg_val = register_file_->values[reg_addr].u32;
reg_val = register_file_->values[reg_addr];
auto endianness = static_cast<xenos::Endian>(mem_addr & 0x3);
mem_addr &= ~0x3;
@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
} else {
// Register.
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
value = register_file_->values[poll_reg_addr].u32;
value = register_file_->values[poll_reg_addr];
}
bool matched = false;
switch (wait_info & 0x7) {
@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
if (fake_sample_count >= 0) {
auto* pSampleCounts =
memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>(
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR].u32);
register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
// 0xFFFFFEED is written to these two locations by D3D only on D3DISSUE_END
// and is used to detect a finished query.
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
// The scan converter writes the internal result back to the register here.
// We just fake it and say it was visible in case it is read back.
if (id < 32) {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0].u32 |=
uint32_t(1) << id;
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t(1)
<< id;
} else {
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1].u32 |=
register_file_->values[XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
uint32_t(1) << (id - 32);
}
}
@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() {
// Write the initial register values, to be loaded directly into the
// RegisterFile since all registers, including those that may have side
// effects on setting, will be saved.
trace_writer_.WriteRegisters(
0, reinterpret_cast<const uint32_t*>(register_file_->values),
RegisterFile::kRegisterCount, false);
trace_writer_.WriteRegisters(0, register_file_->values,
RegisterFile::kRegisterCount, false);
trace_writer_.WriteGammaRamp(gamma_ramp_256_entry_table(),
gamma_ramp_pwl_rgb(), gamma_ramp_rw_component_);

View file

@ -17,6 +17,7 @@
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
@ -2306,8 +2307,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
vfetch_bits_remaining &= ~(uint32_t(1) << j);
uint32_t vfetch_index = i * 32 + j;
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex:
break;
@ -3050,10 +3051,10 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
// Blend factor.
float blend_factor[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
};
// std::memcmp instead of != so in case of NaN, every draw won't be
// invalidating it.
@ -3100,7 +3101,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3241,9 +3242,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Tessellation factor range, plus 1.0 according to the images in
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
float tessellation_factor_min =
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
regs.Get<float>(XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL) + 1.0f;
float tessellation_factor_max =
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
regs.Get<float>(XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL) + 1.0f;
dirty |= system_constants_.tessellation_factor_range_min !=
tessellation_factor_min;
system_constants_.tessellation_factor_range_min = tessellation_factor_min;
@ -3280,12 +3281,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
while (xe::bit_scan_forward(user_clip_planes_remaining,
&user_clip_plane_index)) {
user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index);
const float* user_clip_plane =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32;
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane,
const void* user_clip_plane_regs =
&regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4];
if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float))) {
dirty = true;
std::memcpy(user_clip_plane_write_ptr, user_clip_plane,
std::memcpy(user_clip_plane_write_ptr, user_clip_plane_regs,
4 * sizeof(float));
}
user_clip_plane_write_ptr += 4;
@ -3423,9 +3424,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
color_exp_bias -= 5;
}
}
float color_exp_bias_scale;
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
0x3F800000 + (color_exp_bias << 23);
auto color_exp_bias_scale = xe::memory::Reinterpret<float>(
int32_t(0x3F800000 + (color_exp_bias << 23)));
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
if (edram_rov_used) {
@ -3454,7 +3454,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float));
uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3477,22 +3477,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
@ -3567,21 +3567,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
}
cbuffer_binding_system_.up_to_date &= !dirty;
@ -3638,10 +3638,10 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map();
@ -3715,8 +3715,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
@ -3746,8 +3745,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
@ -3767,7 +3765,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
return false;
}
std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize);
cbuffer_binding_bool_loop_.up_to_date = true;
current_graphics_root_up_to_date_ &=
@ -3782,8 +3780,7 @@ bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
if (fetch_constants == nullptr) {
return false;
}
std::memcpy(fetch_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize);
cbuffer_binding_fetch_.up_to_date = true;
current_graphics_root_up_to_date_ &=

View file

@ -960,8 +960,8 @@ uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters;
@ -1441,8 +1441,7 @@ ID3D12Resource* D3D12TextureCache::RequestSwapTexture(
D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
xenos::TextureFormat& format_out) {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 ||

View file

@ -15,6 +15,7 @@
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/ucode.h"
@ -67,7 +68,7 @@ void DrawExtentEstimator::PositionYExportSink::Export(
point_size_ = value[0];
}
if (value_mask & 0b0100) {
vertex_kill_ = *reinterpret_cast<const uint32_t*>(&value[2]);
vertex_kill_ = xe::memory::Reinterpret<uint32_t>(value[2]);
}
}
}
@ -110,7 +111,7 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
xenos::Endian index_endian = vgt_dma_size.swap_mode;
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA) {
xenos::IndexFormat index_format = vgt_draw_initiator.index_size;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32;
uint32_t index_buffer_base = regs[XE_GPU_REG_VGT_DMA_BASE];
uint32_t index_buffer_read_count =
std::min(uint32_t(vgt_draw_initiator.num_indices),
uint32_t(vgt_dma_size.num_words));
@ -145,21 +146,22 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_y_scale = pa_cl_vte_cntl.vport_y_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f;
float viewport_y_offset = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: 0.0f;
float viewport_y_offset =
pa_cl_vte_cntl.vport_y_offset_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f;
int32_t point_vertex_min_diameter_float = 0;
int32_t point_vertex_max_diameter_float = 0;
float point_constant_radius_y = 0.0f;
if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
*reinterpret_cast<float*>(&point_vertex_min_diameter_float) =
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
*reinterpret_cast<float*>(&point_vertex_max_diameter_float) =
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
point_vertex_min_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.min_size) * (2.0f / 16.0f));
point_vertex_max_diameter_float = xe::memory::Reinterpret<int32_t>(
float(pa_su_point_minmax.max_size) * (2.0f / 16.0f));
point_constant_radius_y =
float(regs.Get<reg::PA_SU_POINT_SIZE>().height) * (1.0f / 16.0f);
}
@ -224,12 +226,13 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
// Vertex-specified diameter. Clamped effectively as a signed integer in
// the hardware, -NaN, -Infinity ... -0 to the minimum, +Infinity, +NaN
// to the maximum.
point_radius_y = position_y_export_sink.point_size().value();
*reinterpret_cast<int32_t*>(&point_radius_y) = std::min(
point_vertex_max_diameter_float,
std::max(point_vertex_min_diameter_float,
*reinterpret_cast<const int32_t*>(&point_radius_y)));
point_radius_y *= 0.5f;
point_radius_y =
0.5f *
xe::memory::Reinterpret<float>(std::min(
point_vertex_max_diameter_float,
std::max(point_vertex_min_diameter_float,
xe::memory::Reinterpret<int32_t>(
position_y_export_sink.point_size().value()))));
} else {
// Constant radius.
point_radius_y = point_constant_radius_y;
@ -331,11 +334,12 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
}
// Then apply the floating-point viewport offset.
if (pa_cl_vte_cntl.vport_y_offset_ena) {
viewport_bottom += regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32;
viewport_bottom += regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET);
}
viewport_bottom += pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 1.0f;
viewport_bottom +=
pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE))
: 1.0f;
// Using floor, or, rather, truncation (because maxing with zero anyway)
// similar to how viewport scissoring behaves on real AMD, Intel and Nvidia
// GPUs on Direct3D 12 (but not WARP), also like in

View file

@ -11,7 +11,6 @@
#include <algorithm>
#include <cmath>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
@ -100,20 +99,20 @@ void GetPreferredFacePolygonOffset(const RegisterFile& regs,
// ones that are rendered (except for shadow volumes).
if (pa_su_sc_mode_cntl.poly_offset_front_enable &&
!pa_su_sc_mode_cntl.cull_front) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable &&
!pa_su_sc_mode_cntl.cull_back && !scale && !offset) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
// Non-triangle primitives use the front offset, but it's toggled via
// poly_offset_para_enable.
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
scale = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
offset = regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
}
scale_out = scale;
@ -148,7 +147,7 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
}
// Check if a color target is actually written.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t rts_remaining = shader.writes_color_targets();
uint32_t rt_index;
while (xe::bit_scan_forward(rts_remaining, &rt_index)) {
@ -311,24 +310,26 @@ void GetHostViewportInfo(const RegisterFile& regs,
// Obtain the original viewport values in a normalized way.
float scale_xy[] = {
pa_cl_vte_cntl.vport_x_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32
: 1.0f,
pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: 1.0f,
pa_cl_vte_cntl.vport_x_scale_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
: 1.0f,
pa_cl_vte_cntl.vport_y_scale_ena
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
};
float scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f;
float offset_base_xy[] = {
pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
: 0.0f,
pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
};
float offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f;
// Calculate all the integer.0 or integer.5 offsetting exactly at full
// precision, separately so it can be used in other integer calculations
@ -398,16 +399,11 @@ void GetHostViewportInfo(const RegisterFile& regs,
float offset_axis = offset_base_xy[i] + offset_add_xy[i];
float scale_axis = scale_xy[i];
float scale_axis_abs = std::abs(scale_xy[i]);
float axis_0 = offset_axis - scale_axis_abs;
float axis_1 = offset_axis + scale_axis_abs;
float axis_max_unscaled_float = float(xy_max_unscaled[i]);
// max(0.0f, xy) drops NaN and < 0 - max picks the first argument in the
// !(a < b) case (always for NaN), min as float (axis_max_unscaled_float
// is well below 2^24) to safely drop very large values.
uint32_t axis_0_int =
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_0)));
uint32_t axis_1_int =
uint32_t(std::min(axis_max_unscaled_float, std::max(0.0f, axis_1)));
uint32_t axis_0_int = uint32_t(xe::clamp_float(
offset_axis - scale_axis_abs, 0.0f, axis_max_unscaled_float));
uint32_t axis_1_int = uint32_t(xe::clamp_float(
offset_axis + scale_axis_abs, 0.0f, axis_max_unscaled_float));
uint32_t axis_extent_int = axis_1_int - axis_0_int;
viewport_info_out.xy_offset[i] = axis_0_int * axis_resolution_scale;
viewport_info_out.xy_extent[i] = axis_extent_int * axis_resolution_scale;
@ -510,8 +506,8 @@ void GetHostViewportInfo(const RegisterFile& regs,
// extension. But cases when this really matters are yet to be found -
// trying to fix this will result in more correct depth values, but
// incorrect clipping.
z_min = xe::saturate_unsigned(host_clip_offset_z);
z_max = xe::saturate_unsigned(host_clip_offset_z + host_clip_scale_z);
z_min = xe::saturate(host_clip_offset_z);
z_max = xe::saturate(host_clip_offset_z + host_clip_scale_z);
// Direct3D 12 doesn't allow reverse depth range - on some drivers it
// works, on some drivers it doesn't, actually, but it was never
// explicitly allowed by the specification.
@ -615,7 +611,7 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
return 0;
}
uint32_t normalized_color_mask = 0;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
// Exclude the render targets not statically written to by the pixel shader.
// If the shader doesn't write to a render target, it shouldn't be written
@ -661,10 +657,16 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
? regs.Get<reg::SQ_VS_CONST>().base
: regs.Get<reg::SQ_PS_CONST>().base;
for (uint32_t constant_index : shader.memexport_stream_constants()) {
const auto& stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
XE_GPU_REG_SHADER_CONSTANT_000_X +
(float_constants_base + constant_index) * 4);
if (!stream.index_count) {
xenos::xe_gpu_memexport_stream_t stream =
regs.GetMemExportStream(float_constants_base + constant_index);
// Safety checks for stream constants potentially not set up if the export
// isn't done on the control flow path taken by the shader (not checking the
// Y component because the index is more likely to be constructed
// arbitrarily).
// The hardware validates the upper bits of eA according to the
// IPR2015-00325 sequencer specification.
if (stream.const_0x1 != 0x1 || stream.const_0x4b0 != 0x4B0 ||
stream.const_0x96 != 0x96 || !stream.index_count) {
continue;
}
const FormatInfo& format_info =
@ -705,7 +707,7 @@ void AddMemExportRanges(const RegisterFile& regs, const Shader& shader,
}
// Add a new range if haven't expanded an existing one.
if (!range_reused) {
ranges_out.emplace_back(stream.base_address, stream_size_bytes);
ranges_out.emplace_back(uint32_t(stream.base_address), stream_size_bytes);
}
}
}
@ -824,8 +826,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
// Get the extent of pixels covered by the resolve rectangle, according to the
// top-left rasterization rule.
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
auto fetch = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_vertex_fetch_t fetch = regs.GetVertexFetch(0);
if (fetch.type != xenos::FetchConstantType::kVertex || fetch.size != 3 * 2) {
XELOGE("Unsupported resolve vertex buffer format");
assert_always();
@ -878,10 +879,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
GetScissor(regs, scissor, false);
int32_t scissor_right = int32_t(scissor.offset[0] + scissor.extent[0]);
int32_t scissor_bottom = int32_t(scissor.offset[1] + scissor.extent[1]);
x0 = xe::clamp(x0, int32_t(scissor.offset[0]), scissor_right);
y0 = xe::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom);
x1 = xe::clamp(x1, int32_t(scissor.offset[0]), scissor_right);
y1 = xe::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom);
x0 = std::clamp(x0, int32_t(scissor.offset[0]), scissor_right);
y0 = std::clamp(y0, int32_t(scissor.offset[1]), scissor_bottom);
x1 = std::clamp(x1, int32_t(scissor.offset[0]), scissor_right);
y1 = std::clamp(y1, int32_t(scissor.offset[1]), scissor_bottom);
assert_true(x0 <= x1 && y0 <= y1);
@ -994,7 +995,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
}
// Calculate the destination memory extent.
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
uint32_t rb_copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
uint32_t copy_dest_base_adjusted = rb_copy_dest_base;
uint32_t copy_dest_extent_start, copy_dest_extent_end;
auto rb_copy_dest_pitch = regs.Get<reg::RB_COPY_DEST_PITCH>();
@ -1164,9 +1165,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
info_out.copy_dest_info.copy_dest_swap = false;
}
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32;
info_out.rb_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR];
info_out.rb_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR];
info_out.rb_color_clear_lo = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO];
XELOGD(
"Resolve: {},{} <= x,y < {},{}, {} -> {} at 0x{:08X} (potentially "

View file

@ -17,6 +17,7 @@
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe {
namespace gpu {
@ -1102,10 +1103,10 @@ struct Src : OperandAddress {
}
static Src LI(int32_t x) { return LI(x, x, x, x); }
// Builds a literal source operand from four floats. Each component is
// forwarded to LU as its raw 32-bit pattern (a bit reinterpretation, not a
// numeric float-to-integer conversion).
static Src LF(float x, float y, float z, float w) {
return LU(*reinterpret_cast<const uint32_t*>(&x),
*reinterpret_cast<const uint32_t*>(&y),
*reinterpret_cast<const uint32_t*>(&z),
*reinterpret_cast<const uint32_t*>(&w));
return LU(xe::memory::Reinterpret<uint32_t>(x),
xe::memory::Reinterpret<uint32_t>(y),
xe::memory::Reinterpret<uint32_t>(z),
xe::memory::Reinterpret<uint32_t>(w));
}
static Src LF(float x) { return LF(x, x, x, x); }
static Src LP(const uint32_t* xyzw) {
@ -1222,12 +1223,10 @@ struct Src : OperandAddress {
bool negate) {
if (is_integer) {
if (absolute) {
*reinterpret_cast<int32_t*>(&value) =
std::abs(*reinterpret_cast<const int32_t*>(&value));
value = uint32_t(std::abs(int32_t(value)));
}
if (negate) {
*reinterpret_cast<int32_t*>(&value) =
-*reinterpret_cast<const int32_t*>(&value);
value = uint32_t(-int32_t(value));
}
} else {
if (absolute) {

View file

@ -201,7 +201,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
}
assert_true(r < RegisterFile::kRegisterCount);
return register_file_.values[r].u32;
return register_file_.values[r];
}
void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
@ -219,7 +219,7 @@ void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) {
}
assert_true(r < RegisterFile::kRegisterCount);
register_file_.values[r].u32 = value;
register_file_.values[r] = value;
}
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) {

View file

@ -42,7 +42,7 @@ struct PacketAction {
union {
struct {
uint32_t index;
RegisterFile::RegisterValue value;
uint32_t value;
} register_write;
struct {
uint64_t value;
@ -56,7 +56,7 @@ struct PacketAction {
PacketAction action;
action.type = Type::kRegisterWrite;
action.register_write.index = index;
action.register_write.value.u32 = value;
action.register_write.value = value;
return action;
}

View file

@ -498,8 +498,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
~uint32_t((1 << index_size_log2) - 1);
guest_index_base =
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize ||
@ -652,8 +652,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety.
guest_index_base = regs[XE_GPU_REG_VGT_DMA_BASE].u32 &
~uint32_t((1 << index_size_log2) - 1);
guest_index_base =
regs[XE_GPU_REG_VGT_DMA_BASE] & ~uint32_t((1 << index_size_log2) - 1);
guest_index_buffer_needed_bytes = guest_draw_vertex_count
<< index_size_log2;
if (guest_index_base > SharedMemory::kBufferSize ||

View file

@ -12,8 +12,12 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/memory.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
@ -34,39 +38,53 @@ class RegisterFile {
static const RegisterInfo* GetRegisterInfo(uint32_t index);
static constexpr size_t kRegisterCount = 0x5003;
union RegisterValue {
uint32_t u32;
float f32;
};
RegisterValue values[kRegisterCount];
uint32_t values[kRegisterCount];
const uint32_t& operator[](uint32_t reg) const { return values[reg]; }
uint32_t& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](uint32_t reg) const { return values[reg]; }
RegisterValue& operator[](uint32_t reg) { return values[reg]; }
const RegisterValue& operator[](Register reg) const { return values[reg]; }
RegisterValue& operator[](Register reg) { return values[reg]; }
template <typename T>
const T& Get(uint32_t reg) const {
return *reinterpret_cast<const T*>(&values[reg]);
T Get(uint32_t reg) const {
return xe::memory::Reinterpret<T>(values[reg]);
}
template <typename T>
T& Get(uint32_t reg) {
return *reinterpret_cast<T*>(&values[reg]);
T Get(Register reg) const {
return Get<T>(static_cast<uint32_t>(reg));
}
template <typename T>
const T& Get(Register reg) const {
return *reinterpret_cast<const T*>(&values[reg]);
T Get() const {
return Get<T>(T::register_index);
}
template <typename T>
T& Get(Register reg) {
return *reinterpret_cast<T*>(&values[reg]);
// Returns a by-value copy of vertex fetch constant `index`.
// The constant is read from sizeof(fetch) / sizeof(uint32_t) consecutive
// registers starting at XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0, scaled by
// `index`. `index` must be below 96 (checked with assert_true only).
xenos::xe_gpu_vertex_fetch_t GetVertexFetch(uint32_t index) const {
assert_true(index < 96);
xenos::xe_gpu_vertex_fetch_t fetch;
// Copy the register words into the structure with memcpy instead of
// viewing the uint32_t array through a pointer of another type.
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
}
template <typename T>
const T& Get() const {
return *reinterpret_cast<const T*>(&values[T::register_index]);
// Returns a by-value copy of texture fetch constant `index`.
// The constant is read from sizeof(fetch) / sizeof(uint32_t) consecutive
// registers starting at XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0, scaled by
// `index`. `index` must be below 32 (checked with assert_true only).
xenos::xe_gpu_texture_fetch_t GetTextureFetch(uint32_t index) const {
assert_true(index < 32);
xenos::xe_gpu_texture_fetch_t fetch;
// Copy the register words into the structure with memcpy instead of
// viewing the uint32_t array through a pointer of another type.
std::memcpy(&fetch,
&values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(sizeof(fetch) / sizeof(uint32_t)) * index],
sizeof(fetch));
return fetch;
}
template <typename T>
T& Get() {
return *reinterpret_cast<T*>(&values[T::register_index]);
// Returns a by-value copy of the memexport stream stored in float constant
// `float_constant_index`. Float constants are addressed as 4 registers each,
// starting at XE_GPU_REG_SHADER_CONSTANT_000_X. `float_constant_index` must
// be below 512 (checked with assert_true only).
xenos::xe_gpu_memexport_stream_t GetMemExportStream(
uint32_t float_constant_index) const {
assert_true(float_constant_index < 512);
xenos::xe_gpu_memexport_stream_t stream;
// Copy the register words into the structure with memcpy instead of
// viewing the uint32_t array through a pointer of another type.
std::memcpy(
&stream,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * float_constant_index],
sizeof(stream));
return stream;
}
};

View file

@ -28,10 +28,7 @@ void ShaderInterpreter::Execute() {
state_.Reset();
const uint32_t* bool_constants =
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
const xenos::LoopConstant* loop_constants =
reinterpret_cast<const xenos::LoopConstant*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32);
&register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031];
bool exec_ended = false;
uint32_t cf_index_next = 1;
@ -140,8 +137,8 @@ void ShaderInterpreter::Execute() {
cf_index_next = cf_loop_start.address();
continue;
}
xenos::LoopConstant loop_constant =
loop_constants[cf_loop_start.loop_id()];
auto loop_constant = register_file_.Get<xenos::LoopConstant>(
XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + cf_loop_start.loop_id());
state_.loop_constants[state_.loop_stack_depth] = loop_constant;
uint32_t& loop_iterator_ref =
state_.loop_iterators[state_.loop_stack_depth];
@ -170,8 +167,11 @@ void ShaderInterpreter::Execute() {
&cf_instr);
xenos::LoopConstant loop_constant =
state_.loop_constants[state_.loop_stack_depth - 1];
assert_true(loop_constant.value ==
loop_constants[cf_loop_end.loop_id()].value);
assert_zero(
std::memcmp(&loop_constant,
&register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 +
cf_loop_end.loop_id()],
sizeof(loop_constant)));
uint32_t loop_iterator =
++state_.loop_iterators[state_.loop_stack_depth - 1];
if (loop_iterator < loop_constant.count &&
@ -257,28 +257,31 @@ void ShaderInterpreter::Execute() {
}
}
const float* ShaderInterpreter::GetFloatConstant(
const std::array<float, 4> ShaderInterpreter::GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const {
static const float zero[4] = {};
int32_t index = int32_t(address);
if (is_relative) {
index += relative_address_is_a0 ? state_.address_register
: state_.GetLoopAddress();
}
if (index < 0) {
return zero;
return std::array<float, 4>();
}
auto base_and_size_minus_1 = register_file_.Get<reg::SQ_VS_CONST>(
shader_type_ == xenos::ShaderType::kVertex ? XE_GPU_REG_SQ_VS_CONST
: XE_GPU_REG_SQ_PS_CONST);
if (uint32_t(index) > base_and_size_minus_1.size) {
return zero;
return std::array<float, 4>();
}
index += base_and_size_minus_1.base;
if (index >= 512) {
return zero;
return std::array<float, 4>();
}
return &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index].f32;
std::array<float, 4> value;
std::memcpy(value.data(),
&register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X + 4 * index],
sizeof(float) * 4);
return value;
}
void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
@ -297,6 +300,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
const float* vector_src_ptr;
uint32_t vector_src_register = instr.src_reg(1 + i);
bool vector_src_absolute = false;
std::array<float, 4> vector_src_float_constant;
if (instr.src_is_temp(1 + i)) {
vector_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(vector_src_register),
@ -304,9 +308,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
vector_src_register);
} else {
vector_src_ptr = GetFloatConstant(
vector_src_float_constant = GetFloatConstant(
vector_src_register, instr.src_const_is_addressed(1 + i),
instr.is_const_address_register_relative());
vector_src_ptr = vector_src_float_constant.data();
}
uint32_t vector_src_absolute_mask =
~(uint32_t(vector_src_absolute) << 31);
@ -341,16 +346,18 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break;
case ucode::AluVectorOpcode::kMax: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i]
? vector_operands[0][i]
: vector_operands[1][i];
vector_result[i] =
std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
? vector_operands[0][i]
: vector_operands[1][i];
}
} break;
case ucode::AluVectorOpcode::kMin: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] < vector_operands[1][i]
? vector_operands[0][i]
: vector_operands[1][i];
vector_result[i] =
std::isless(vector_operands[0][i], vector_operands[1][i])
? vector_operands[0][i]
: vector_operands[1][i];
}
} break;
case ucode::AluVectorOpcode::kSeq: {
@ -361,14 +368,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break;
case ucode::AluVectorOpcode::kSgt: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] =
float(vector_operands[0][i] > vector_operands[1][i]);
vector_result[i] = float(
std::isgreater(vector_operands[0][i], vector_operands[1][i]));
}
} break;
case ucode::AluVectorOpcode::kSge: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] =
float(vector_operands[0][i] >= vector_operands[1][i]);
vector_result[i] = float(std::isgreaterequal(vector_operands[0][i],
vector_operands[1][i]));
}
} break;
case ucode::AluVectorOpcode::kSne: {
@ -414,14 +421,14 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break;
case ucode::AluVectorOpcode::kCndGe: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= 0.0f
vector_result[i] = std::isgreaterequal(vector_operands[0][i], 0.0f)
? vector_operands[1][i]
: vector_operands[2][i];
}
} break;
case ucode::AluVectorOpcode::kCndGt: {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] > 0.0f
vector_result[i] = std::isgreater(vector_operands[0][i], 0.0f)
? vector_operands[1][i]
: vector_operands[2][i];
}
@ -473,32 +480,38 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
float x_abs = std::abs(x), y_abs = std::abs(y), z_abs = std::abs(z);
// Result is T coordinate, S coordinate, 2 * major axis, face ID.
if (z_abs >= x_abs && z_abs >= y_abs) {
bool z_negative = std::isless(z, 0.0f);
vector_result[0] = -y;
vector_result[1] = z < 0.0f ? -x : x;
vector_result[1] = z_negative ? -x : x;
vector_result[2] = z;
vector_result[3] = z < 0.0f ? 5.0f : 4.0f;
vector_result[3] = z_negative ? 5.0f : 4.0f;
} else if (y_abs >= x_abs) {
vector_result[0] = y < 0.0f ? -z : z;
bool y_negative = std::isless(y, 0.0f);
vector_result[0] = y_negative ? -z : z;
vector_result[1] = x;
vector_result[2] = y;
vector_result[3] = y < 0.0f ? 3.0f : 2.0f;
vector_result[3] = y_negative ? 3.0f : 2.0f;
} else {
bool x_negative = std::isless(x, 0.0f);
vector_result[0] = -y;
vector_result[1] = x < 0.0f ? z : -z;
vector_result[1] = x_negative ? z : -z;
vector_result[2] = x;
vector_result[3] = x < 0.0f ? 1.0f : 0.0f;
vector_result[3] = x_negative ? 1.0f : 0.0f;
}
vector_result[2] *= 2.0f;
} break;
case ucode::AluVectorOpcode::kMax4: {
if (vector_operands[0][0] >= vector_operands[0][1] &&
vector_operands[0][0] >= vector_operands[0][2] &&
vector_operands[0][0] >= vector_operands[0][3]) {
if (std::isgreaterequal(vector_operands[0][0], vector_operands[0][1]) &&
std::isgreaterequal(vector_operands[0][0], vector_operands[0][2]) &&
std::isgreaterequal(vector_operands[0][0], vector_operands[0][3])) {
vector_result[0] = vector_operands[0][0];
} else if (vector_operands[0][1] >= vector_operands[0][2] &&
vector_operands[0][1] >= vector_operands[0][3]) {
} else if (std::isgreaterequal(vector_operands[0][1],
vector_operands[0][2]) &&
std::isgreaterequal(vector_operands[0][1],
vector_operands[0][3])) {
vector_result[0] = vector_operands[0][1];
} else if (vector_operands[0][2] >= vector_operands[0][3]) {
} else if (std::isgreaterequal(vector_operands[0][2],
vector_operands[0][3])) {
vector_result[0] = vector_operands[0][2];
} else {
vector_result[0] = vector_operands[0][3];
@ -524,21 +537,21 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
replicate_vector_result_x = true;
} break;
case ucode::AluVectorOpcode::kSetpGtPush: {
state_.predicate =
vector_operands[0][3] == 0.0f && vector_operands[1][3] > 0.0f;
vector_result[0] =
(vector_operands[0][0] == 0.0f && vector_operands[1][0] > 0.0f)
? 0.0f
: vector_operands[0][0] + 1.0f;
state_.predicate = vector_operands[0][3] == 0.0f &&
std::isgreater(vector_operands[1][3], 0.0f);
vector_result[0] = (vector_operands[0][0] == 0.0f &&
std::isgreater(vector_operands[1][0], 0.0f))
? 0.0f
: vector_operands[0][0] + 1.0f;
replicate_vector_result_x = true;
} break;
case ucode::AluVectorOpcode::kSetpGePush: {
state_.predicate =
vector_operands[0][3] == 0.0f && vector_operands[1][3] >= 0.0f;
vector_result[0] =
(vector_operands[0][0] == 0.0f && vector_operands[1][0] >= 0.0f)
? 0.0f
: vector_operands[0][0] + 1.0f;
state_.predicate = vector_operands[0][3] == 0.0f &&
std::isgreaterequal(vector_operands[1][3], 0.0f);
vector_result[0] = (vector_operands[0][0] == 0.0f &&
std::isgreaterequal(vector_operands[1][0], 0.0f))
? 0.0f
: vector_operands[0][0] + 1.0f;
replicate_vector_result_x = true;
} break;
// Not implementing pixel kill currently, the interpreter is currently
@ -552,19 +565,19 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
replicate_vector_result_x = true;
} break;
case ucode::AluVectorOpcode::kKillGt: {
vector_result[0] =
float(vector_operands[0][0] > vector_operands[1][0] ||
vector_operands[0][1] > vector_operands[1][1] ||
vector_operands[0][2] > vector_operands[1][2] ||
vector_operands[0][3] > vector_operands[1][3]);
vector_result[0] = float(
std::isgreater(vector_operands[0][0], vector_operands[1][0]) ||
std::isgreater(vector_operands[0][1], vector_operands[1][1]) ||
std::isgreater(vector_operands[0][2], vector_operands[1][2]) ||
std::isgreater(vector_operands[0][3], vector_operands[1][3]));
replicate_vector_result_x = true;
} break;
case ucode::AluVectorOpcode::kKillGe: {
vector_result[0] =
float(vector_operands[0][0] >= vector_operands[1][0] ||
vector_operands[0][1] >= vector_operands[1][1] ||
vector_operands[0][2] >= vector_operands[1][2] ||
vector_operands[0][3] >= vector_operands[1][3]);
vector_result[0] = float(
std::isgreaterequal(vector_operands[0][0], vector_operands[1][0]) ||
std::isgreaterequal(vector_operands[0][1], vector_operands[1][1]) ||
std::isgreaterequal(vector_operands[0][2], vector_operands[1][2]) ||
std::isgreaterequal(vector_operands[0][3], vector_operands[1][3]));
replicate_vector_result_x = true;
} break;
case ucode::AluVectorOpcode::kKillNe: {
@ -585,14 +598,13 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
vector_result[3] = vector_operands[1][3];
} break;
case ucode::AluVectorOpcode::kMaxA: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t(std::floor(
std::min(255.0f, std::max(-256.0f, vector_operands[0][3])) + 0.5f));
xe::clamp_float(vector_operands[0][3], -256.0f, 255.0f) + 0.5f));
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = vector_operands[0][i] >= vector_operands[1][i]
? vector_operands[0][i]
: vector_operands[1][i];
vector_result[i] =
std::isgreaterequal(vector_operands[0][i], vector_operands[1][i])
? vector_operands[0][i]
: vector_operands[1][i];
}
} break;
default: {
@ -618,6 +630,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
// r#/c#.w or r#/c#.wx.
const float* scalar_src_ptr;
uint32_t scalar_src_register = instr.src_reg(3);
std::array<float, 4> scalar_src_float_constant;
if (instr.src_is_temp(3)) {
scalar_src_ptr = GetTempRegister(
ucode::AluInstruction::src_temp_reg(scalar_src_register),
@ -625,9 +638,10 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
scalar_src_absolute = ucode::AluInstruction::is_src_temp_value_absolute(
scalar_src_register);
} else {
scalar_src_ptr = GetFloatConstant(
scalar_src_float_constant = GetFloatConstant(
scalar_src_register, instr.src_const_is_addressed(3),
instr.is_const_address_register_relative());
scalar_src_ptr = scalar_src_float_constant.data();
}
uint32_t scalar_src_swizzle = instr.src_swizzle(3);
scalar_operand_component_count =
@ -695,7 +709,8 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
case ucode::AluScalarOpcode::kMulsPrev2: {
if (state_.previous_scalar == -FLT_MAX ||
!std::isfinite(state_.previous_scalar) ||
!std::isfinite(scalar_operands[1]) || scalar_operands[1] <= 0.0f) {
!std::isfinite(scalar_operands[1]) ||
std::islessequal(scalar_operands[1], 0.0f)) {
state_.previous_scalar = -FLT_MAX;
} else {
// Direct3D 9 behavior (0 or denormal * anything = +0).
@ -706,23 +721,26 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
}
} break;
case ucode::AluScalarOpcode::kMaxs: {
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
? scalar_operands[0]
: scalar_operands[1];
state_.previous_scalar =
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
? scalar_operands[0]
: scalar_operands[1];
} break;
case ucode::AluScalarOpcode::kMins: {
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
? scalar_operands[0]
: scalar_operands[1];
state_.previous_scalar =
std::isless(scalar_operands[0], scalar_operands[1])
? scalar_operands[0]
: scalar_operands[1];
} break;
case ucode::AluScalarOpcode::kSeqs: {
state_.previous_scalar = float(scalar_operands[0] == 0.0f);
} break;
case ucode::AluScalarOpcode::kSgts: {
state_.previous_scalar = float(scalar_operands[0] > 0.0f);
state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
} break;
case ucode::AluScalarOpcode::kSges: {
state_.previous_scalar = float(scalar_operands[0] >= 0.0f);
state_.previous_scalar =
float(std::isgreaterequal(scalar_operands[0], 0.0f));
} break;
case ucode::AluScalarOpcode::kSnes: {
state_.previous_scalar = float(scalar_operands[0] != 0.0f);
@ -788,22 +806,20 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = 1.0f / std::sqrt(scalar_operands[0]);
} break;
case ucode::AluScalarOpcode::kMaxAs: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t(std::floor(
std::min(255.0f, std::max(-256.0f, scalar_operands[0])) + 0.5f));
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
? scalar_operands[0]
: scalar_operands[1];
xe::clamp_float(scalar_operands[0], -256.0f, 255.0f) + 0.5f));
state_.previous_scalar =
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
? scalar_operands[0]
: scalar_operands[1];
} break;
case ucode::AluScalarOpcode::kMaxAsf: {
// std::max is `a < b ? b : a`, thus in case of NaN, the first argument
// (-256.0f) is always the result.
state_.address_register = int32_t(
std::floor(std::min(255.0f, std::max(-256.0f, scalar_operands[0]))));
state_.previous_scalar = scalar_operands[0] >= scalar_operands[1]
? scalar_operands[0]
: scalar_operands[1];
std::floor(xe::clamp_float(scalar_operands[0], -256.0f, 255.0f)));
state_.previous_scalar =
std::isgreaterequal(scalar_operands[0], scalar_operands[1])
? scalar_operands[0]
: scalar_operands[1];
} break;
case ucode::AluScalarOpcode::kSubs:
case ucode::AluScalarOpcode::kSubsc0:
@ -822,11 +838,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = float(!state_.predicate);
} break;
case ucode::AluScalarOpcode::kSetpGt: {
state_.predicate = scalar_operands[0] > 0.0f;
state_.predicate = std::isgreater(scalar_operands[0], 0.0f);
state_.previous_scalar = float(!state_.predicate);
} break;
case ucode::AluScalarOpcode::kSetpGe: {
state_.predicate = scalar_operands[0] >= 0.0f;
state_.predicate = std::isgreaterequal(scalar_operands[0], 0.0f);
state_.previous_scalar = float(!state_.predicate);
} break;
case ucode::AluScalarOpcode::kSetpInv: {
@ -838,7 +854,7 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
} break;
case ucode::AluScalarOpcode::kSetpPop: {
float new_counter = scalar_operands[0] - 1.0f;
state_.predicate = new_counter <= 0.0f;
state_.predicate = std::islessequal(new_counter, 0.0f);
state_.previous_scalar = state_.predicate ? 0.0f : new_counter;
} break;
case ucode::AluScalarOpcode::kSetpClr: {
@ -855,10 +871,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
state_.previous_scalar = float(scalar_operands[0] == 0.0f);
} break;
case ucode::AluScalarOpcode::kKillsGt: {
state_.previous_scalar = float(scalar_operands[0] > 0.0f);
state_.previous_scalar = float(std::isgreater(scalar_operands[0], 0.0f));
} break;
case ucode::AluScalarOpcode::kKillsGe: {
state_.previous_scalar = float(scalar_operands[0] >= 0.0f);
state_.previous_scalar =
float(std::isgreaterequal(scalar_operands[0], 0.0f));
} break;
case ucode::AluScalarOpcode::kKillsNe: {
state_.previous_scalar = float(scalar_operands[0] != 0.0f);
@ -884,11 +901,11 @@ void ShaderInterpreter::ExecuteAluInstruction(ucode::AluInstruction instr) {
if (instr.vector_clamp()) {
for (uint32_t i = 0; i < 4; ++i) {
vector_result[i] = xe::saturate_unsigned(vector_result[i]);
vector_result[i] = xe::saturate(vector_result[i]);
}
}
float scalar_result = instr.scalar_clamp()
? xe::saturate_unsigned(state_.previous_scalar)
? xe::saturate(state_.previous_scalar)
: state_.previous_scalar;
uint32_t scalar_result_write_mask = instr.GetScalarOpResultWriteMask();
@ -984,10 +1001,8 @@ void ShaderInterpreter::ExecuteVertexFetchInstruction(
state_.vfetch_full_last = instr;
}
xenos::xe_gpu_vertex_fetch_t fetch_constant =
*reinterpret_cast<const xenos::xe_gpu_vertex_fetch_t*>(
&register_file_[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
state_.vfetch_full_last.fetch_constant_index()]);
xenos::xe_gpu_vertex_fetch_t fetch_constant = register_file_.GetVertexFetch(
state_.vfetch_full_last.fetch_constant_index());
if (!instr.is_mini_fetch()) {
// Get the part of the address that depends on vfetch_full data.

View file

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_INTERPRETER_H_
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
@ -120,8 +121,8 @@ class ShaderInterpreter {
float* GetTempRegister(uint32_t address, bool is_relative) {
return temp_registers_[GetTempRegisterIndex(address, is_relative)];
}
const float* GetFloatConstant(uint32_t address, bool is_relative,
bool relative_address_is_a0) const;
const std::array<float, 4> GetFloatConstant(
uint32_t address, bool is_relative, bool relative_address_is_a0) const;
void ExecuteAluInstruction(ucode::AluInstruction instr);
void StoreFetchResult(uint32_t dest, bool is_dest_relative, uint32_t swizzle,

View file

@ -333,8 +333,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
uint32_t index_bit = UINT32_C(1) << index;
textures_remaining &= ~index_bit;
TextureBinding& binding = texture_bindings_[index];
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(index);
TextureKey old_key = binding.key;
uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzled_signs);

View file

@ -19,6 +19,7 @@
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h"
#include "xenia/base/system.h"
@ -357,9 +358,10 @@ void TraceViewer::DrawPacketDisassemblerUI() {
ImGui::NextColumn();
if (!register_info ||
register_info->type == RegisterInfo::Type::kDword) {
ImGui::Text("%.8X", action.register_write.value.u32);
ImGui::Text("%.8X", action.register_write.value);
} else {
ImGui::Text("%8f", action.register_write.value.f32);
ImGui::Text("%8f", xe::memory::Reinterpret<float>(
action.register_write.value));
}
ImGui::Columns(1);
break;
@ -709,10 +711,8 @@ void TraceViewer::DrawTextureInfo(
const Shader::TextureBinding& texture_binding) {
auto& regs = *graphics_system_->register_file();
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
texture_binding.fetch_constant * 6;
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
auto& fetch = group->texture_fetch;
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(texture_binding.fetch_constant);
if (fetch.type != xenos::FetchConstantType::kTexture &&
(!cvars::gpu_allow_invalid_fetch_constants ||
fetch.type != xenos::FetchConstantType::kInvalidTexture)) {
@ -780,9 +780,9 @@ void TraceViewer::DrawFailedTextureInfo(
void TraceViewer::DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding,
const xe_gpu_vertex_fetch_t* fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2);
uint32_t vertex_count = fetch->size / vertex_binding.stride_words;
const xe_gpu_vertex_fetch_t& fetch) {
const uint8_t* addr = memory_->TranslatePhysical(fetch.address << 2);
uint32_t vertex_count = fetch.size / vertex_binding.stride_words;
int column_count = 0;
for (const auto& attrib : vertex_binding.attributes) {
switch (attrib.fetch_instr.attributes.data_format) {
@ -883,7 +883,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
#define LOADEL(type, wo) \
GpuSwap(xe::load<type>(vstart + \
(attrib.fetch_instr.attributes.offset + wo) * 4), \
fetch->endian)
fetch.endian)
switch (attrib.fetch_instr.attributes.data_format) {
case xenos::VertexFormat::k_32:
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
@ -1066,7 +1066,7 @@ void ProgressBar(float frac, float width, float height = 0,
if (height == 0) {
height = ImGui::GetTextLineHeightWithSpacing();
}
frac = xe::saturate_unsigned(frac);
frac = xe::saturate(frac);
const auto fontAtlas = ImGui::GetIO().Fonts;
@ -1187,7 +1187,7 @@ void TraceViewer::DrawStateUI() {
}
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
const char* mode_name = "Unknown";
switch (enable_mode) {
@ -1210,7 +1210,7 @@ void TraceViewer::DrawStateUI() {
break;
}
case ModeControl::kCopy: {
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
copy_dest_base);
break;
@ -1221,9 +1221,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Viewport State:");
if (true) {
ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if ((pa_su_sc_mode_cntl >> 16) & 1) {
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET];
int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
if (window_offset_x & 0x4000) {
@ -1237,8 +1237,8 @@ void TraceViewer::DrawStateUI() {
} else {
ImGui::BulletText("Window Offset: disabled");
}
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL];
uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR];
ImGui::BulletText(
"Window Scissor: %d,%d to %d,%d (%d x %d)", window_scissor_tl & 0x7FFF,
(window_scissor_tl >> 16) & 0x7FFF, window_scissor_br & 0x7FFF,
@ -1246,7 +1246,7 @@ void TraceViewer::DrawStateUI() {
(window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF),
((window_scissor_br >> 16) & 0x7FFF) -
((window_scissor_tl >> 16) & 0x7FFF));
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_hiz = (surface_info >> 18) & 0x3FFF;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = (surface_info >> 16) & 0x3;
@ -1258,7 +1258,7 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Surface Pitch: %d", surface_pitch);
ImGui::BulletText("Surface HI-Z Pitch: %d", surface_hiz);
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL];
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0;
bool vport_yscale_enable = (vte_control & (1 << 2)) > 0;
@ -1273,14 +1273,20 @@ void TraceViewer::DrawStateUI() {
}
ImGui::BulletText(
"Viewport Offset: %f, %f, %f",
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0,
vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0,
vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0);
vport_xoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XOFFSET)
: 0.0f,
vport_yoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YOFFSET)
: 0.0f,
vport_zoffset_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZOFFSET)
: 0.0f);
ImGui::BulletText(
"Viewport Scale: %f, %f, %f",
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1,
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1);
vport_xscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_XSCALE)
: 1.0f,
vport_yscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_YSCALE)
: 1.0f,
vport_zscale_enable ? regs.Get<float>(XE_GPU_REG_PA_CL_VPORT_ZSCALE)
: 1.0f);
if (!vport_xscale_enable) {
ImGui::PopStyleColor();
}
@ -1290,7 +1296,7 @@ void TraceViewer::DrawStateUI() {
((vte_control >> 8) & 0x1) ? "y/w0" : "y",
((vte_control >> 9) & 0x1) ? "z/w0" : "z",
((vte_control >> 10) & 0x1) ? "w0" : "1/w0");
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL];
bool clip_enabled = ((clip_control >> 17) & 0x1) == 0;
bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1;
ImGui::BulletText("Clip Enabled: %s, DX Clip: %s",
@ -1302,11 +1308,9 @@ void TraceViewer::DrawStateUI() {
ImGui::BulletText("Rasterizer State:");
if (true) {
ImGui::TreePush((const void*)0);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_sc_screen_scissor_tl =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32;
uint32_t pa_sc_screen_scissor_br =
regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
uint32_t pa_sc_screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL];
uint32_t pa_sc_screen_scissor_br = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR];
if (pa_sc_screen_scissor_tl != 0 && pa_sc_screen_scissor_br != 0x20002000) {
int32_t screen_scissor_x = pa_sc_screen_scissor_tl & 0x7FFF;
int32_t screen_scissor_y = (pa_sc_screen_scissor_tl >> 16) & 0x7FFF;
@ -1361,7 +1365,7 @@ void TraceViewer::DrawStateUI() {
}
ImGui::Columns(1);
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
auto rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO];
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
auto surface_msaa =
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
@ -1370,39 +1374,39 @@ void TraceViewer::DrawStateUI() {
if (enable_mode != ModeControl::kDepth) {
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];
if ((color_control & 0x8) != 0) {
ImGui::BulletText("Alpha Test: %s %.2f",
kCompareFuncNames[color_control & 0x7],
regs[XE_GPU_REG_RB_ALPHA_REF].f32);
regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF));
} else {
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
ImGui::BulletText("Alpha Test: disabled");
ImGui::PopStyleColor();
}
auto blend_color = ImVec4(regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32);
auto blend_color = ImVec4(regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA));
ImGui::BulletText("Blend Color: (%.2f,%.2f,%.2f,%.2f)", blend_color.x,
blend_color.y, blend_color.z, blend_color.w);
ImGui::SameLine();
// TODO small_height (was true) parameter was removed
ImGui::ColorButton(nullptr, blend_color);
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK];
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32,
regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
regs[XE_GPU_REG_RB_COLOR_INFO],
regs[XE_GPU_REG_RB_COLOR1_INFO],
regs[XE_GPU_REG_RB_COLOR2_INFO],
regs[XE_GPU_REG_RB_COLOR3_INFO],
};
uint32_t rb_blendcontrol[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL0].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL1].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL2].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL3].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL0],
regs[XE_GPU_REG_RB_BLENDCONTROL1],
regs[XE_GPU_REG_RB_BLENDCONTROL2],
regs[XE_GPU_REG_RB_BLENDCONTROL3],
};
ImGui::Columns(2);
for (int i = 0; i < xe::countof(color_info); ++i) {
@ -1511,9 +1515,9 @@ void TraceViewer::DrawStateUI() {
}
if (ImGui::CollapsingHeader("Depth/Stencil Target")) {
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
auto rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL];
auto rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK];
auto rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO];
bool uses_depth =
(rb_depthcontrol & 0x00000002) || (rb_depthcontrol & 0x00000004);
uint32_t stencil_ref = (rb_stencilrefmask & 0xFF);
@ -1697,10 +1701,9 @@ void TraceViewer::DrawStateUI() {
draw_info.index_buffer_size,
kIndexFormatNames[int(draw_info.index_format)],
kEndiannessNames[int(draw_info.index_endianness)]);
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL];
if (pa_su_sc_mode_cntl & (1 << 21)) {
uint32_t reset_index =
regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX];
if (draw_info.index_format == xenos::IndexFormat::kInt16) {
ImGui::Text("Reset Index: %.4X", reset_index & 0xFFFF);
} else {
@ -1760,30 +1763,16 @@ void TraceViewer::DrawStateUI() {
auto shader = command_processor->active_vertex_shader();
if (shader) {
for (const auto& vertex_binding : shader->vertex_bindings()) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
const auto group =
reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (vertex_binding.fetch_constant % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
assert_true(fetch->endian == xenos::Endian::k8in32);
xe_gpu_vertex_fetch_t fetch =
regs.GetVertexFetch(vertex_binding.fetch_constant);
assert_true(fetch.endian == xenos::Endian::k8in32);
char tree_root_id[32];
sprintf(tree_root_id, "#vertices_root_%d",
vertex_binding.fetch_constant);
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
vertex_binding.fetch_constant, fetch->address << 2,
fetch->size * 4,
kEndiannessNames[int(fetch->endian)])) {
vertex_binding.fetch_constant, fetch.address << 2,
fetch.size * 4,
kEndiannessNames[int(fetch.endian)])) {
ImGui::BeginChild("#vertices", ImVec2(0, 300));
DrawVertexFetcher(shader, vertex_binding, fetch);
ImGui::EndChild();
@ -1831,7 +1820,7 @@ void TraceViewer::DrawStateUI() {
ImGui::Text("f%02d_%d", (i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6,
(i - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) % 6);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1842,8 +1831,9 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_511_X; i += 4) {
ImGui::Text("c%d", (i - XE_GPU_REG_SHADER_CONSTANT_000_X) / 4);
ImGui::NextColumn();
ImGui::Text("%f, %f, %f, %f", regs[i + 0].f32, regs[i + 1].f32,
regs[i + 2].f32, regs[i + 3].f32);
ImGui::Text("%f, %f, %f, %f", regs.Get<float>(i + 0),
regs.Get<float>(i + 1), regs.Get<float>(i + 2),
regs.Get<float>(i + 3));
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1856,7 +1846,7 @@ void TraceViewer::DrawStateUI() {
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32,
(i - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031) * 32 + 31);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);
@ -1867,7 +1857,7 @@ void TraceViewer::DrawStateUI() {
i <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31; ++i) {
ImGui::Text("l%d", i - XE_GPU_REG_SHADER_CONSTANT_LOOP_00);
ImGui::NextColumn();
ImGui::Text("%.8X", regs[i].u32);
ImGui::Text("%.8X", regs[i]);
ImGui::NextColumn();
}
ImGui::Columns(1);

View file

@ -122,7 +122,7 @@ class TraceViewer : public xe::ui::WindowedApp {
void DrawVertexFetcher(Shader* shader,
const Shader::VertexBinding& vertex_binding,
const xenos::xe_gpu_vertex_fetch_t* fetch);
const xenos::xe_gpu_vertex_fetch_t& fetch);
TraceViewerWindowListener window_listener_;

View file

@ -2486,8 +2486,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
xenos::xe_gpu_vertex_fetch_t vfetch_constant =
regs.GetVertexFetch(vfetch_index);
switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex:
break;
@ -3285,10 +3285,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Blend constants.
float blend_constants[] = {
regs[XE_GPU_REG_RB_BLEND_RED].f32,
regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED),
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN),
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE),
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA),
};
dynamic_blend_constants_update_needed_ |=
std::memcmp(dynamic_blend_constants_, blend_constants,
@ -3434,7 +3434,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
const RegisterFile& regs = *register_file_;
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
auto rb_alpha_ref = regs.Get<float>(XE_GPU_REG_RB_ALPHA_REF);
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
@ -3442,7 +3442,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
auto vgt_indx_offset = regs.Get<int32_t>(XE_GPU_REG_VGT_INDX_OFFSET);
bool edram_fragment_shader_interlock =
render_target_cache_->GetPath() ==
@ -3755,7 +3755,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags;
uint32_t blend_factors_ops =
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]] & 0x1FFF1FFF;
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
blend_factors_ops;
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
@ -3784,22 +3784,22 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
if (primitive_polygonal) {
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
}
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
poly_offset_back_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE);
poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET);
}
} else {
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE);
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
regs.Get<float>(XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET);
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
@ -3862,21 +3862,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
}
dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
system_constants_.edram_blend_constant[0] =
regs[XE_GPU_REG_RB_BLEND_RED].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_RED);
dirty |= system_constants_.edram_blend_constant[1] !=
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
system_constants_.edram_blend_constant[1] =
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_GREEN);
dirty |= system_constants_.edram_blend_constant[2] !=
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
system_constants_.edram_blend_constant[2] =
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_BLUE);
dirty |= system_constants_.edram_blend_constant[3] !=
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
system_constants_.edram_blend_constant[3] =
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
regs.Get<float>(XE_GPU_REG_RB_BLEND_ALPHA);
}
if (dirty) {
@ -3903,10 +3903,10 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ on the Xbox
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_VS_CONST] == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST] == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST] == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST] == 0x00000000);
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map();
@ -4001,8 +4001,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
sizeof(float) * 4);
mapping += sizeof(float) * 4;
}
@ -4033,8 +4032,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
(float_constant_index << 2)],
sizeof(float) * 4);
mapping += sizeof(float) * 4;
}
@ -4055,7 +4053,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false;
}
buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
kBoolLoopConstantsSize);
current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop;
@ -4073,7 +4071,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
return false;
}
buffer_info.range = VkDeviceSize(kFetchConstantsSize);
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0],
kFetchConstantsSize);
current_constant_buffers_up_to_date_ |=
UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch;

View file

@ -718,7 +718,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
[common_blend_rt_index]),
(((normalized_color_mask &
~(uint32_t(0b1111) << (4 * common_blend_rt_index)))
? regs[XE_GPU_REG_RB_COLOR_MASK].u32
? regs[XE_GPU_REG_RB_COLOR_MASK]
: normalized_color_mask) >>
(4 * common_blend_rt_index)) &
0b1111,

View file

@ -612,8 +612,8 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView(
VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters(
const VulkanShader::SamplerBinding& binding) const {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6);
xenos::xe_gpu_texture_fetch_t fetch =
regs.GetTextureFetch(binding.fetch_constant);
SamplerParameters parameters;
@ -875,8 +875,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
uint32_t& width_scaled_out, uint32_t& height_scaled_out,
xenos::TextureFormat& format_out) {
const auto& regs = register_file();
const auto& fetch = regs.Get<xenos::xe_gpu_texture_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0);
xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
TextureKey key;
BindingInfoFromFetchConstant(fetch, key, nullptr);
if (!key.is_valid || key.base_page == 0 ||

View file

@ -12,6 +12,7 @@
#include <cmath>
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
namespace xe {
namespace gpu {
@ -26,7 +27,7 @@ namespace xenos {
float PWLGammaToLinear(float gamma) {
// Not found in game executables, so just using the logic similar to that in
// the Source Engine.
gamma = xe::saturate_unsigned(gamma);
gamma = xe::saturate(gamma);
float scale, offset;
// While the compiled code for linear to gamma conversion uses `vcmpgtfp
// constant, value` comparison (constant > value, or value < constant), it's
@ -67,7 +68,7 @@ float PWLGammaToLinear(float gamma) {
}
float LinearToPWLGamma(float linear) {
linear = xe::saturate_unsigned(linear);
linear = xe::saturate(linear);
float scale, offset;
// While the compiled code uses `vcmpgtfp constant, value` comparison
// (constant > value, or value < constant), it's preferable to use `value >=
@ -118,8 +119,8 @@ float Float7e3To32(uint32_t f10) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0x7F;
}
uint32_t f32 = ((exponent + 124) << 23) | (mantissa << 3);
return *reinterpret_cast<const float*>(&f32);
return xe::memory::Reinterpret<float>(
uint32_t(((exponent + 124) << 23) | (mantissa << 3)));
}
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
@ -131,7 +132,7 @@ uint32_t Float32To20e4(float f32, bool round_to_nearest_even) {
// Positive only, and not -0 or NaN.
return 0;
}
uint32_t f32u32 = *reinterpret_cast<const uint32_t*>(&f32);
auto f32u32 = xe::memory::Reinterpret<uint32_t>(f32);
if (f32u32 >= 0x3FFFFFF8) {
// Saturate.
return 0xFFFFFF;
@ -165,8 +166,8 @@ float Float20e4To32(uint32_t f24) {
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFF;
}
uint32_t f32 = ((exponent + 112) << 23) | (mantissa << 3);
return *reinterpret_cast<const float*>(&f32);
return xe::memory::Reinterpret<float>(
uint32_t(((exponent + 112) << 23) | (mantissa << 3)));
}
const char* GetColorRenderTargetFormatName(ColorRenderTargetFormat format) {

View file

@ -12,6 +12,7 @@
#include <algorithm>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/graphics_util.h"
#include "xenia/ui/presenter.h"
@ -67,24 +68,19 @@ bool ImmediateDrawer::ScissorToRenderTarget(const ImmediateDraw& immediate_draw,
}
float render_target_width_float = float(render_target_width);
float render_target_height_float = float(render_target_height);
// Scale to render target coordinates, drop NaNs (by doing
// std::max(0.0f, variable) in this argument order), and clamp to the render
// Scale to render target coordinates, drop NaNs, and clamp to the render
// target size, below which the values are representable as 16p8 fixed-point.
float scale_x = render_target_width / coordinate_space_width();
float scale_y = render_target_height / coordinate_space_height();
float x0_float =
std::min(render_target_width_float,
std::max(0.0f, immediate_draw.scissor_left * scale_x));
float y0_float =
std::min(render_target_height_float,
std::max(0.0f, immediate_draw.scissor_top * scale_y));
float x0_float = xe::clamp_float(immediate_draw.scissor_left * scale_x, 0.0f,
render_target_width_float);
float y0_float = xe::clamp_float(immediate_draw.scissor_top * scale_y, 0.0f,
render_target_height_float);
// Also make sure the size is non-negative.
float x1_float =
std::min(render_target_width_float,
std::max(x0_float, immediate_draw.scissor_right * scale_x));
float y1_float =
std::min(render_target_height_float,
std::max(y0_float, immediate_draw.scissor_bottom * scale_y));
float x1_float = xe::clamp_float(immediate_draw.scissor_right * scale_x,
x0_float, render_target_width_float);
float y1_float = xe::clamp_float(immediate_draw.scissor_bottom * scale_y,
y0_float, render_target_height_float);
// Top-left - include .5 (0.128 treated as 0 covered, 0.129 as 0 not covered).
int32_t x0 = (FloatToD3D11Fixed16p8(x0_float) + 127) >> 8;
int32_t y0 = (FloatToD3D11Fixed16p8(y0_float) + 127) >> 8;

View file

@ -153,16 +153,16 @@ bool AndroidWindow::OnActivitySurfaceMotionEvent(jobject event) {
// with out-of-bounds coordinates), when moving the mouse outside the
// View, or when starting moving the mouse when the pointer was previously
// outside the View in some cases.
int32_t mouse_x = int32_t(
std::min(float(GetActualPhysicalWidth()),
std::max(0.0f, jni_env->CallFloatMethod(
event, jni_ids.motion_event_get_x, 0))) +
0.5f);
int32_t mouse_y = int32_t(
std::min(float(GetActualPhysicalHeight()),
std::max(0.0f, jni_env->CallFloatMethod(
event, jni_ids.motion_event_get_y, 0))) +
0.5f);
int32_t mouse_x =
int32_t(xe::clamp_float(jni_env->CallFloatMethod(
event, jni_ids.motion_event_get_x, 0),
0.0f, float(GetActualPhysicalWidth())) +
0.5f);
int32_t mouse_y =
int32_t(xe::clamp_float(jni_env->CallFloatMethod(
event, jni_ids.motion_event_get_y, 0),
0.0f, float(GetActualPhysicalHeight())) +
0.5f);
static const MouseEvent::Button kMouseEventButtons[] = {
MouseEvent::Button::kLeft, MouseEvent::Button::kRight,
MouseEvent::Button::kMiddle, MouseEvent::Button::kX1,

View file

@ -350,6 +350,153 @@ void GTKWindow::EndBatchedSizeUpdate(
}
}
// Converts a GDK key value (`keyval`) into the matching ui::VirtualKey.
//
// Explicitly handled keys:
//   - Latin letters: the lowercase and the uppercase keyval of a letter both
//     yield the same VirtualKey, so the result does not depend on Shift or
//     Caps Lock.
//   - Digits 0-9.
//   - Semicolon, apostrophe, comma and period.
//   - The four arrow keys.
//
// Any other keyval is returned as a plain cast of its numeric value.
// NOTE(review): nothing in this function shows that unmapped GDK keyvals
// coincide numerically with VirtualKey values - confirm before relying on the
// fallback for keys that are not listed here.
VirtualKey GTKWindow::TranslateVirtualKey(guint keyval) {
  switch (keyval) {
    // Letters, case-insensitive.
    case GDK_KEY_a:
    case GDK_KEY_A:
      return VirtualKey::kA;
    case GDK_KEY_b:
    case GDK_KEY_B:
      return VirtualKey::kB;
    case GDK_KEY_c:
    case GDK_KEY_C:
      return VirtualKey::kC;
    case GDK_KEY_d:
    case GDK_KEY_D:
      return VirtualKey::kD;
    case GDK_KEY_e:
    case GDK_KEY_E:
      return VirtualKey::kE;
    case GDK_KEY_f:
    case GDK_KEY_F:
      return VirtualKey::kF;
    case GDK_KEY_g:
    case GDK_KEY_G:
      return VirtualKey::kG;
    case GDK_KEY_h:
    case GDK_KEY_H:
      return VirtualKey::kH;
    case GDK_KEY_i:
    case GDK_KEY_I:
      return VirtualKey::kI;
    case GDK_KEY_j:
    case GDK_KEY_J:
      return VirtualKey::kJ;
    case GDK_KEY_k:
    case GDK_KEY_K:
      return VirtualKey::kK;
    case GDK_KEY_l:
    case GDK_KEY_L:
      return VirtualKey::kL;
    case GDK_KEY_m:
    case GDK_KEY_M:
      return VirtualKey::kM;
    case GDK_KEY_n:
    case GDK_KEY_N:
      return VirtualKey::kN;
    case GDK_KEY_o:
    case GDK_KEY_O:
      return VirtualKey::kO;
    case GDK_KEY_p:
    case GDK_KEY_P:
      return VirtualKey::kP;
    case GDK_KEY_q:
    case GDK_KEY_Q:
      return VirtualKey::kQ;
    case GDK_KEY_r:
    case GDK_KEY_R:
      return VirtualKey::kR;
    case GDK_KEY_s:
    case GDK_KEY_S:
      return VirtualKey::kS;
    case GDK_KEY_t:
    case GDK_KEY_T:
      return VirtualKey::kT;
    case GDK_KEY_u:
    case GDK_KEY_U:
      return VirtualKey::kU;
    case GDK_KEY_v:
    case GDK_KEY_V:
      return VirtualKey::kV;
    case GDK_KEY_w:
    case GDK_KEY_W:
      return VirtualKey::kW;
    case GDK_KEY_x:
    case GDK_KEY_X:
      return VirtualKey::kX;
    case GDK_KEY_y:
    case GDK_KEY_Y:
      return VirtualKey::kY;
    case GDK_KEY_z:
    case GDK_KEY_Z:
      return VirtualKey::kZ;

    // Digits.
    case GDK_KEY_0:
      return VirtualKey::k0;
    case GDK_KEY_1:
      return VirtualKey::k1;
    case GDK_KEY_2:
      return VirtualKey::k2;
    case GDK_KEY_3:
      return VirtualKey::k3;
    case GDK_KEY_4:
      return VirtualKey::k4;
    case GDK_KEY_5:
      return VirtualKey::k5;
    case GDK_KEY_6:
      return VirtualKey::k6;
    case GDK_KEY_7:
      return VirtualKey::k7;
    case GDK_KEY_8:
      return VirtualKey::k8;
    case GDK_KEY_9:
      return VirtualKey::k9;

    // Punctuation.
    case GDK_KEY_semicolon:
      return VirtualKey::kOem1;
    case GDK_KEY_apostrophe:
      return VirtualKey::kOem7;
    case GDK_KEY_comma:
      return VirtualKey::kOemComma;
    case GDK_KEY_period:
      return VirtualKey::kOemPeriod;

    // Arrow keys.
    case GDK_KEY_Up:
      return VirtualKey::kUp;
    case GDK_KEY_Down:
      return VirtualKey::kDown;
    case GDK_KEY_Left:
      return VirtualKey::kLeft;
    case GDK_KEY_Right:
      return VirtualKey::kRight;

    // No dedicated translation: hand the raw keyval through unchanged.
    default:
      return VirtualKey(keyval);
  }
}
bool GTKWindow::HandleMouse(GdkEvent* event,
WindowDestructionReceiver& destruction_receiver) {
MouseEvent::Button button = MouseEvent::Button::kNone;
@ -433,7 +580,7 @@ bool GTKWindow::HandleKeyboard(
bool super_pressed = modifiers & GDK_SUPER_MASK;
uint32_t key_char = gdk_keyval_to_unicode(event->keyval);
// TODO(Triang3l): event->hardware_keycode to VirtualKey translation.
KeyEvent e(this, VirtualKey(event->hardware_keycode), 1,
KeyEvent e(this, TranslateVirtualKey(event->keyval), 1,
event->type == GDK_KEY_RELEASE, shift_pressed, ctrl_pressed,
alt_pressed, super_pressed);
switch (event->type) {

View file

@ -59,6 +59,10 @@ class GTKWindow : public Window {
void BeginBatchedSizeUpdate();
void EndBatchedSizeUpdate(WindowDestructionReceiver& destruction_receiver);
// Translates a GDK key value (the `keyval` of a key event) to the
// corresponding xenia ui::VirtualKey.
VirtualKey TranslateVirtualKey(guint keyval);
// Handling events related to the whole window.
bool HandleMouse(GdkEvent* event,
WindowDestructionReceiver& destruction_receiver);