[Code] Remove game names from code comments (most of at least)

This commit is contained in:
Triang3l 2021-09-05 21:03:05 +03:00
parent 6986d6c7e8
commit e720e0a540
48 changed files with 246 additions and 245 deletions

View file

@ -811,12 +811,13 @@ void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel,
// Loop through every sample, convert and drop it into the output array.
// If more than one channel, we need to interleave the samples from each
// channel next to each other. Always saturate because FFmpeg output is
// not limited to [-1, 1] (for example 1.095 as seen in RDR)
// not limited to [-1, 1] (for example 1.095 as seen in 5454082B).
constexpr float scale = (1 << 15) - 1;
auto out = reinterpret_cast<int16_t*>(output_buffer);
// For testing of vectorized versions, stereo audio is common in Halo 3, since
// the first menu frame; the intro cutscene also has more than 2 channels.
// For testing of vectorized versions, stereo audio is common in 4D5307E6,
// since the first menu frame; the intro cutscene also has more than 2
// channels.
#if XE_ARCH_AMD64
static_assert(kSamplesPerFrame % 8 == 0);
const auto in_channel_0 = reinterpret_cast<const float*>(samples[0]);

View file

@ -1862,8 +1862,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
src = i.src1;
}
// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
// are valid - max before min to pack NaN as zero (Red Dead Redemption is
// heavily affected by the order - packs 0xFFFFFFFF in matrix code to get 0
// are valid - max before min to pack NaN as zero (5454082B is heavily
// affected by the order - packs 0xFFFFFFFF in matrix code to get a 0
// constant).
e.vmaxps(i.dest, src, e.GetXmmConstPtr(XMM3333));
e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));

View file

@ -2069,7 +2069,8 @@ int InstrEmit_vpkd3d128(PPCHIRBuilder& f, const InstrData& i) {
v = f.Pack(v, PACK_TYPE_FLOAT16_4);
break;
case 6: // VPACK_NORMPACKED64 4_20_20_20 w_z_y_x
// Used in 2K games like NBA 2K9, pretty rarely in general.
// Used in 54540829 and other installments in the series, pretty rarely in
// general.
v = f.Pack(v, PACK_TYPE_ULONG_4202020);
break;
default:

View file

@ -738,7 +738,7 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
break;
}
case PM4_WAIT_FOR_IDLE: {
// This opcode is used by "Duke Nukem Forever" while going/being ingame
// This opcode is used by 5454084E while going / being ingame.
assert_true(count == 1);
uint32_t value = reader->ReadAndSwap<uint32_t>();
XELOGGPU("GPU wait for idle = {:08X}", value);
@ -1168,7 +1168,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
// and used to detect a finished query.
bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
pSampleCounts->ZPass_B == kQueryFinished;
// Older versions of D3D also checks for ZFail (First Gears of War)
// Older versions of D3D also check for ZFail (4D5307D5).
bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished &&
pSampleCounts->ZFail_B == kQueryFinished;
std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts));

View file

@ -1662,7 +1662,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
for (uint32_t i = 0; i < 256; ++i) {
uint32_t value = gamma_ramp_.normal[i].value;
// Swap red and blue (Project Sylpheed has settings allowing separate
// Swap red and blue (535107D4 has settings allowing separate
// configuration).
mapping[i] = ((value & 1023) << 20) | (value & (1023 << 10)) |
((value >> 20) & 1023);
@ -2076,7 +2076,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
memexport_stream.index_count * memexport_format_size;
// Try to reduce the number of shared memory operations when writing
// different elements into the same buffer through different exports
// (happens in Halo 3).
// (happens in 4D5307E6).
bool memexport_range_reused = false;
for (uint32_t i = 0; i < memexport_range_count; ++i) {
MemExportRange& memexport_range = memexport_ranges[i];
@ -2878,8 +2878,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Get the color info register values for each render target. Also, for ROV,
// exclude components that don't exist in the format from the write mask.
// Don't exclude fully overlapping render targets, however - two render
// targets with the same base address are used in the lighting pass of Halo 3,
// for example, with the needed one picked with dynamic control flow.
// targets with the same base address are used in the lighting pass of
// 4D5307E6, for example, with the needed one picked with dynamic control
// flow.
reg::RB_COLOR_INFO color_infos[4];
float rt_clamp[4][4];
uint32_t rt_keep_masks[4][2];
@ -2898,8 +2899,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
// Disable depth and stencil if it aliases a color render target (for
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
// already disabled there).
// instance, during the XBLA logo in 58410954, though depth writing is already
// disabled there).
bool depth_stencil_enabled =
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
if (edram_rov_used && depth_stencil_enabled) {

View file

@ -83,9 +83,9 @@ class D3D12CommandProcessor : public CommandProcessor {
// Gets the current color write mask, taking the pixel shader's write mask
// into account. If a shader doesn't write to a render target, it shouldn't be
// written to and it shouldn't be even bound - otherwise, in Halo 3, one
// written to and it shouldn't be even bound - otherwise, in 4D5307E6, one
// render target is being destroyed by a shader not writing anything, and in
// Banjo-Tooie, the result of clearing the top tile is being ignored because
// 58410955, the result of clearing the top tile is being ignored because
// there are 4 render targets bound with the same EDRAM base (clearly not
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
// conflict with each other.

View file

@ -3619,7 +3619,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
case xenos::DepthRenderTargetFormat::kD24S8:
// Round to the nearest even integer. This seems to be the correct,
// adding +0.5 and rounding towards zero results in red instead of
// black in GTA IV and Halo 3 clear shaders.
// black in the 4D5307E6 clear shader.
a.OpMul(dxbc::Dest::R(i, 0b1000), dxbc::Src::R(i, dxbc::Src::kWWWW),
dxbc::Src::LF(float(0xFFFFFF)));
a.OpRoundNE(dxbc::Dest::R(i, 0b1000),
@ -3804,7 +3804,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
case xenos::DepthRenderTargetFormat::kD24S8:
// Round to the nearest even integer. This seems to be the correct,
// adding +0.5 and rounding towards zero results in red instead of
// black in GTA IV and Halo 3 clear shaders.
// black in the 4D5307E6 clear shader.
a.OpMul(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW),
dxbc::Src::LF(float(0xFFFFFF)));
a.OpRoundNE(dxbc::Dest::R(1, 0b1000),
@ -4181,7 +4181,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
case xenos::DepthRenderTargetFormat::kD24S8:
// Round to the nearest even integer. This seems to be the
// correct, adding +0.5 and rounding towards zero results in red
// instead of black in GTA IV and Halo 3 clear shaders.
// instead of black in the 4D5307E6 clear shader.
a.OpMul(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kXXXX),
dxbc::Src::LF(float(0xFFFFFF)));
@ -6228,7 +6228,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
case xenos::DepthRenderTargetFormat::kD24S8:
// Round to the nearest even integer. This seems to be the correct,
// adding +0.5 and rounding towards zero results in red instead of
// black in GTA IV and Halo 3 clear shaders.
// black in the 4D5307E6 clear shader.
a.OpMul(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
dxbc::Src::LF(float(0xFFFFFF)));
a.OpRoundNE(dxbc::Dest::R(1, 0b0001),

View file

@ -1567,7 +1567,8 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
};
// Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipelines aren't created in Prey because a color mode is used for alpha.
// pipelines aren't created in 545407E0 because a color mode is used for
// alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -1599,7 +1600,7 @@ bool PipelineCache::GetCurrentStateDescription(
// have their sample count matching the one set in the pipeline - however if
// we set NumRenderTargets to 0 and also disable depth / stencil, the sample
// count must be set to 1 - while the command list may still have
// multisampled render targets bound (happens in Halo 3 main menu).
// multisampled render targets bound (happens in 4D5307E6 main menu).
// TODO(Triang3l): Investigate interaction of OMSetRenderTargets with
// non-null depth and DSVFormat DXGI_FORMAT_UNKNOWN in the same case.
for (uint32_t i = 0; i < 4; ++i) {
@ -2005,7 +2006,7 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
state_desc.BlendState.RenderTarget[i];
// Treat 1 * src + 0 * dest as disabled blending (there are opaque
// surfaces drawn with blending enabled, but it's 1 * src + 0 * dest, in
// Call of Duty 4 - GPU performance is better when not blending.
// 415607E6 - GPU performance is better when not blending.
if (rt.src_blend != PipelineBlendFactor::kOne ||
rt.dest_blend != PipelineBlendFactor::kZero ||
rt.blend_op != xenos::BlendOp::kAdd ||

View file

@ -121,8 +121,8 @@ namespace shaders {
// components of operands in shaders.
// For DXT3A and DXT5A, RRRR swizzle is specified in:
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
// Halo 3 also expects replicated components in k_8 sprites.
// DXN is read as RG in Halo 3, but as RA in Call of Duty.
// 4D5307E6 also expects replicated components in k_8 sprites.
// DXN is read as RG in 4D5307E6, but as RA in 415607E6.
// TODO(Triang3l): Find out the correct contents of unused texture components.
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
// k_1_REVERSE
@ -250,9 +250,9 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
LoadMode::kUnknown,
{2, 1, 0, 3}},
// k_Y1_Cr_Y0_Cb_REP
// Used for videos in NBA 2K9. Red and blue must be swapped.
// Used for videos in 54540829. Red and blue must be swapped.
// TODO(Triang3l): D3DFMT_G8R8_G8B8 is DXGI_FORMAT_R8G8_B8G8_UNORM * 255.0f,
// watch out for num_format int, division in shaders, etc., in NBA 2K9 it
// watch out for num_format int, division in shaders, etc., in 54540829 it
// works as is. Also need to decompress if the size is uneven, but should be
// a very rare case.
{DXGI_FORMAT_R8G8_B8G8_UNORM,
@ -1309,7 +1309,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
// Clear the bindings not only for this draw call, but entirely, because
// loading may be needed in some draw call later, which may have the same
// key for some binding as before the invalidation, but texture_invalidated_
// being false (menu background in Halo 3).
// being false (menu background in 4D5307E6).
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
texture_bindings_[i].Clear();
}

View file

@ -418,7 +418,7 @@ class TextureCache {
// Uncompression info for when the regular host format for this texture is
// block-compressed, but the size is not block-aligned, and thus such
// texture cannot be created in Direct3D on PC and needs decompression,
// however, such textures are common, for instance, in Halo 3. This only
// however, such textures are common, for instance, in 4D5307E6. This only
// supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not
// used for DXN and DXT5A.
DXGI_FORMAT dxgi_format_uncompressed;

View file

@ -24,12 +24,13 @@
#include "xenia/gpu/texture_util.h"
#include "xenia/gpu/xenos.h"
// Very prominent in 545407F2.
DEFINE_bool(
resolve_resolution_scale_duplicate_second_pixel, true,
"When using resolution scale, apply the hack that duplicates the "
"right/lower host pixel in the left and top sides of render target resolve "
"areas to eliminate the gap caused by half-pixel offset (this is necessary "
"for certain games like GTA IV to work).",
"for certain games to display the scene graphics).",
"GPU");
DEFINE_bool(
@ -952,11 +953,11 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
dest_dimension = xenos::DataDimension::k2DOrStacked;
// RB_COPY_DEST_PITCH::copy_dest_height is the real texture height used
// for 3D texture pitch, it's not relative to 0,0 of the coordinate space
// (in Halo 3, the sniper rifle scope has copy_dest_height of 192, but the
// rectangle's Y is 64...256) - provide the real height of the rectangle
// since 32x32 tiles are stored linearly anyway. In addition, the height
// in RB_COPY_DEST_PITCH may be larger than needed - in Red Dead
// Redemption, a UI texture for the letterbox bars alpha is located within
// (in 4D5307E6, the sniper rifle scope has copy_dest_height of 192, but
// the rectangle's Y is 64...256) - provide the real height of the
// rectangle since 32x32 tiles are stored linearly anyway. In addition,
// the height in RB_COPY_DEST_PITCH may be larger than needed - in
// 5454082B, a UI texture for the letterbox bars alpha is located within
// the range of a 1280x720 resolve target, so with resolution scaling it's
// also wrongly detected as scaled, while only 1280x208 is being resolved.
dest_height = uint32_t(y1 - y0);

View file

@ -67,7 +67,7 @@ constexpr bool IsPrimitivePolygonal(bool vgt_output_path_is_tessellation_enable,
// TODO(Triang3l): Investigate how kRectangleList should be treated - possibly
// actually drawn as two polygons on the console, however, the current
// geometry shader doesn't care about the winding order - allowing backface
// culling for rectangles currently breaks Gears of War 2.
// culling for rectangles currently breaks 4D53082D.
return false;
}
@ -112,10 +112,10 @@ constexpr float GetD3D10PolygonOffsetFactor(
return float(1 << 24);
}
// 20 explicit + 1 implicit (1.) mantissa bits.
// 2^20 is not enough for Call of Duty 4 retail version's first mission F.N.G.
// shooting range floor (with the number 1) on Direct3D 12. Tested on Nvidia
// GeForce GTX 1070, the exact formula (taking into account the 0...1 to
// 0...0.5 remapping described below) used for testing is
// 2^20 is not enough for 415607E6 retail version's training mission shooting
// range floor (with the number 1) on Direct3D 12. Tested on Nvidia GeForce
// GTX 1070, the exact formula (taking into account the 0...1 to 0...0.5
// remapping described below) used for testing is
// `int(ceil(offset * 2^20 * 0.5)) * sign(offset)`. With 2^20 * 0.5, there
// are various kinds of stripes depending on the view angle in that
// location. With 2^21 * 0.5, the issue is not present.
@ -141,7 +141,7 @@ inline bool DoesCoverageDependOnAlpha(reg::RB_COLORCONTROL rb_colorcontrol) {
// pre-passes and shadowmaps. The shader must have its ucode analyzed. If
// IsRasterizationPotentiallyDone, this shouldn't be called, and assumed false
// instead. Helps reject the pixel shader in some cases - memexport draws in
// Halo 3, and also most of some 1-point draws not covering anything done for
// 4D5307E6, and also most of some 1-point draws not covering anything done for
// some reason in different games with a leftover pixel shader from the previous
// draw, but with SQ_PROGRAM_CNTL destroyed, reducing the number of
// unpredictable unneeded translations of random shaders with different host

View file

@ -23,11 +23,12 @@
#include "xenia/gpu/xenos.h"
#include "xenia/ui/graphics_provider.h"
// The test case for AMD is 4D5307E6 (checked in 2018).
DEFINE_bool(dxbc_switch, true,
"Use switch rather than if for flow control. Turning this off or "
"on may improve stability, though this heavily depends on the "
"driver - on AMD, it's recommended to have this set to true, as "
"Halo 3 appears to crash when if is used for flow control "
"some titles appear to crash when if is used for flow control "
"(possibly the shader compiler tries to flatten them). On Intel "
"HD Graphics, this is ignored because of a crash with the switch "
"instruction.",
@ -398,7 +399,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
assert_true(register_count() >= 2);
if (register_count() >= 1) {
// Copy the domain location to r0.xyz.
// ZYX swizzle according to Call of Duty 3 and Viva Pinata.
// ZYX swizzle according to 415607E1 and 4D5307F2.
in_domain_location_used_ |= 0b0111;
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
: dxbc::Dest::R(0, 0b0111),
@ -425,7 +426,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
if (register_count() >= 1) {
// Copy the domain location to r0.xyz.
// ZYX swizzle with r1.y == 0, according to the water shader in
// Banjo-Kazooie: Nuts & Bolts.
// 4D5307ED.
in_domain_location_used_ |= 0b0111;
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111)
: dxbc::Dest::R(0, 0b0111),
@ -447,10 +448,10 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// appears that the tessellator offloads the reordering of coordinates
// for edges to game shaders.
//
// In Banjo-Kazooie: Nuts & Bolts, the water shader multiplies the
// first control point's position by r0.z, the second CP's by r0.y,
// and the third CP's by r0.x. But before doing that it swizzles
// r0.xyz the following way depending on the value in r1.y:
// In 4D5307ED, the water shader multiplies the first control point's
// position by r0.z, the second CP's by r0.y, and the third CP's by
// r0.x. But before doing that it swizzles r0.xyz the following way
// depending on the value in r1.y:
// - ZXY for 1.0.
// - YZX for 2.0.
// - XZY for 4.0.
@ -478,9 +479,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0011)
: dxbc::Dest::R(0, 0b0011),
dxbc::Src::VDomain());
// Control point indices according to the shader from the main menu of
// Defender, which starts from `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz`,
// where c255.x is 0, and c255.y is 1.
// Control point indices according to the main menu of 58410823, with
// `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz` in the prologue of the
// shader, where c255.x is 0, and c255.y is 1.
// r0.z for (1 - r0.x) * (1 - r0.y)
// r1.x for r0.x * (1 - r0.y)
// r1.y for r0.x * r0.y
@ -509,7 +510,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
assert_true(register_count() >= 2);
if (register_count() >= 1) {
// Copy the domain location to r0.yz.
// XY swizzle according to the ground shader in Viva Pinata.
// XY swizzle according to the ground shader in 4D5307F2.
in_domain_location_used_ |= 0b0011;
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0110)
: dxbc::Dest::R(0, 0b0110),
@ -530,9 +531,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// the tessellator offloads the reordering of coordinates for edges to
// game shaders.
//
// In Viva Pinata, if we assume that r0.y is U and r0.z is V, the
// factors each control point value is multiplied by are the
// following:
// In 4D5307F2, if we assume that r0.y is U and r0.z is V, the factors
// each control point value is multiplied by are the following:
// - (1-u)*(1-v), u*(1-v), (1-u)*v, u*v for 0.0 (identity swizzle).
// - u*(1-v), (1-u)*(1-v), u*v, (1-u)*v for 1.0 (YXWZ).
// - u*v, (1-u)*v, u*(1-v), (1-u)*(1-v) for 2.0 (WZYX).
@ -1452,7 +1452,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
dest = dxbc::Dest::R(system_temp_point_size_edge_flag_kill_vertex_);
break;
case InstructionStorageTarget::kExportAddress:
// Validate memexport writes (Halo 3 has some weird invalid ones).
// Validate memexport writes (4D5307E6 has some completely invalid ones).
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
@ -1463,7 +1463,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
system_temps_memexport_address_[memexport_alloc_current_count_ - 1]);
break;
case InstructionStorageTarget::kExportData: {
// Validate memexport writes (Halo 3 has some weird invalid ones).
// Validate memexport writes (4D5307E6 has some completely invalid ones).
if (memexport_alloc_current_count_ == 0 ||
memexport_alloc_current_count_ > Shader::kMaxMemExports ||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]

View file

@ -705,10 +705,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Add a small epsilon to the offset (1.5/4 the fixed-point texture
// coordinate ULP - shouldn't significantly affect the fixed-point
// conversion; 1/4 is also not enough with 3x resolution scaling very
// noticeably on the weapon in Halo 3) to resolve ambiguity when fetching
// noticeably on the weapon in 4D5307E6) to resolve ambiguity when fetching
// point-sampled textures between texels. This applies to both normalized
// (Banjo-Kazooie Xbox Live Arcade logo, coordinates interpolated between
// vertices with half-pixel offset) and unnormalized (Halo 3 lighting
// (58410954 Xbox Live Arcade logo, coordinates interpolated between
// vertices with half-pixel offset) and unnormalized (4D5307E6 lighting
// G-buffer reading, ps_param_gen pixels) coordinates. On Nvidia Pascal,
// without this adjustment, blockiness is visible in both cases. Possibly
// there is a better way, however, an attempt was made to error-correct
@ -1595,13 +1595,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// - Data.
// Viva Pinata uses vertex displacement map textures for tessellated
// models like the beehive tree with explicit LOD with point sampling
// (they store values packed in two components), however, the fetch
// constant has anisotropic filtering enabled. However, Direct3D 12
// doesn't allow mixing anisotropic and point filtering. Possibly
// anistropic filtering should be disabled when explicit LOD is used - do
// this here.
// 4D5307F2 uses vertex displacement map textures for tessellated models
// like the beehive tree with explicit LOD with point sampling (they store
// values packed in two components), however, the fetch constant has
// anisotropic filtering enabled. However, Direct3D 12 doesn't allow
// mixing anisotropic and point filtering. Possibly anisotropic filtering
// should be disabled when explicit LOD is used - do this here.
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
tfetch_index, instr.attributes.mag_filter,
instr.attributes.min_filter, instr.attributes.mip_filter,

View file

@ -287,8 +287,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
// Choose in which 40-sample half of the tile the pixel is, for swapping
// 40-sample columns when accessing the depth buffer - games expect this
// behavior when writing depth back to the EDRAM via color writing (GTA IV,
// Halo 3).
// behavior when writing depth back to the EDRAM via color writing (4D5307E6).
// system_temp_rov_params_.x = tile-local sample 0 X >= 40
// system_temp_rov_params_.y = row offset
// system_temp_rov_params_.z = X sample 0 position within the tile
@ -3282,7 +3281,7 @@ void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp,
dxbc::Src::LF(float(0xFFFFFF)));
// Round to the nearest even integer. This seems to be the correct way:
// rounding towards zero gives 0xFF instead of 0x100 in clear shaders in,
// for instance, Halo 3, but other clear shaders in it are also broken if
// for instance, 4D5307E6, but other clear shaders in it are also broken if
// 0.5 is added before ftou instead of round_ne.
a_.OpRoundNE(d24_dest, d24_src);
// Convert to fixed-point.

View file

@ -28,16 +28,18 @@ DEFINE_bool(
"the real reason why they're invalid is found.",
"GPU");
// Extremely bright screen borders in 4D5307E6.
// Reading between texels with half-pixel offset in 58410954.
DEFINE_bool(
half_pixel_offset, true,
"Enable support of vertex half-pixel offset (D3D9 PA_SU_VTX_CNTL "
"PIX_CENTER). Generally games are aware of the half-pixel offset, and "
"having this enabled is the correct behavior (disabling this may "
"significantly break post-processing in some games, like Halo 3), but in "
"some games it might have been ignored, resulting in slight blurriness of "
"UI textures, for instance, when they are read between texels rather than "
"at texel centers (Banjo-Kazooie), or the leftmost/topmost pixels may not "
"be fully covered when MSAA is used with fullscreen passes.",
"significantly break post-processing in some games), but in certain games "
"it might have been ignored, resulting in slight blurriness of UI "
"textures, for instance, when they are read between texels rather than "
"at texel centers, or the leftmost/topmost pixels may not be fully covered "
"when MSAA is used with fullscreen passes.",
"GPU");
DEFINE_int32(query_occlusion_fake_sample_count, 1000,

View file

@ -57,7 +57,7 @@ DEFINE_bool(
// TODO(Triang3l): More investigation of the cache threshold as cache lookups
// and insertions require global critical region locking, and insertions also
// require protecting pages. At 1024, the cache only made the performance worse
// (Tony Hawk's American Wasteland, 16-bit primitive reset index replacement).
// (415607D4, 16-bit primitive reset index replacement).
DEFINE_int32(
primitive_processor_cache_min_indices, 4096,
"Smallest number of guest indices to store in the cache to try reusing "
@ -247,14 +247,14 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// games using tessellated strips / fans so far.
switch (tessellation_mode) {
case xenos::TessellationMode::kDiscrete:
// - Call of Duty 3 - nets above barrels in the beginning of the
// first mission (turn right after the end of the intro) -
// - 415607E1 - nets above barrels in the beginning of the first
// mission (turn right after the end of the intro) -
// kTriangleList.
host_vertex_shader_type =
Shader::HostVertexShaderType::kTriangleDomainCPIndexed;
break;
case xenos::TessellationMode::kContinuous:
// - Viva Pinata - tree building with a beehive in the beginning
// - 4D5307F2 - tree building with a beehive in the beginning
// (visible on the start screen behind the logo), waterfall in the
// beginning - kTriangleList.
host_vertex_shader_type =
@ -276,7 +276,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
break;
case xenos::TessellationMode::kContinuous:
// - Defender - retro screen and beams in the main menu - kQuadList.
// - 58410823 - retro screen and beams in the main menu - kQuadList.
host_vertex_shader_type =
Shader::HostVertexShaderType::kQuadDomainCPIndexed;
break;
@ -285,14 +285,14 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
}
break;
case xenos::PrimitiveType::kTrianglePatch:
// - Banjo-Kazooie: Nuts & Bolts - water - adaptive.
// - Halo 3 - water - adaptive.
// - 4D5307E6 - water - adaptive.
// - 4D5307ED - water - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kTriangleDomainPatchIndexed;
break;
case xenos::PrimitiveType::kQuadPatch:
// - Fable II - continuous.
// - Viva Pinata - garden ground - adaptive.
// - 4D5307F1 - continuous.
// - 4D5307F2 - garden ground - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
break;

View file

@ -335,10 +335,10 @@ union alignas(uint32_t) PA_SU_SC_MODE_CNTL {
uint32_t cull_back : 1; // +1
// 0 - front is CCW, 1 - front is CW.
uint32_t face : 1; // +2
// The game Fuse uses poly_mode 2 for triangles, which is "reserved" on R6xx
// and not defined on Adreno 2xx, but polymode_front/back_ptype are 0
// (points) in this case in Fuse, which should not be respected for
// non-kDualMode as the game wants to draw filled triangles.
// 4541096E uses poly_mode 2 for triangles, which is "reserved" on R6xx and
// not defined on Adreno 2xx, but polymode_front/back_ptype are 0 (points)
// in this case in 4541096E, which should not be respected for non-kDualMode
// as the title wants to draw filled triangles.
xenos::PolygonModeEnable poly_mode : 2; // +3
xenos::PolygonType polymode_front_ptype : 3; // +5
xenos::PolygonType polymode_back_ptype : 3; // +8
@ -559,16 +559,16 @@ union alignas(uint32_t) RB_COLORCONTROL {
// (gl_FragCoord.y near 0 in the top, near 1 in the bottom here - D3D-like.)
// For 2 samples, the top sample (closer to gl_FragCoord.y 0) is covered
// when alpha is in [0.5, 1), the bottom sample is covered when the alpha is
// [1. With these thresholds, however, in Red Dead Redemption, almost all
// distant trees are transparent, this is asymmetric - fully transparent for
// a quarter of the range (or even half of the range for 2x and almost the
// entire range for 1x), but fully opaque only in one value.
// [1. With these thresholds, however, in 5454082B, almost all distant trees
// are transparent, this is asymmetric - fully transparent for a quarter of
// the range (or even half of the range for 2x and almost the entire range
// for 1x), but fully opaque only in one value.
// Though, 2, 2, 2, 2 offset values are commonly used for undithered alpha
// to coverage (in games such as Red Dead Redemption, and overall in AMD
// driver implementations) - it appears that 2, 2, 2, 2 offsets are supposed
// to make this symmetric.
// Both Red Dead Redemption and RADV (which used AMDVLK as a reference) use
// 3, 1, 0, 2 offsets for dithered alpha to mask.
// to coverage (in games such as 5454082B, and overall in AMD driver
// implementations) - it appears that 2, 2, 2, 2 offsets are supposed to
// make this symmetric.
// Both 5454082B and RADV (which used AMDVLK as a reference) use 3, 1, 0, 2
// offsets for dithered alpha to mask.
// https://gitlab.freedesktop.org/nchery/mesa/commit/8a52e4cc4fad4f1c75acc0badd624778f9dfe202
// It appears that the offsets lower the thresholds by (offset / 4 /
// sample count). That's consistent with both 2, 2, 2, 2 making the test

View file

@ -40,6 +40,7 @@ DEFINE_bool(
"reduce bandwidth usage during transfers as the previous depth won't need "
"to be read.",
"GPU");
// The round trip is done, in particular, in 545407F2.
DEFINE_string(
depth_float24_conversion, "",
"Method for converting 32-bit Z values to 20e4 floating point when using "
@ -56,8 +57,8 @@ DEFINE_string(
" + Highest performance, allows early depth test and writing.\n"
" + Host MSAA is possible with pixel-rate shading where supported.\n"
" - EDRAM > RAM > EDRAM depth buffer round trip done in certain games "
"(such as GTA IV) destroys precision irreparably, causing artifacts if "
"another rendering pass is done after the EDRAM reupload.\n"
"destroys precision irreparably, causing artifacts if another rendering "
"pass is done after the EDRAM reupload.\n"
" truncate:\n"
" Convert to 20e4 directly in pixel shaders, always rounding down.\n"
" + Average performance, conservative early depth test is possible.\n"
@ -96,18 +97,15 @@ DEFINE_bool(
"bloom, etc., in some cases.",
"GPU");
// Disabled by default because of full-screen effects that occur when game
// shaders assume piecewise linear, much more severe than blending-related
// issues.
// shaders assume piecewise linear (4541080F), much more severe than
// blending-related issues.
DEFINE_bool(
gamma_render_target_as_srgb, false,
"When the host can't write piecewise linear gamma directly with correct "
"blending, use sRGB output on the host for conceptually correct blending "
"in linear color space (to prevent issues such as bright squares around "
"bullet holes and overly dark lighting in Halo 3) while having slightly "
"different precision distribution in the render target and severely "
"incorrect values if the game accesses the resulting colors directly as "
"raw data (the whole screen in The Orange Box, for instance, since when "
"the first loading bar appears).",
"in linear color space while having slightly different precision "
"distribution in the render target and severely incorrect values if the "
"game accesses the resulting colors directly as raw data.",
"GPU");
DEFINE_bool(
mrt_edram_used_range_clamp_to_min, true,
@ -493,9 +491,9 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
// (issues caused by color and depth render target collisions haven't been
// found yet), but render targets with smaller index are considered more
// important - specifically, because of the usage in the lighting pass of
// Halo 3, which can be checked in the vertical look calibration sequence in
// 4D5307E6, which can be checked in the vertical look calibration sequence in
// the beginning of the game: if render target 0 is removed in favor of 1, the
// UNSC servicemen and the world will be too dark, like fully in shadow -
// characters and the world will be too dark, like fully in shadow -
// especially prominent on the helmet. This happens because the shader picks
// between two render targets to write dynamically (though with a static, bool
// constant condition), but all other state is set up in a way that implies
@ -624,7 +622,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
// "As if it was 64bpp" (contribution of 32bpp render targets multiplied by 2,
// and clamping for 32bpp render targets divides this by 2) because 32bpp
// render targets can be combined with twice as long 64bpp render targets. An
// example is the Dead Space 3 menu background (1-sample 1152x720, or 1200x720
// example is the 4541099D menu background (1-sample 1152x720, or 1200x720
// after rounding to tiles, with a 32bpp depth buffer at 0 requiring 675
// tiles, and a 64bpp color buffer at 675 requiring 1350 tiles, but the
// smallest distance between two render target bases is 675 tiles).

View file

@ -70,10 +70,10 @@ class RenderTargetCache {
// Significant differences:
// - 8_8_8_8_GAMMA - the piecewise linear gamma curve is very different than
// sRGB, one possible path is conversion in shaders (resulting in
// incorrect blending, especially visible on decals in Halo 3), another is
// using sRGB render targets and either conversion on resolve or reading
// the resolved data as a true sRGB texture (incorrect when the game
// accesses the data directly, like The Orange Box).
// incorrect blending, especially visible on decals in 4D5307E6), another
// is using sRGB render targets and either conversion on resolve or
// reading the resolved data as a true sRGB texture (incorrect when the
// game accesses the data directly, like 4541080F).
// - 2_10_10_10_FLOAT - ranges significantly different than in float16, much
// smaller RGB range, and alpha is fixed-point and has only 2 bits.
// - 16_16, 16_16_16_16 - has -32 to 32 range, not -1 to 1 - need either to
@ -445,9 +445,9 @@ class RenderTargetCache {
// aliasing naively, precision may be lost - host depth must only be
// overwritten if the new guest value is different than the current host depth
// when converted to the guest format (this catches the usual case of
// overwriting the depth buffer for clearing it mostly). Sonic the Hedgehog's
// intro cutscene, for example, has a good example of corruption that happens
// if this is not handled - the upper 1280x384 pixels are rendered in a very
// overwriting the depth buffer for clearing it mostly). The 534507D6 intro
// cutscene, for example, demonstrates the corruption that happens if this
// is not handled - the upper 1280x384 pixels are rendered in a very
// "striped" way if the depth precision is lost (if this is made always return
// false).
virtual bool IsHostDepthEncodingDifferent(
@ -627,7 +627,7 @@ class RenderTargetCache {
// surface info was changed), to avoid unneeded render target switching (which
// is especially undesirable on tile-based GPUs) in the implementation if
// simply disabling depth / stencil test or color writes and then re-enabling
// (Banjo-Kazooie does this often with color). Must also be used to determine
// (58410954 does this often with color). Must also be used to determine
// whether it's safe to enable depth / stencil or writing to a specific color
// render target in the pipeline for this draw call.
// Only valid for non-pixel-shader-interlock paths.

View file

@ -551,7 +551,7 @@ struct ParsedAluInstruction {
InstructionResult scalar_result;
// Both operations must be executed before any result is stored if vector and
// scalar operations are paired. There are cases of vector result being used
// as scalar operand or vice versa (the halo on Avalanche in Halo 3, for
// as scalar operand or vice versa (the ring on Avalanche in 4D5307E6, for
// example), in this case there must be no dependency between the two
// operations.
@ -851,11 +851,11 @@ class Shader {
// highest static register address + 1, or 0 if no registers referenced this
// way. SQ_PROGRAM_CNTL is not always reliable - some draws (like single point
// draws with oPos = 0001 that are done by Xbox 360's Direct3D 9 sometimes;
// can be reproduced by launching Arrival in Halo 3 from the campaign lobby)
// that aren't supposed to cover any pixels use an invalid (zero)
// SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this case
// SQ_PROGRAM_CNTL may contain a number smaller than actually needed by the
// pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
// can be reproduced by launching the intro mission in 4D5307E6 from the
// campaign lobby) that aren't supposed to cover any pixels use an invalid
// (zero) SQ_PROGRAM_CNTL, but with an outdated pixel shader loaded, in this
// case SQ_PROGRAM_CNTL may contain a number smaller than actually needed by
// the pixel shader - SQ_PROGRAM_CNTL should be used to go above this count if
// uses_register_dynamic_addressing is true.
uint32_t register_static_address_bound() const {
return register_static_address_bound_;

View file

@ -388,8 +388,8 @@ void Shader::GatherAluInstructionInformation(
// allocation in shader translator implementations.
// eA is (hopefully) always written to using:
// mad eA, r#, const0100, c#
// (though there are some exceptions, shaders in Halo 3 for some reason set eA
// to zeros, but the swizzle of the constant is not .xyzw in this case, and
// (though there are some exceptions, shaders in 4D5307E6 for some reason set
// eA to zeros, but the swizzle of the constant is not .xyzw in this case, and
// they don't write to eM#).
// Export is done to vector_dest of the ucode instruction for both vector and
// scalar operations - no need to check separately.

View file

@ -36,7 +36,7 @@ XeHSConstantDataOutput XePatchConstant(
// 2) r0.zyx -> r0.zyx by the guest (because r1.y is set to 0 by Xenia, which
// apparently means identity swizzle to games).
// 3) r0.z * v0 + r0.y * v1 + r0.x * v2 by the guest.
// With this order, there are no cracks in Halo 3 water.
// With this order, there are no cracks in 4D5307E6 water.
[unroll] for (i = 0u; i < 3u; ++i) {
output.edges[i] = xe_input_patch[(i + 1u) % 3u].edge_factor;
}

View file

@ -986,11 +986,11 @@ uint4 XeDXT3AAs1111TwoBlocksRowToBGRA4(uint2 halfblocks) {
// DXT1/DXT3/DXT5 color components and CTX1 X/Y are ordered in:
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
// (LSB on the right, MSB on the left.)
// TODO(Triang3l): Investigate this better, Halo: Reach is the only known game
// TODO(Triang3l): Investigate this better, 4D53085B is the only known game
// that uses it (for lighting in certain places - one of easy to notice usages
// is the T-shaped (or somewhat H-shaped) metal beams in the beginning of
// Winter Contingency), however the contents don't say anything about the
// channel order.
// is the T-shaped (or somewhat H-shaped) metal beams in the beginning of the
// first mission), however the contents don't say anything about the channel
// order.
uint4 row = (((halfblocks.xxyy >> uint2(3u, 11u).xyxy) & 1u) << 8u) |
(((halfblocks.xxyy >> uint2(7u, 15u).xyxy) & 1u) << 24u) |
(((halfblocks.xxyy >> uint2(2u, 10u).xyxy) & 1u) << 4u) |

View file

@ -5,8 +5,8 @@ XeHSControlPointInputAdaptive main(uint xe_edge_factor : SV_VertexID) {
XeHSControlPointInputAdaptive output;
// The Xbox 360's GPU accepts the float32 tessellation factors for edges
// through a special kind of an index buffer.
// While Viva Pinata sets the factors to 0 for frustum-culled (quad) patches,
// in Halo 3 only allowing patches with factors above 0 makes distant
// While 4D5307F2 sets the factors to 0 for frustum-culled (quad) patches, in
// 4D5307E6 only allowing patches with factors above 0 makes distant
// (triangle) patches disappear - it appears that there are no special values
// for culled patches on the Xbox 360 (unlike zero, negative and NaN on
// Direct3D 11).

View file

@ -11,7 +11,7 @@ RWBuffer<uint4> xe_texture_load_dest : register(u0);
// Dword 1:
// rrrrrrrrgggggggg
// RRRRRRRRGGGGGGGG
// (R is in the higher bits, according to how this format is used in Halo 3).
// (R is in the higher bits, according to how this format is used in 4D5307E6).
// Dword 2:
// AA BB CC DD
// EE FF GG HH

View file

@ -465,9 +465,10 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryInvalidationCallback(
// invalidated - if no GPU-written data nearby that was not intended to be
// invalidated since it's not in sync with CPU memory and can't be
// reuploaded. It's a lot cheaper to upload some excess data than to catch
// access violations - with 4 KB callbacks, the original Doom runs at 4 FPS
// on Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per
// frame, but with 256 KB it's 0.7 ms.
// access violations - with 4 KB callbacks, 58410824 (being a
// software-rendered game) runs at 4 FPS on Intel Core i7-3770, with 64 KB,
// the CPU game code takes 3 ms to run per frame, but with 256 KB, it's
// 0.7 ms.
if (page_first & 63) {
uint64_t gpu_written_start =
system_page_flags_[block_first].valid_and_gpu_written;

View file

@ -49,7 +49,8 @@ void CopySwapBlock(xenos::Endian endian, void* output, const void* input,
void ConvertTexelCTX1ToR8G8(xenos::Endian endian, void* output,
const void* input, size_t length) {
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
// (R is in the higher bits, according to how this format is used in Halo 3).
// (R is in the higher bits, according to how this format is used in
// 4D5307E6).
union {
uint8_t data[8];
struct {

View file

@ -352,11 +352,11 @@ TextureGuestLayout GetGuestTextureLayout(
xenos::kTextureSubresourceAlignmentBytes);
// Estimate the memory amount actually referenced by the texture, which may
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge
// Racer Unbounded, for which memory exactly for 1280x720 is allocated, and
// aligning the height to 32 would cause access of an unallocated page) or
// bigger than the stride. For tiled textures, this is the dimensions
// aligned to 32x32x4 blocks (or x1 for the missing dimensions).
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in 4E4D083E,
// for which memory exactly for 1280x720 is allocated, and aligning the
// height to 32 would cause access of an unallocated page) or bigger than
// the stride. For tiled textures, this is the dimensions aligned to 32x32x4
// blocks (or x1 for the missing dimensions).
uint32_t level_width_blocks =
xe::align(std::max(width_texels >> level, uint32_t(1)),
format_info->block_width) /

View file

@ -64,14 +64,14 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
// implies 32-block alignment for both uncompressed and compressed textures)
// stored in the fetch constant, and height aligned to 32 blocks for Z slice
// and array layer stride calculation purposes. The pitch can be different
// from the actual width - an example is Plants vs. Zombies, using 1408 pitch
// for a 1280x menu background).
// from the actual width - an example is 584109FF, using 1408 pitch for a
// 1280x menu background).
// - The mip levels use `max(next_pow2(width or height in texels) >> level, 1)`
// aligned to 32 blocks for the same purpose, likely disregarding the pitch
// from the fetch constant.
//
// There is also mip tail packing if the fetch constant specifies that packed
// mips are enabled, for both tiled and linear textures (Prey uses linear
// mips are enabled, for both tiled and linear textures (545407E0 uses linear
// DXT-compressed textures with packed mips very extensively for the game world
// materials). In this case, mips with width or height of 16 or smaller are
// stored not individually, but instead, in 32-texel (note: not 32-block - mip
@ -99,7 +99,7 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
// tail, and the offset calculation function doesn't have level == 0 checks in
// it, only early-out if level < packed tail level (which can be 0). There are
// examples of textures with packed base, for example, in the intro level of
// Prey (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
// 545407E0 (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
// machines).
//
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
@ -107,22 +107,21 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
// fetch constant).
//
// However, all the 32x32x4 padding, being just padding, is not necessarily
// being actually accessed, especially for linear textures. Ridge Racer
// Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for
// exactly 1280x720, so aligning the height to 32 to 1280x736 results in access
// violations. So, while for stride calculations all the padding must be
// respected, for actual memory loads it's better to avoid trying to access it
// when possible:
// being actually accessed, especially for linear textures. 4E4D083E has a
// 1280x720 k_8_8_8_8 linear texture, and allocates memory for exactly 1280x720,
// so aligning the height to 32 to 1280x736 results in access violations. So,
// while for stride calculations all the padding must be respected, for actual
// memory loads it's better to avoid trying to access it when possible:
// - If the pitch is bigger than the width, it's better to calculate the last
// row's length from the width rather than the pitch (this also possibly works
// in the other direction though - pitch < width is a weird situation, but
// probably legal, and may lead to reading data from beyond the calculated
// subresource stride).
// - For linear textures (like that 1280x720 example from Ridge Racer
// Unbounded), it's easy to calculate the exact memory extent that may be
// accessed knowing the dimensions (unlike for tiled textures with complex
// addressing within 32x32x4-block tiles), so there's no need to align them to
// 32x32x4 for memory extent calculation.
// - For linear textures (like that 1280x720 example from 4E4D083E), it's easy
// to calculate the exact memory extent that may be accessed knowing the
// dimensions (unlike for tiled textures with complex addressing within
// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for
// memory extent calculation.
// - For the linear packed mip tail, the extent can be calculated as max of
// (block offsets + block extents) of all levels stored in it.
//
@ -152,16 +151,16 @@ struct TextureGuestLayout {
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
// depending on the dimension), but for the linear subresources, this may be
// significantly (including fewer 4 KB pages) smaller than the aligned size
// (like for Ridge Racer Unbounded where aligning the height of a 1280x720
// linear texture results in access violations). For the linear mip tail,
// this includes all the mip levels stored in it. If the width is bigger
// than the pitch, this will also be taken into account for the last row so
// all memory actually used by the texture will be loaded, and may be bigger
// than the distance between array slices or levels. The purpose of this
// parameter is to make the memory amount that needs to be resident as close
// to the real amount as possible, to make sure all the needed data will be
// read, but also, if possible, unneeded memory pages won't be accessed
// (since that may trigger an access violation on the CPU).
// (like for 4E4D083E where aligning the height of a 1280x720 linear texture
// results in access violations). For the linear mip tail, this includes all
// the mip levels stored in it. If the width is bigger than the pitch, this
// will also be taken into account for the last row so all memory actually
// used by the texture will be loaded, and may be bigger than the distance
// between array slices or levels. The purpose of this parameter is to make
// the memory amount that needs to be resident as close to the real amount
// as possible, to make sure all the needed data will be read, but also, if
// possible, unneeded memory pages won't be accessed (since that may trigger
// an access violation on the CPU).
uint32_t x_extent_blocks;
uint32_t y_extent_blocks;
uint32_t z_extent;

View file

@ -483,7 +483,7 @@ enum class FetchOpcode : uint32_t {
// - 3D (used for both 3D and stacked 2D texture): U, V, W (normalized or
// unnormalized - same for both 3D W and stack layer; also VolMagFilter /
// VolMinFilter between stack layers is supported, used for color correction
// in Burnout Revenge).
// in 454107DC).
// - Cube: SC, TC (between 1 and 2 for normalized), face ID (0.0 to 5.0), the
// cube vector ALU instruction is used to calculate them.
// https://gpuopen.com/learn/fetching-from-cubes-and-octahedrons/
@ -495,9 +495,9 @@ enum class FetchOpcode : uint32_t {
// The total LOD for a sample is additive and is based on what is enabled.
//
// For cube maps, according to what texCUBEgrad compiles to in a modified
// HLSL shader of Brave: A Warrior's Tale and to XNA assembler output for PC
// SM3 texldd, register gradients are in cube space (not in SC/TC space,
// unlike the coordinates themselves). This isn't true for the GCN, however.
// HLSL shader of 455607D1 and to XNA assembler output for PC SM3 texldd,
// register gradients are in cube space (not in SC/TC space, unlike the
// coordinates themselves). This isn't true for the GCN, however.
//
// TODO(Triang3l): Find if gradients are unnormalized for cube maps if
// coordinates are unnormalized. Since texldd doesn't perform any
@ -814,8 +814,8 @@ static_assert_size(TextureFetchInstruction, sizeof(uint32_t) * 3);
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
// this rule for multiplication, as games often rely on it in vector
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
// things in games - causes white screen in Halo 3, white specular on
// characters in GTA IV. The result is always positive zero in this case, no
// things in games - causes white screen in 4D5307E6, white specular on
// characters in 545407F2. The result is always positive zero in this case, no
// matter what the signs of the other operands are, according to R5xx
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
// Adreno 200. This means that the following need to be taken into account
@ -1628,8 +1628,8 @@ enum class ExportRegister : uint32_t {
// X - PSIZE (gl_PointSize).
// Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
// drawing.
// Z - KILLVERTEX flag (used in Banjo-Kazooie: Nuts & Bolts for grass), set
// for killing primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
// Z - KILLVERTEX flag (used in 4D5307ED for grass), set for killing
// primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
kVSPointSizeEdgeFlagKillVertex = 63,
kPSColor0 = 0,

View file

@ -507,7 +507,7 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
uint32_t upload_size = source_length;
// Ping the memory subsystem for allocation size.
// TODO(DrChat): Artifacting occurring in GripShift with this enabled.
// TODO(DrChat): Artifacting occurring in 5841089E with this enabled.
// physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
assert(upload_base <= source_addr);
uint32_t source_offset = source_addr - upload_base;

View file

@ -758,7 +758,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
depth_bias_scale = depth_bias_scales[1];
depth_bias_offset = depth_bias_offsets[1];
}
// Convert to Vulkan units based on the values in Call of Duty 4:
// Convert to Vulkan units based on the values in 415607E6:
// r_polygonOffsetScale is -1 there, but 32 in the register.
// r_polygonOffsetBias is -1 also, but passing 2/65536.
// 1/65536 and 2 scales are applied separately, however, and for shadow maps

View file

@ -235,10 +235,10 @@ enum class SurfaceNumFormat : uint32_t {
//
// Depth surfaces are also stored as 32bpp tiles, however, as opposed to color
// surfaces, 40x16-sample halves of each tile are swapped - game shaders (for
// example, in GTA IV, Halo 3) perform this swapping when writing specific
// depth/stencil values by drawing to a depth buffer's memory through a color
// render target (to reupload a depth/stencil surface previously evicted from
// the EDRAM to the main memory, for instance).
// example, in 4D5307E6 main menu, 545407F2) perform this swapping when writing
// specific depth/stencil values by drawing to a depth buffer's memory through a
// color render target (to reupload a depth/stencil surface previously evicted
// from the EDRAM to the main memory, for instance).
enum class MsaaSamples : uint32_t {
k1X = 0,
@ -728,12 +728,12 @@ enum class SampleControl : uint32_t {
// - sample_control is SQ_CONTEXT_MISC::sc_sample_cntl.
// - interpolator_control_sampling_pattern is
// SQ_INTERPOLATOR_CNTL::sampling_pattern.
// Centroid interpolation can be tested in Red Dead Redemption. If the GPU host
// backend implements guest MSAA properly, using host MSAA, with everything
// interpolated at centers, the Diez Coronas start screen background may have
// a few distinctly bright pixels on the mesas/buttes, where extrapolation
// happens. Interpolating certain values (ones that aren't used for gradient
// calculation, not texture coordinates) at centroids fixes this issue.
// Centroid interpolation can be tested in 5454082B. If the GPU host backend
// implements guest MSAA properly, using host MSAA, with everything interpolated
// at centers, the Monument Valley start screen background may have a few
// distinctly bright pixels on the mesas/buttes, where extrapolation happens.
// Interpolating certain values (ones that aren't used for gradient calculation,
// not texture coordinates) at centroids fixes this issue.
inline uint32_t GetInterpolatorSamplingPattern(
MsaaSamples msaa_samples, SampleControl sample_control,
uint32_t interpolator_control_sampling_pattern) {
@ -763,9 +763,9 @@ enum class TessellationMode : uint32_t {
enum class PolygonModeEnable : uint32_t {
kDisabled = 0, // Render triangles.
kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type.
// The game Fuse uses 2 for triangles, which is "reserved" on R6xx and not
// defined on Adreno 2xx, but polymode_front/back_ptype are 0 (points) in this
// case in Fuse, which should not be respected for non-kDualMode as the game
// 4541096E uses 2 for triangles, which is "reserved" on R6xx and not defined
// on Adreno 2xx, but polymode_front/back_ptype are 0 (points) in this case in
// 4541096E, which should not be respected for non-kDualMode as the title
// wants to draw filled triangles.
};
@ -785,17 +785,15 @@ enum class ModeControl : uint32_t {
// for it especially since the Xbox 360 doesn't have early per-sample depth /
// stencil, only early hi-Z / hi-stencil, and other registers possibly
// toggling pixel shader execution are yet to be found):
// - Most of depth pre-pass draws in Call of Duty 4 use the kDepth more with
// a `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
// - Most of depth pre-pass draws in 415607E6 use the kDepth mode with a
// `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
// used with the same shader performing the texture fetch.
// - Red Dead Redemption has some kDepth draws with alphatest enabled, but the
// shader is `oC0 = r0`, which makes no sense (alphatest based on an
// interpolant from the vertex shader) as no texture alpha cutout is
// involved.
// - Red Dead Redemption also has kDepth draws with pretty complex shaders
// clearly for use only in the color pass - even fetching and filtering a
// shadowmap.
// - 5454082B has some kDepth draws with alphatest enabled, but the shader is
// `oC0 = r0`, which makes no sense (alphatest based on an interpolant from
// the vertex shader) as no texture alpha cutout is involved.
// - 5454082B also has kDepth draws with pretty complex shaders clearly for
// use only in the color pass - even fetching and filtering a shadowmap.
// For now, based on these, let's assume the pixel shader is never used with
// kDepth.
kDepth = 5,
@ -833,10 +831,10 @@ enum class ModeControl : uint32_t {
// coordinates of the corners).
//
// The rectangle is used for both the source render target and the destination
// texture, according to how it's used in Tales of Vesperia.
// texture, according to how it's used in 4E4D07E9.
//
// Direct3D 9 gives the rectangle in source render target coordinates (for
// example, in Halo 3, the sniper rifle scope has a (128,64)->(448,256)
// example, in 4D5307E6, the sniper rifle scope has a (128,64)->(448,256)
// rectangle). It doesn't adjust the EDRAM base pointer, otherwise (taking into
// account that 4x MSAA is used for the scope) it would have been
// (8,0)->(328,192), but it's not. However, it adjusts the destination texture
@ -851,7 +849,7 @@ enum class ModeControl : uint32_t {
// RB_COPY_DEST_PITCH's purpose appears to be not clamping or something like
// that, but just specifying pitch for going between rows, and height for going
// between 3D texture slices. copy_dest_pitch is rounded to 32 by Direct3D 9,
// copy_dest_height is not. In the Halo 3 sniper rifle scope example,
// copy_dest_height is not. In the 4D5307E6 sniper rifle scope example,
// copy_dest_pitch is 320, and copy_dest_height is 192 - the same as the resolve
// rectangle size (resolving from a 320x192 portion of the surface at 128,64 to
// the whole texture, at 0,0). Relative to RB_COPY_DEST_BASE, the height should
@ -860,17 +858,17 @@ enum class ModeControl : uint32_t {
// of the register) that it exists purely to be able to go between 3D texture
// slices.
//
// Window scissor must also be applied - in the jigsaw puzzle in Banjo-Tooie,
// there are 1280x720 resolve rectangles, but only the scissored 1280x256
// needs to be copied, otherwise it overflows even beyond the EDRAM, and the
// depth buffer is visible on the screen. It also ensures the coordinates are
// not negative (in F.E.A.R., for example, the right tile is resolved with
// vertices (-640,0)->(640,720), however, the destination texture pointer is
// adjusted properly to the right half of the texture, and the source render
// target has a pitch of 800).
// Window scissor must also be applied - in the jigsaw puzzle in 58410955, there
// are 1280x720 resolve rectangles, but only the scissored 1280x256 needs to be
// copied, otherwise it overflows even beyond the EDRAM, and the depth buffer is
// visible on the screen. It also ensures the coordinates are not negative (in
// 565507D9, for example, the right tile is resolved with vertices
// (-640,0)->(640,720), however, the destination texture pointer is adjusted
// properly to the right half of the texture, and the source render target has a
// pitch of 800).
// Granularity of offset and size in resolve operations is 8x8 pixels
// (GPU_RESOLVE_ALIGNMENT - for example, Halo 3 resolves a 24x16 region for a
// (GPU_RESOLVE_ALIGNMENT - for example, 4D5307E6 resolves a 24x16 region for a
// 18x10 texture, 8x8 region for a 1x1 texture).
// https://github.com/jmfauvel/CSGO-SDK/blob/master/game/client/view.cpp#L944
// https://github.com/stanriders/hl2-asw-port/blob/master/src/game/client/vgui_int.cpp#L901
@ -1072,9 +1070,9 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
// pitch is irrelevant to them (but the 256-byte alignment requirement still
// applies to linear textures).
// Examples of pitch > aligned width:
// - Plants vs. Zombies (loading screen and menu backgrounds, 1408 for a
// 1280x linear k_DXT4_5 texture, which corresponds to 22 * 256 bytes
// rather than 20 * 256 for just 1280x).
// - 584109FF (loading screen and menu backgrounds, 1408 for a 1280x linear
// k_DXT4_5 texture, which corresponds to 22 * 256 bytes rather than
// 20 * 256 for just 1280x).
uint32_t pitch : 9; // +22
uint32_t tiled : 1; // +31

View file

@ -98,7 +98,7 @@ X_HRESULT XgiApp::DispatchMessageSync(uint32_t message, uint32_t buffer_ptr,
return X_E_SUCCESS;
}
case 0x000B0014: {
// Gets Jetpac XBLA in game
// Gets 584107FB in game.
// get high score table?
XELOGD("XGI_unknown");
return X_STATUS_SUCCESS;

View file

@ -66,7 +66,7 @@ X_HRESULT XLiveBaseApp::DispatchMessageSync(uint32_t message,
return X_E_FAIL;
}
case 0x00058046: {
// Required to be successful for Forza 4 to detect signed-in profile
// Required to be successful for 4D530910 to detect signed-in profile
// Doesn't seem to set anything in the given buffer, probably only takes
// input
XELOGD("XLiveBaseUnk58046({:08X}, {:08X}) unimplemented", buffer_ptr,

View file

@ -73,8 +73,8 @@ struct XCONTENT_DATA {
}
void set_display_name(const std::u16string_view value) {
// Some games (eg Goldeneye XBLA) require multiple null-terminators for it
// to read the string properly, blanking the array should take care of that
// Some games (e.g. 584108A9) require multiple null-terminators for it to
// read the string properly, blanking the array should take care of that
std::fill_n(display_name_raw.chars, countof(display_name_raw.chars), 0);
string_util::copy_and_swap_truncating(display_name_raw.chars, value,

View file

@ -20,9 +20,8 @@ namespace kernel {
namespace xam {
UserProfile::UserProfile() {
// NeoGeo Battle Coliseum checks the user XUID against a mask of
// 0x00C0000000000000 (3<<54), if non-zero, it prevents the user from playing
// the game.
// 58410A1F checks the user XUID against a mask of 0x00C0000000000000 (3<<54),
// if non-zero, it prevents the user from playing the game.
// "You do not have permissions to perform this operation."
xuid_ = 0xB13EBABEBABEBABE;
name_ = "User";

View file

@ -249,8 +249,10 @@ dword_result_t NetDll_WSAStartup(dword_t caller, word_t version,
data_ptr->max_sockets = wsaData.iMaxSockets;
data_ptr->max_udpdg = wsaData.iMaxUdpDg;
// Some games (PoG) want this value round-tripped - they'll compare if it
// changes and bugcheck if it does.
// Some games (5841099F) want this value round-tripped - they'll compare if
// it changes and bugcheck if it does.
// TODO(Triang3l): Verify if the title ID in the comment is correct - added
// by benvanik as an acronym initially.
uint32_t vendor_ptr = xe::load_and_swap<uint32_t>(data_out + 0x190);
xe::store_and_swap<uint32_t>(data_out + 0x190, vendor_ptr);
}
@ -459,7 +461,7 @@ dword_result_t NetDll_XNetGetTitleXnAddr(dword_t caller,
// TODO(gibbed): A proper mac address.
// RakNet's 360 version appears to depend on abEnet to create "random" 64-bit
// numbers. A zero value will cause RakPeer::Startup to fail. This causes
// Peggle 2 to crash on startup.
// 58411436 to crash on startup.
// The 360-specific code is scrubbed from the RakNet repo, but there are
// still traces of what it's doing which match the game code.
// https://github.com/facebookarchive/RakNet/blob/master/Source/RakPeer.cpp#L382

View file

@ -79,8 +79,8 @@ dword_result_t XNotifyGetNext(dword_t handle, dword_t match_id,
}
*id_ptr = dequeued ? id : 0;
// param_ptr may be null - Ghost Recon Advanced Warfighter 2 Demo explicitly
// passes nullptr in the code.
// param_ptr may be null - 555307F0 Demo explicitly passes nullptr in the
// code.
// https://github.com/xenia-project/xenia/pull/1577
if (param_ptr) {
*param_ptr = dequeued ? param : 0;

View file

@ -17,8 +17,7 @@ namespace kernel {
namespace xam {
// Neither player_count nor party_list is read or written here; the function
// always returns the fixed code 0x807D0003.
dword_result_t XamPartyGetUserList(dword_t player_count, lpdword_t party_list) {
// Sonic & All-Stars Racing Transformed want specificly this code
// to skip loading party data.
// 5345085D wants specifically this code to skip loading party data.
// This code is not documented in the NT_STATUS code list.
return 0x807D0003;
}

View file

@ -716,7 +716,7 @@ dword_result_t XamUserCreateAchievementEnumerator(dword_t title_id,
i, // dummy image id
0,
{0, 0},
8}; // flags=8 makes dummy achievements show up in Crackdown's
8}; // flags=8 makes dummy achievements show up in 4D5307DC
// achievements list.
e->AppendItem(item);
}

View file

@ -34,7 +34,7 @@ DECLARE_XBDM_EXPORT1(DmCloseLoadedModules, kDebug, kStub);
MAKE_DUMMY_STUB_STATUS(DmFreePool);
// Declared without parameters because the real arguments are unknown (see the
// note in the body); unconditionally returns 0x02DA0000.
dword_result_t DmGetXbeInfo() {
// TODO(gibbed): Crackdown appears to expect this as success?
// TODO(gibbed): 4D5307DC appears to expect this as success?
// Unknown arguments -- let's hope things don't explode.
return 0x02DA0000;
}

View file

@ -119,7 +119,7 @@ static_assert_size(XMA_CONTEXT_INIT, 56);
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
pointer_t<XMA_CONTEXT_INIT> context_init) {
// Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland).
// Input buffers may be null (buffer 1 in 415607D4).
// Convert to host endianness.
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
uint32_t input_buffer_0_physical_address = 0;

View file

@ -47,8 +47,8 @@ void HandleSetThreadName(pointer_t<X_EXCEPTION_RECORD> record) {
return;
}
// Shadowrun (and its demo) has a bug where it ends up passing freed memory
// for the name, so at the point of SetThreadName it's filled with junk.
// 4D5307D6 (and its demo) has a bug where it ends up passing freed memory for
// the name, so at the point of SetThreadName it's filled with junk.
// TODO(gibbed): cvar for thread name encoding for conversion, some games use
// SJIS and there's no way to automatically know this.

View file

@ -49,10 +49,10 @@ static bool IsValidPath(const std::string_view s, bool is_pattern) {
if (got_asterisk) {
// * must be followed by a . (*.)
//
// Viva Piñata: Party Animals (4D530819) has a bug in its game code where
// it attempts to FindFirstFile() with filters of "Game:\\*_X3.rkv",
// "Game:\\m*_X3.rkv", and "Game:\\w*_X3.rkv" and will infinite loop if
// the path filter is allowed.
// 4D530819 has a bug in its game code where it attempts to
// FindFirstFile() with filters of "Game:\\*_X3.rkv", "Game:\\m*_X3.rkv",
// and "Game:\\w*_X3.rkv" and will infinite loop if the path filter is
// allowed.
if (c != '.') {
return false;
}

View file

@ -111,7 +111,7 @@ struct X_OBJECT_TYPE {
class XObject {
public:
// Burnout Paradise needs proper handle value for certain calculations
// 45410806 needs proper handle value for certain calculations
// It gets handle value from TLS (without base handle value is 0x88)
// and subtracts 0xF8000088. Without base we're receiving wrong address
// Instead of receiving address that starts with 0x82... we're receiving

View file

@ -1531,12 +1531,11 @@ bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) {
// Must invalidate here because the range being released may be reused in
// another mapping of physical memory - but callback flags are set in each
// heap separately (https://github.com/xenia-project/xenia/issues/1559 -
// dynamic vertices in Viva Pinata start screen and menu allocated in
// 0xA0000000 at addresses that overlap intro video textures in 0xE0000000,
// with the state of the allocator as of February 24th, 2020). If memory is
// invalidated in Alloc instead, Alloc won't be aware of callbacks enabled in
// other heaps, thus callback handlers will keep considering this range valid
// forever.
// dynamic vertices in 4D5307F2 start screen and menu allocated in 0xA0000000
// at addresses that overlap intro video textures in 0xE0000000, with the
// state of the allocator as of February 24th, 2020). If memory is invalidated
// in Alloc instead, Alloc won't be aware of callbacks enabled in other heaps,
// thus callback handlers will keep considering this range valid forever.
uint32_t region_size;
if (QuerySize(base_address, &region_size)) {
TriggerCallbacks(std::move(global_lock), base_address, region_size, true,