diff --git a/GPCS4/Graphics/Gcn/GcnCompiler.cpp b/GPCS4/Graphics/Gcn/GcnCompiler.cpp index d5f02caf..e247f79b 100644 --- a/GPCS4/Graphics/Gcn/GcnCompiler.cpp +++ b/GPCS4/Graphics/Gcn/GcnCompiler.cpp @@ -2477,10 +2477,6 @@ namespace sce::gcn GcnRegisterValuePair GcnCompiler::emitBuildInlineConst(const GcnInstOperand& reg) { GcnRegisterValuePair result = {}; - result.low.type.ctype = (reg.type == GcnScalarType::Uint64 || - reg.type == GcnScalarType::Sint64) ? getHalfType(reg.type) : reg.type; - result.low.type.ccount = 1; - result.high.type = result.low.type; bool doubleType = isDoubleType(reg.type); auto field = reg.field; @@ -2488,38 +2484,32 @@ namespace sce::gcn { case GcnOperandField::ConstZero: { - result.low.id = m_module.consti32(0); - result.low.type.ctype = GcnScalarType::Sint32; + result.low.id = m_module.consti32(0); if (doubleType) { - result.high.id = m_module.consti32(0); - result.high.type.ctype = GcnScalarType::Sint32; + result.high.id = m_module.consti32(0); } } - break; + break; case GcnOperandField::SignedConstIntPos: { constexpr int32_t InlineConstZero = 128; int32_t value = reg.code - InlineConstZero; result.low.id = m_module.consti32(value); - result.low.type.ctype = GcnScalarType::Sint32; if (doubleType) { result.high.id = m_module.consti32(0); - result.high.type.ctype = GcnScalarType::Sint32; } } - break; + break; case GcnOperandField::SignedConstIntNeg: { constexpr int32_t InlineConst64 = 192; int32_t value = InlineConst64 - reg.code; result.low.id = m_module.consti32(value); - result.low.type.ctype = GcnScalarType::Sint32; if (doubleType) { result.high.id = m_module.consti32(0); - result.high.type.ctype = GcnScalarType::Sint32; } } break; @@ -2549,6 +2539,20 @@ namespace sce::gcn break; } + if (field >= GcnOperandField::ConstZero && field <= GcnOperandField::SignedConstIntNeg) + { + result.low.type.ctype = GcnScalarType::Sint32; + } + else + { + result.low.type.ctype = doubleType + ? 
GcnScalarType::Float64 + : GcnScalarType::Float32; + } + + result.low.type.ccount = 1; + result.high.type = result.low.type; + return result; } @@ -3202,5 +3206,4 @@ namespace sce::gcn } - } // namespace sce::gcn \ No newline at end of file diff --git a/GPCS4/Graphics/Gcn/GcnCompiler.h b/GPCS4/Graphics/Gcn/GcnCompiler.h index 71a8f649..aa2c788b 100644 --- a/GPCS4/Graphics/Gcn/GcnCompiler.h +++ b/GPCS4/Graphics/Gcn/GcnCompiler.h @@ -221,7 +221,8 @@ namespace sce::gcn void emitLaneRead( const GcnShaderInstruction& ins); GcnRegisterValue emitQueryTextureSize( - const GcnShaderInstruction& ins); + const GcnShaderInstruction& ins, + const GcnRegisterValue& lod); GcnRegisterValue emitQueryTextureLevels( const GcnShaderInstruction& ins); void emitQueryTextureInfo( @@ -452,6 +453,14 @@ namespace sce::gcn GcnRegisterValue emitLoadTexOffset( const GcnShaderInstruction& ins); + GcnRegisterValue emitCalcQueryLod( + const GcnRegisterValue& lod); + + GcnRegisterValue emitApplyTexOffset( + const GcnShaderInstruction& ins, + const GcnRegisterValue& coord, + const GcnRegisterValue& lod); + GcnRegisterValue emitRecoverCubeCoord( const GcnRegisterValue& coord); diff --git a/GPCS4/Graphics/Gcn/GcnCompilerVectorALU.cpp b/GPCS4/Graphics/Gcn/GcnCompilerVectorALU.cpp index 430b82cc..646f2d3a 100644 --- a/GPCS4/Graphics/Gcn/GcnCompilerVectorALU.cpp +++ b/GPCS4/Graphics/Gcn/GcnCompilerVectorALU.cpp @@ -406,12 +406,18 @@ namespace sce::gcn // VectorThreadMask case GcnOpcode::V_CNDMASK_B32: { - auto smask = ins.encoding == GcnInstEncoding::VOP3 ? - src[2].low : - m_state.vcc.emitLoad(GcnRegMask::select(0)).low; + auto smask = ins.encoding == GcnInstEncoding::VOP3 + ? src[2].low + : m_state.vcc.emitLoad(GcnRegMask::select(0)).low; + auto eqMask = emitCommonSystemValueLoad( + GcnSystemValue::SubgroupEqMask, GcnRegMask::select(0)); + + auto mask = smask; + mask.id = m_module.opBitwiseAnd( + typeId, smask.id, eqMask.id); // Should we calculate the LSB of the mask? 
- auto condition = emitRegisterZeroTest(smask, GcnZeroTest::TestNz); + auto condition = emitRegisterZeroTest(mask, GcnZeroTest::TestNz); dst.low.id = m_module.opSelect(typeId, condition.id, src[1].low.id, diff --git a/GPCS4/Graphics/Gcn/GcnCompilerVectorMemory.cpp b/GPCS4/Graphics/Gcn/GcnCompilerVectorMemory.cpp index 75c300c0..fa37fba3 100644 --- a/GPCS4/Graphics/Gcn/GcnCompilerVectorMemory.cpp +++ b/GPCS4/Graphics/Gcn/GcnCompilerVectorMemory.cpp @@ -193,7 +193,9 @@ namespace sce::gcn } } - GcnRegisterValue GcnCompiler::emitQueryTextureSize(const GcnShaderInstruction& ins) + GcnRegisterValue GcnCompiler::emitQueryTextureSize( + const GcnShaderInstruction& ins, + const GcnRegisterValue& lod) { const GcnInstOperand& textureReg = ins.src[2]; const uint32_t textureId = textureReg.code * 4; @@ -205,12 +207,10 @@ namespace sce::gcn if (info.imageInfo.ms == 0 && info.imageInfo.sampled == 1) { - auto lod = emitRegisterLoad(ins.src[0]); - result.id = m_module.opImageQuerySizeLod( getVectorTypeId(result.type), m_module.opLoad(info.imageTypeId, info.varId), - lod.low.id); + lod.id); } else { @@ -250,8 +250,10 @@ namespace sce::gcn { GcnImageResFlags flags = GcnImageResFlags(ins.control.mimg.dmask); - GcnRegisterValue textureSize = emitQueryTextureSize(ins); - GcnRegisterValue textureLevel = {}; + auto lod = emitRegisterLoad(ins.src[0]); + GcnRegisterValue textureSize = emitQueryTextureSize(ins, lod.low); + GcnRegisterValue textureLevel = {}; + if (flags.test(GcnImageResComponent::MipCount)) { textureLevel = emitQueryTextureLevels(ins); @@ -720,12 +722,7 @@ namespace sce::gcn if (flags.test(GcnMimgModifier::Offset)) { - // m_module.enableCapability(spv::CapabilityImageGatherExtended); - - auto offset = emitLoadTexOffset(ins); - - imageOperands.flags |= spv::ImageOperandsOffsetMask; - imageOperands.sConstOffset = offset.id; + coord = emitApplyTexOffset(ins, coord, lod); } // Combine the texture and the sampler into a sampled image @@ -881,7 +878,7 @@ namespace sce::gcn 
GcnRegisterValue GcnCompiler::emitLoadTexOffset(const GcnShaderInstruction& ins) { - const uint32_t typeId = getScalarTypeId(GcnScalarType::Uint32); + const uint32_t typeId = getScalarTypeId(GcnScalarType::Sint32); auto offsets = emitLoadAddrComponent(GcnImageAddrComponent::Offsets, ins); const GcnImageInfo imageInfo = getImageInfo(ins); @@ -890,7 +887,7 @@ namespace sce::gcn util::static_vector components; for (uint32_t i = 0; i != dim; ++i) { - uint32_t offsetId = m_module.opBitFieldUExtract(typeId, + uint32_t offsetId = m_module.opBitFieldSExtract(typeId, offsets.id, m_module.constu32(i * 8), m_module.constu32(6)); @@ -898,13 +895,74 @@ namespace sce::gcn } GcnRegisterValue result = {}; - result.type.ctype = GcnScalarType::Uint32; + result.type.ctype = GcnScalarType::Sint32; result.type.ccount = components.size(); result.id = m_module.opCompositeConstruct(getVectorTypeId(result.type), components.size(), components.data()); return result; } + GcnRegisterValue GcnCompiler::emitCalcQueryLod( + const GcnRegisterValue& lod) + { + GcnRegisterValue result; + result.type.ctype = GcnScalarType::Uint32; + result.type.ccount = 1; + if (lod.id == 0) + { + // If instruction doesn't have lod component (lod.id is 0), + // we need to feed a zero lod for image size query. + result.id = m_module.constu32(0); + } + else + { + result.id = m_module.opConvertFtoU( + getVectorTypeId(result.type), + lod.id); + } + return result; + } + + GcnRegisterValue GcnCompiler::emitApplyTexOffset( + const GcnShaderInstruction& ins, + const GcnRegisterValue& coord, + const GcnRegisterValue& lod) + { + // SPIR-V doesn't allow non-constant texture offset for OpImageSample*, + // so we need to calculate the coordinate manually. 
+ + GcnRegisterValue result; + + // Calculate the integer lod used to query image size + auto queryLod = emitCalcQueryLod(lod); + + // Image size in texels, used to unnormalize the coordinate + auto size_u = emitQueryTextureSize(ins, queryLod); + + result.type.ctype = GcnScalarType::Float32; + result.type.ccount = size_u.type.ccount; + const uint32_t typeId = getVectorTypeId(result.type); + + uint32_t size_f = m_module.opConvertUtoF(typeId, + size_u.id); + uint32_t coordUnorm = m_module.opFMul(typeId, coord.id, size_f); + + // Apply offset + auto offset_s = emitLoadTexOffset(ins); + uint32_t offset_f = m_module.opConvertStoF(typeId, + offset_s.id); + uint32_t coordAdjusted = m_module.opFAdd(typeId, + coordUnorm, + offset_f); + + // Normalized texel coordinate + result.id = m_module.opFDiv(typeId, + coordAdjusted, + size_f); + + return result; + } + GcnRegisterValue GcnCompiler::emitRecoverCubeCoord( const GcnRegisterValue& coord) { @@ -960,9 +1018,9 @@ namespace sce::gcn } else { - auto type = flags.test(GcnMimgModifier::Offset) - ? GcnScalarType::Uint32 - : GcnScalarType::Float32; + type = flags.test(GcnMimgModifier::Offset) + ? GcnScalarType::Sint32 + : GcnScalarType::Float32; } return emitRegisterBitcast(emitVgprLoad(reg), type); } diff --git a/GPCS4/Graphics/Violet/VltAdapter.cpp b/GPCS4/Graphics/Violet/VltAdapter.cpp index 898879db..a8f84b54 100644 --- a/GPCS4/Graphics/Violet/VltAdapter.cpp +++ b/GPCS4/Graphics/Violet/VltAdapter.cpp @@ -418,15 +418,16 @@ namespace sce::vlt // Modify and add request features as development goes. 
// Create pNext chain for additional device features - enabled.core.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - enabled.core.pNext = nullptr; - enabled.core.features.geometryShader = VK_TRUE; - enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy; - enabled.core.features.shaderFloat64 = VK_TRUE; - enabled.core.features.shaderInt64 = VK_TRUE; - enabled.core.features.tessellationShader = VK_TRUE; - enabled.core.features.logicOp = VK_TRUE; - enabled.core.features.imageCubeArray = VK_TRUE; + enabled.core.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + enabled.core.pNext = nullptr; + enabled.core.features.geometryShader = VK_TRUE; + enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy; + enabled.core.features.shaderFloat64 = VK_TRUE; + enabled.core.features.shaderInt64 = VK_TRUE; + enabled.core.features.tessellationShader = VK_TRUE; + enabled.core.features.logicOp = VK_TRUE; + enabled.core.features.imageCubeArray = VK_TRUE; + enabled.core.features.shaderImageGatherExtended = VK_TRUE; enabled.vk11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; enabled.vk11.pNext = std::exchange(enabled.core.pNext, &enabled.vk11);