fix some gcn compiler errors

This commit is contained in:
Asuka 2022-07-14 02:00:36 +08:00
parent ac061b2fe1
commit ff18254a18
5 changed files with 124 additions and 47 deletions

View file

@ -2477,10 +2477,6 @@ namespace sce::gcn
GcnRegisterValuePair GcnCompiler::emitBuildInlineConst(const GcnInstOperand& reg)
{
GcnRegisterValuePair result = {};
result.low.type.ctype = (reg.type == GcnScalarType::Uint64 ||
reg.type == GcnScalarType::Sint64) ? getHalfType(reg.type) : reg.type;
result.low.type.ccount = 1;
result.high.type = result.low.type;
bool doubleType = isDoubleType(reg.type);
auto field = reg.field;
@ -2488,38 +2484,32 @@ namespace sce::gcn
{
case GcnOperandField::ConstZero:
{
result.low.id = m_module.consti32(0);
result.low.type.ctype = GcnScalarType::Sint32;
result.low.id = m_module.consti32(0);
if (doubleType)
{
result.high.id = m_module.consti32(0);
result.high.type.ctype = GcnScalarType::Sint32;
result.high.id = m_module.consti32(0);
}
}
break;
break;
case GcnOperandField::SignedConstIntPos:
{
constexpr int32_t InlineConstZero = 128;
int32_t value = reg.code - InlineConstZero;
result.low.id = m_module.consti32(value);
result.low.type.ctype = GcnScalarType::Sint32;
if (doubleType)
{
result.high.id = m_module.consti32(0);
result.high.type.ctype = GcnScalarType::Sint32;
}
}
break;
break;
case GcnOperandField::SignedConstIntNeg:
{
constexpr int32_t InlineConst64 = 192;
int32_t value = InlineConst64 - reg.code;
result.low.id = m_module.consti32(value);
result.low.type.ctype = GcnScalarType::Sint32;
if (doubleType)
{
result.high.id = m_module.consti32(0);
result.high.type.ctype = GcnScalarType::Sint32;
}
}
break;
@ -2549,6 +2539,20 @@ namespace sce::gcn
break;
}
if (field >= GcnOperandField::ConstZero && field <= GcnOperandField::SignedConstIntNeg)
{
result.low.type.ctype = GcnScalarType::Sint32;
}
else
{
result.low.type.ctype = doubleType
? GcnScalarType::Float64
: GcnScalarType::Float32;
}
result.low.type.ccount = 1;
result.high.type = result.low.type;
return result;
}
@ -3202,5 +3206,4 @@ namespace sce::gcn
}
} // namespace sce::gcn

View file

@ -221,7 +221,8 @@ namespace sce::gcn
void emitLaneRead(
const GcnShaderInstruction& ins);
GcnRegisterValue emitQueryTextureSize(
const GcnShaderInstruction& ins);
const GcnShaderInstruction& ins,
const GcnRegisterValue& lod);
GcnRegisterValue emitQueryTextureLevels(
const GcnShaderInstruction& ins);
void emitQueryTextureInfo(
@ -452,6 +453,14 @@ namespace sce::gcn
GcnRegisterValue emitLoadTexOffset(
const GcnShaderInstruction& ins);
// Produces the single-component Uint32 LOD value that is handed to
// the image size query. Per the comment in the definition, a zero
// LOD is meant to be supplied when the instruction has no LOD
// component; otherwise the given lod is converted with opConvertFtoU.
GcnRegisterValue emitCalcQueryLod(
const GcnRegisterValue& lod);
// Applies the instruction's texel offset to a texture coordinate
// without using a SPIR-V offset image operand: the coordinate is
// multiplied by the queried texture size, the offset loaded via
// emitLoadTexOffset is added, and the sum is divided by the size
// again. Returns a Float32 vector with as many components as the
// queried size.
GcnRegisterValue emitApplyTexOffset(
const GcnShaderInstruction& ins,
const GcnRegisterValue& coord,
const GcnRegisterValue& lod);
GcnRegisterValue emitRecoverCubeCoord(
const GcnRegisterValue& coord);

View file

@ -406,12 +406,18 @@ namespace sce::gcn
// VectorThreadMask
case GcnOpcode::V_CNDMASK_B32:
{
auto smask = ins.encoding == GcnInstEncoding::VOP3 ?
src[2].low :
m_state.vcc.emitLoad(GcnRegMask::select(0)).low;
auto smask = ins.encoding == GcnInstEncoding::VOP3
? src[2].low
: m_state.vcc.emitLoad(GcnRegMask::select(0)).low;
auto eqMask = emitCommonSystemValueLoad(
GcnSystemValue::SubgroupEqMask, GcnRegMask::select(0));
auto mask = smask;
mask.id = m_module.opBitwiseAnd(
typeId, smask.id, eqMask.id);
// Should we calculate the LSB of the mask?
auto condition = emitRegisterZeroTest(smask, GcnZeroTest::TestNz);
auto condition = emitRegisterZeroTest(mask, GcnZeroTest::TestNz);
dst.low.id = m_module.opSelect(typeId,
condition.id,
src[1].low.id,

View file

@ -193,7 +193,9 @@ namespace sce::gcn
}
}
GcnRegisterValue GcnCompiler::emitQueryTextureSize(const GcnShaderInstruction& ins)
GcnRegisterValue GcnCompiler::emitQueryTextureSize(
const GcnShaderInstruction& ins,
const GcnRegisterValue& lod)
{
const GcnInstOperand& textureReg = ins.src[2];
const uint32_t textureId = textureReg.code * 4;
@ -205,12 +207,10 @@ namespace sce::gcn
if (info.imageInfo.ms == 0 && info.imageInfo.sampled == 1)
{
auto lod = emitRegisterLoad(ins.src[0]);
result.id = m_module.opImageQuerySizeLod(
getVectorTypeId(result.type),
m_module.opLoad(info.imageTypeId, info.varId),
lod.low.id);
lod.id);
}
else
{
@ -250,8 +250,10 @@ namespace sce::gcn
{
GcnImageResFlags flags = GcnImageResFlags(ins.control.mimg.dmask);
GcnRegisterValue textureSize = emitQueryTextureSize(ins);
GcnRegisterValue textureLevel = {};
auto lod = emitRegisterLoad(ins.src[0]);
GcnRegisterValue textureSize = emitQueryTextureSize(ins, lod.low);
GcnRegisterValue textureLevel = {};
if (flags.test(GcnImageResComponent::MipCount))
{
textureLevel = emitQueryTextureLevels(ins);
@ -720,12 +722,7 @@ namespace sce::gcn
if (flags.test(GcnMimgModifier::Offset))
{
// m_module.enableCapability(spv::CapabilityImageGatherExtended);
auto offset = emitLoadTexOffset(ins);
imageOperands.flags |= spv::ImageOperandsOffsetMask;
imageOperands.sConstOffset = offset.id;
coord = emitApplyTexOffset(ins, coord, lod);
}
// Combine the texture and the sampler into a sampled image
@ -881,7 +878,7 @@ namespace sce::gcn
GcnRegisterValue GcnCompiler::emitLoadTexOffset(const GcnShaderInstruction& ins)
{
const uint32_t typeId = getScalarTypeId(GcnScalarType::Uint32);
const uint32_t typeId = getScalarTypeId(GcnScalarType::Sint32);
auto offsets = emitLoadAddrComponent(GcnImageAddrComponent::Offsets, ins);
const GcnImageInfo imageInfo = getImageInfo(ins);
@ -890,7 +887,7 @@ namespace sce::gcn
util::static_vector<uint32_t, 3> components;
for (uint32_t i = 0; i != dim; ++i)
{
uint32_t offsetId = m_module.opBitFieldUExtract(typeId,
uint32_t offsetId = m_module.opBitFieldSExtract(typeId,
offsets.id,
m_module.constu32(i * 8),
m_module.constu32(6));
@ -898,13 +895,74 @@ namespace sce::gcn
}
GcnRegisterValue result = {};
result.type.ctype = GcnScalarType::Uint32;
result.type.ctype = GcnScalarType::Sint32;
result.type.ccount = components.size();
result.id = m_module.opCompositeConstruct(getVectorTypeId(result.type),
components.size(), components.data());
return result;
}
// Builds the level-of-detail operand used for an image size query.
//
// lod:     the LOD value taken from the instruction. An id of 0 is
//          treated as "the instruction has no LOD component".
// returns: a single-component Uint32 value; constant 0 when no LOD
//          was supplied, otherwise lod converted with opConvertFtoU.
GcnRegisterValue GcnCompiler::emitCalcQueryLod(
    const GcnRegisterValue& lod)
{
    GcnRegisterValue result = {};
    result.type.ctype  = GcnScalarType::Uint32;
    result.type.ccount = 1;

    // The presence check has to look at the incoming lod. Testing
    // result.id here would read a value that has not been assigned
    // yet and would never take the caller's lod into account.
    if (lod.id == 0)
    {
        // If instruction doesn't have lod component,
        // we need to feed a zero lod for image size query.
        result.id = m_module.constu32(0);
    }
    else
    {
        // NOTE(review): assumes lod holds a floating point scalar,
        // as required by a float-to-unsigned conversion - confirm
        // against the callers.
        result.id = m_module.opConvertFtoU(
            getVectorTypeId(result.type),
            lod.id);
    }

    return result;
}
// Folds the instruction's texel offset directly into the sampling
// coordinate.
//
// SPIR-V does not accept a non-constant texture offset on the
// OpImageSample* instructions, so the offset is applied by hand:
// scale the coordinate by the texture size, add the offset in texel
// units, then scale back down by the same size.
//
// ins:     instruction whose texture and offset are used.
// coord:   coordinate to adjust.
// lod:     LOD used to pick the mip level for the size query.
// returns: Float32 vector with the component count of the queried
//          texture size.
GcnRegisterValue GcnCompiler::emitApplyTexOffset(
    const GcnShaderInstruction& ins,
    const GcnRegisterValue&     coord,
    const GcnRegisterValue&     lod)
{
    // Texture size at the LOD that the size query should use
    const auto sizeLod   = emitCalcQueryLod(lod);
    const auto texelSize = emitQueryTextureSize(ins, sizeLod);

    GcnRegisterValue adjusted;
    adjusted.type.ctype  = GcnScalarType::Float32;
    adjusted.type.ccount = texelSize.type.ccount;
    const uint32_t floatVecTypeId = getVectorTypeId(adjusted.type);

    // Move the coordinate into texel space
    const uint32_t sizeAsFloat = m_module.opConvertUtoF(
        floatVecTypeId, texelSize.id);
    const uint32_t texelCoord = m_module.opFMul(
        floatVecTypeId, coord.id, sizeAsFloat);

    // Shift by the signed texel offset
    const auto     texelOffset   = emitLoadTexOffset(ins);
    const uint32_t offsetAsFloat = m_module.opConvertStoF(
        floatVecTypeId, texelOffset.id);
    const uint32_t shiftedCoord = m_module.opFAdd(
        floatVecTypeId, texelCoord, offsetAsFloat);

    // Back to the normalized range
    adjusted.id = m_module.opFDiv(
        floatVecTypeId, shiftedCoord, sizeAsFloat);
    return adjusted;
}
GcnRegisterValue GcnCompiler::emitRecoverCubeCoord(
const GcnRegisterValue& coord)
{
@ -960,9 +1018,9 @@ namespace sce::gcn
}
else
{
auto type = flags.test(GcnMimgModifier::Offset)
? GcnScalarType::Uint32
: GcnScalarType::Float32;
type = flags.test(GcnMimgModifier::Offset)
? GcnScalarType::Sint32
: GcnScalarType::Float32;
}
return emitRegisterBitcast(emitVgprLoad(reg), type);
}

View file

@ -418,15 +418,16 @@ namespace sce::vlt
// Modify and add request features as development goes.
// Create pNext chain for additional device features
enabled.core.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
enabled.core.pNext = nullptr;
enabled.core.features.geometryShader = VK_TRUE;
enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy;
enabled.core.features.shaderFloat64 = VK_TRUE;
enabled.core.features.shaderInt64 = VK_TRUE;
enabled.core.features.tessellationShader = VK_TRUE;
enabled.core.features.logicOp = VK_TRUE;
enabled.core.features.imageCubeArray = VK_TRUE;
enabled.core.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
enabled.core.pNext = nullptr;
enabled.core.features.geometryShader = VK_TRUE;
enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy;
enabled.core.features.shaderFloat64 = VK_TRUE;
enabled.core.features.shaderInt64 = VK_TRUE;
enabled.core.features.tessellationShader = VK_TRUE;
enabled.core.features.logicOp = VK_TRUE;
enabled.core.features.imageCubeArray = VK_TRUE;
enabled.core.features.shaderImageGatherExtended = VK_TRUE;
enabled.vk11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
enabled.vk11.pNext = std::exchange(enabled.core.pNext, &enabled.vk11);