GPCS4/GPCS4/Graphics/Gcn/GcnCompilerVectorMemory.cpp

#include "GcnCompiler.h"
#include "GcnInstructionUtil.h"
#include "UtilVector.h"

using namespace sce::vlt;

LOG_CHANNEL(Graphic.Gcn.GcnCompiler);

namespace sce::gcn
{
	void GcnCompiler::emitVectorMemory(const GcnShaderInstruction& ins)
	{
		auto opClass = ins.opClass;
		switch (opClass)
		{
			case GcnInstClass::VectorMemBufNoFmt:
				this->emitVectorMemBufNoFmt(ins);
				break;
			case GcnInstClass::VectorMemBufFmt:
				this->emitVectorMemBufFmt(ins);
				break;
			case GcnInstClass::VectorMemBufAtomic:
				this->emitVectorMemBufAtomic(ins);
				break;
			case GcnInstClass::VectorMemImgNoSmp:
				this->emitVectorMemImgNoSmp(ins);
				break;
			case GcnInstClass::VectorMemImgSmp:
				this->emitVectorMemImgSmp(ins);
				break;
			case GcnInstClass::VectorMemImgUt:
				this->emitVectorMemImgUt(ins);
				break;
			case GcnInstClass::VectorMemL1Cache:
				this->emitVectorMemL1Cache(ins);
				break;
		}
	}

	void GcnCompiler::emitVectorMemBufNoFmt(const GcnShaderInstruction& ins)
	{
		emitVectorMemBuffer(ins);
	}

	void GcnCompiler::emitVectorMemBufFmt(const GcnShaderInstruction& ins)
	{
		emitVectorMemBuffer(ins);
	}

	void GcnCompiler::emitVectorMemBufAtomic(const GcnShaderInstruction& ins)
	{
		auto src     = emitRegisterLoad(ins.src[1]);
		auto ptrList = emitGetBufferComponentPtr(ins, false);

		LOG_ASSERT(ins.control.mubuf.slc == 0, "TODO: support GLC and SLC.");

		GcnRegisterValuePair dst = {};
		dst.low.type.ctype       = getDestinationType(ins.src[1].type);
		dst.low.type.ccount      = 1;
		dst.high.type            = dst.low.type;

		const uint32_t typeId = getScalarTypeId(dst.low.type.ctype);

		// should we use device scope?
		uint32_t scope     = spv::ScopeDevice;
		uint32_t semantics = spv::MemorySemanticsUniformMemoryMask |
							 spv::MemorySemanticsAcquireReleaseMask;

		const uint32_t scopeId     = m_module.constu32(scope);
		const uint32_t semanticsId = m_module.constu32(semantics);

		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::BUFFER_ATOMIC_ADD:
				dst.low.id = m_module.opAtomicIAdd(typeId,
												   ptrList[0].id,
												   scopeId, semanticsId,
												   src.low.id);
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}

		bool saveOriginal = ins.control.mubuf.glc != 0;
		if (saveOriginal)
		{
			emitRegisterStore(ins.src[1], dst);
		}
	}

	void GcnCompiler::emitVectorMemImgNoSmp(const GcnShaderInstruction& ins)
	{
		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::IMAGE_LOAD_MIP:
				emitStorageImageLoad(ins);
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}
	}

	void GcnCompiler::emitVectorMemImgSmp(const GcnShaderInstruction& ins)
	{
		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::IMAGE_SAMPLE:
			case GcnOpcode::IMAGE_SAMPLE_L:
			case GcnOpcode::IMAGE_SAMPLE_LZ_O:
			case GcnOpcode::IMAGE_SAMPLE_LZ:
			case GcnOpcode::IMAGE_SAMPLE_C:
				emitTextureSample(ins);
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}
	}

	void GcnCompiler::emitVectorMemImgUt(const GcnShaderInstruction& ins)
	{
		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::IMAGE_GET_RESINFO:
				this->emitQueryTextureInfo(ins);
				break;
			case GcnOpcode::IMAGE_GET_LOD:
				this->emitQueryTextureLod(ins);
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}
	}

	void GcnCompiler::emitVectorMemL1Cache(const GcnShaderInstruction& ins)
	{
		LOG_GCN_UNHANDLED_INST();
	}

	void GcnCompiler::emitVectorMemBuffer(const GcnShaderInstruction& ins)
	{
		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::BUFFER_LOAD_FORMAT_X:
			case GcnOpcode::BUFFER_LOAD_FORMAT_XY:
			case GcnOpcode::BUFFER_LOAD_FORMAT_XYZ:
			case GcnOpcode::BUFFER_LOAD_FORMAT_XYZW:
			case GcnOpcode::TBUFFER_LOAD_FORMAT_X:
			case GcnOpcode::TBUFFER_LOAD_FORMAT_XY:
			case GcnOpcode::TBUFFER_LOAD_FORMAT_XYZ:
			case GcnOpcode::TBUFFER_LOAD_FORMAT_XYZW:
				emitBufferLoadStoreFmt(ins, true);
				break;
			case GcnOpcode::BUFFER_STORE_FORMAT_X:
			case GcnOpcode::BUFFER_STORE_FORMAT_XY:
			case GcnOpcode::BUFFER_STORE_FORMAT_XYZ:
			case GcnOpcode::BUFFER_STORE_FORMAT_XYZW:
			case GcnOpcode::TBUFFER_STORE_FORMAT_X:
			case GcnOpcode::TBUFFER_STORE_FORMAT_XY:
			case GcnOpcode::TBUFFER_STORE_FORMAT_XYZ:
			case GcnOpcode::TBUFFER_STORE_FORMAT_XYZW:
				emitBufferLoadStoreFmt(ins, false);
				break;
			case GcnOpcode::BUFFER_LOAD_UBYTE:
			case GcnOpcode::BUFFER_LOAD_SBYTE:
			case GcnOpcode::BUFFER_LOAD_USHORT:
			case GcnOpcode::BUFFER_LOAD_SSHORT:
			case GcnOpcode::BUFFER_LOAD_DWORD:
			case GcnOpcode::BUFFER_LOAD_DWORDX2:
			case GcnOpcode::BUFFER_LOAD_DWORDX4:
			case GcnOpcode::BUFFER_LOAD_DWORDX3:
				emitBufferLoadStoreNoFmt(ins, true);
				break;
			case GcnOpcode::BUFFER_STORE_BYTE:
			case GcnOpcode::BUFFER_STORE_SHORT:
			case GcnOpcode::BUFFER_STORE_DWORD:
			case GcnOpcode::BUFFER_STORE_DWORDX2:
			case GcnOpcode::BUFFER_STORE_DWORDX4:
			case GcnOpcode::BUFFER_STORE_DWORDX3:
				emitBufferLoadStoreNoFmt(ins, false);
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}
	}

	GcnRegisterValue GcnCompiler::emitQueryTextureSize(
		const GcnShaderInstruction& ins,
		const GcnRegisterValue&     lod)
	{
		const GcnInstOperand& textureReg = ins.src[2];
		const uint32_t        textureId  = textureReg.code * 4;
		const GcnTexture&     info       = m_textures.at(textureId);

		GcnRegisterValue result;
		result.type.ctype  = GcnScalarType::Uint32;
		result.type.ccount = getTexSizeDim(info.imageInfo);

		if (info.imageInfo.ms == 0 && info.imageInfo.sampled == 1)
		{
			result.id = m_module.opImageQuerySizeLod(
				getVectorTypeId(result.type),
				m_module.opLoad(info.imageTypeId, info.varId),
				lod.id);
		}
		else
		{
			result.id = m_module.opImageQuerySize(
				getVectorTypeId(result.type),
				m_module.opLoad(info.imageTypeId, info.varId));
		}

		return result;
	}

	GcnRegisterValue GcnCompiler::emitQueryTextureLevels(const GcnShaderInstruction& ins)
	{
		const GcnInstOperand& textureReg = ins.src[2];
		const uint32_t        textureId  = textureReg.code * 4;
		const GcnTexture&     info       = m_textures.at(textureId);

		GcnRegisterValue result;
		result.type.ctype  = GcnScalarType::Uint32;
		result.type.ccount = 1;

		if (info.imageInfo.sampled == 1)
		{
			result.id = m_module.opImageQueryLevels(
				getVectorTypeId(result.type),
				m_module.opLoad(info.imageTypeId, info.varId));
		}
		else
		{
			// Report one LOD in case of UAVs
			result.id = m_module.constu32(1);
		}
		return result;
	}

	void GcnCompiler::emitQueryTextureInfo(const GcnShaderInstruction& ins)
	{
		GcnImageResFlags flags = GcnImageResFlags(ins.control.mimg.dmask);

		auto             lod          = emitRegisterLoad(ins.src[0]);
		GcnRegisterValue textureSize  = emitQueryTextureSize(ins, lod.low);
		GcnRegisterValue textureLevel = {};

		if (flags.test(GcnImageResComponent::MipCount))
		{
			textureLevel = emitQueryTextureLevels(ins);
		}

		auto     vdata = ins.src[1];
		uint32_t index = vdata.code;
		if (flags.test(GcnImageResComponent::Width))
		{
			vdata.code = index++;
			auto value = emitRegisterExtract(textureSize, GcnRegMask::select(0));
			emitVgprStore(vdata, value);
		}

		if (flags.test(GcnImageResComponent::Height))
		{
			vdata.code = index++;
			auto value = emitRegisterExtract(textureSize, GcnRegMask::select(1));
			emitVgprStore(vdata, value);
		}

		if (flags.test(GcnImageResComponent::Depth))
		{
			vdata.code = index++;
			auto value = emitRegisterExtract(textureSize, GcnRegMask::select(2));
			emitVgprStore(vdata, value);
		}

		if (flags.test(GcnImageResComponent::MipCount))
		{
			vdata.code = index++;
			emitVgprStore(vdata, textureLevel);
		}
	}

	void GcnCompiler::emitQueryTextureLod(const GcnShaderInstruction& ins)
	{
		LOG_GCN_UNHANDLED_INST();
	}

	GcnRegisterValue GcnCompiler::emitCalcBufferAddress(
		const GcnShaderInstruction& ins)
	{
		bool     idxen     = false;
		bool     offen     = false;
		uint32_t optOffset = 0;
		if (ins.encoding == GcnInstEncoding::MUBUF)
		{
			idxen     = ins.control.mubuf.idxen;
			offen     = ins.control.mubuf.offen;
			optOffset = ins.control.mubuf.offset;
		}
		else
		{
			idxen     = ins.control.mtbuf.idxen;
			offen     = ins.control.mtbuf.offen;
			optOffset = ins.control.mtbuf.offset;
		}

		const uint32_t zero       = m_module.constu32(0);
		auto           bufferInfo = getBufferType(ins.src[2]);

		const uint32_t typdId = getScalarTypeId(GcnScalarType::Uint32);

		auto soff = emitRegisterLoad(ins.src[3]);
		// sV#.base is zero in our case.
		uint32_t base  = soff.low.id;
		uint32_t index = idxen ? emitRegisterLoad(ins.src[0]).low.id : zero;

		GcnInstOperand offsetReg = ins.src[0];
		offsetReg.code += static_cast<uint32_t>(idxen);

		uint32_t offset = offen ? emitRegisterLoad(offsetReg).low.id : zero;
		offset          = m_module.opIAdd(typdId, offset, m_module.constu32(optOffset));

		uint32_t stride = m_module.constu32(bufferInfo.buffer.stride);

		LOG_ASSERT(bufferInfo.buffer.isSwizzle == false, "TODO: support swizzle buffer.");

		// Note the returned address is in bytes.
		GcnRegisterValue result;
		result.type.ctype  = GcnScalarType::Uint32;
		result.type.ccount = 1;

		result.id = m_module.opIAdd(typdId,
									m_module.opIAdd(typdId, base, offset),
									m_module.opIMul(typdId, index, stride));
		return result;
	}

	std::vector<GcnRegisterPointer>
	GcnCompiler::emitGetBufferComponentPtr(const GcnShaderInstruction& ins,
										   bool                        isFormat)
	{
		auto op = ins.opcode;

		auto bufferInfo = getBufferType(ins.src[2]);

		Gnm::BufferFormat      dfmt;
		Gnm::BufferChannelType nfmt;
		uint32_t               size = 0;

		if (ins.encoding == GcnInstEncoding::MUBUF)
		{
			dfmt = bufferInfo.buffer.dfmt;
			nfmt = bufferInfo.buffer.nfmt;
			size = ins.control.mubuf.size;
		}
		else
		{
			dfmt = (Gnm::BufferFormat)ins.control.mtbuf.dfmt;
			nfmt = (Gnm::BufferChannelType)ins.control.mtbuf.nfmt;
		}

		GcnRegisterInfo info;
		info.type.ctype   = GcnScalarType::Uint32;
		info.type.ccount  = 1;
		info.type.alength = 0;
		info.sclass       = spv::StorageClassUniform;

		uint32_t ptrTypeId  = getPointerTypeId(info);
		uint32_t uintTypeId = getScalarTypeId(GcnScalarType::Uint32);

		uint32_t dataCount = 0;
		if (isFormat)
		{
			auto bufferFormat = getBufferFormat(dfmt, nfmt);
			dataCount         = bufferFormat.sizeInBytes < 4 ? 1 : bufferFormat.sizeInBytes / 4;
		}
		else
		{
			LOG_ASSERT(size != 0, "error instruction size.");
			dataCount = size > 4 ? size / 4 : 1;
		}

		LOG_ASSERT(nfmt == Gnm::kBufferChannelTypeFloat ||
					   nfmt == Gnm::kBufferChannelTypeUInt ||
					   nfmt == Gnm::kBufferChannelTypeSInt,
				   "TODO: support encoded channel type.");

		auto address = emitCalcBufferAddress(ins);

		std::vector<GcnRegisterPointer> ptrList;
		if (bufferInfo.isSsbo)
		{
			ptrList = emitStorageBufferAccess(bufferInfo.varId,
											  address.id,
											  dataCount);
		}
		else
		{
			ptrList = emitUniformBufferAccess(bufferInfo.varId,
											  address.id,
											  dataCount);
		}
		return ptrList;
	}

	void GcnCompiler::emitBufferLoadStoreNoFmt(const GcnShaderInstruction& ins,
											   bool                        isLoad)
	{
		auto     op         = ins.opcode;
		auto     bufferInfo = getBufferType(ins.src[2]);
		uint32_t size       = ins.control.mubuf.size;

		bool isSigned = op == GcnOpcode::BUFFER_LOAD_SBYTE ||
						op == GcnOpcode::BUFFER_LOAD_SSHORT;

		GcnScalarType dataType   = bufferInfo.isSsbo ? GcnScalarType::Uint32 : GcnScalarType::Float32;
		uint32_t      dataTypeId = getScalarTypeId(dataType);

		auto ptrList = emitGetBufferComponentPtr(ins, false);

		uint32_t vgprCount = size > 4 ? size / 4 : 1;
		for (uint32_t i = 0; i != vgprCount; ++i)
		{
			const auto&    ptr = ptrList[i];
			GcnInstOperand reg = ins.src[1];
			reg.code += i;

			if (isLoad)
			{
				uint32_t         dataId = m_module.opLoad(dataTypeId,
														  ptr.id);
				GcnRegisterValue value;
				value.type.ctype  = dataType;
				value.type.ccount = 1;

				if (size < 4)
				{
					uint32_t uintTypeId = getScalarTypeId(GcnScalarType::Uint32);
					uint32_t sintTypeId = getScalarTypeId(GcnScalarType::Sint32);

					uint32_t src = dataId;
					if (isSigned)
					{
						src = m_module.opBitcast(sintTypeId, src);
					}
					else if (dataType == GcnScalarType::Float32)
					{
						src = m_module.opBitcast(uintTypeId, src);
					}

					value.id = isSigned ? m_module.opBitFieldSExtract(sintTypeId,
																	  src,
																	  m_module.constu32(0),
																	  m_module.constu32(8 * size))
										: m_module.opBitFieldUExtract(uintTypeId,
																	  src,
																	  m_module.constu32(0),
																	  m_module.constu32(8 * size));
				}
				else
				{
					value.id = dataId;
				}

				emitVgprStore(reg, value);
			}
			else
			{
				auto value = emitVgprLoad(reg);
				if (size < 4)
				{
					LOG_ASSERT(false, "support byte and ushort store.");
				}
				else
				{
					value = emitRegisterBitcast(value, dataType);
				}
				m_module.opStore(ptr.id, value.id);
			}
		}
	}

	void GcnCompiler::emitBufferLoadStoreFmt(const GcnShaderInstruction& ins, bool isLoad)
	{
		auto bufferInfo = getBufferType(ins.src[2]);

		uint32_t               count = 0;
		Gnm::BufferFormat      dfmt;
		Gnm::BufferChannelType nfmt;

		if (ins.encoding == GcnInstEncoding::MUBUF)
		{
			count = ins.control.mubuf.count;
			dfmt  = bufferInfo.buffer.dfmt;
			nfmt  = bufferInfo.buffer.nfmt;
		}
		else
		{
			count = ins.control.mtbuf.count;
			dfmt  = (Gnm::BufferFormat)ins.control.mtbuf.dfmt;
			nfmt  = (Gnm::BufferChannelType)ins.control.mtbuf.nfmt;
		}

		auto bufferFormat = getBufferFormat(dfmt, nfmt);

		GcnScalarType dataType   = bufferInfo.isSsbo ? GcnScalarType::Uint32 : GcnScalarType::Float32;
		uint32_t      dataTypeId = getScalarTypeId(dataType);

		auto     dataPtr   = emitGetBufferComponentPtr(ins, true);
		uint32_t vgprCount = count;
		for (uint32_t c = 0; c != bufferFormat.channelCount; ++c)
		{
			if (isLoad)
			{
				switch (dfmt)
				{
					case Gnm::kBufferFormatInvalid:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8_8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32:
					{
						GcnInstOperand reg = ins.src[1];

						uint32_t itemId = m_module.opLoad(dataTypeId, dataPtr[c].id);

						GcnRegisterValue value;
						value.type.ctype  = dataType;
						value.type.ccount = 1;
						value.id          = itemId;

						emitVgprStore(reg, value);
					}
					break;
					case Gnm::kBufferFormat16_16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat10_11_11:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat11_11_10:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat10_10_10_2:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat2_10_10_10:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8_8_8_8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat16_16_16_16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32_32:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32_32_32:
					{
						if (c >= vgprCount)
						{
							// discard extra channel data
							break;
						}

						GcnInstOperand reg = ins.src[1];
						reg.code += c;

						uint32_t itemId = m_module.opLoad(dataTypeId, dataPtr[c].id);

						GcnRegisterValue value;
						value.type.ctype  = dataType;
						value.type.ccount = 1;
						value.id          = itemId;

						emitVgprStore(reg, value);
					}
					break;
				}
			}
			else
			{
				switch (dfmt)
				{
					case Gnm::kBufferFormatInvalid:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8_8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32:
					{
						GcnInstOperand reg   = ins.src[1];
						auto           value = emitVgprLoad(reg);
						value                = emitRegisterBitcast(value, dataType);
						m_module.opStore(dataPtr[c].id, value.id);
					}
					break;
					case Gnm::kBufferFormat16_16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat10_11_11:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat11_11_10:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat10_10_10_2:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat2_10_10_10:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat8_8_8_8:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat16_16_16_16:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32_32:
						LOG_GCN_UNHANDLED_INST();
						break;
					case Gnm::kBufferFormat32_32_32_32:
					{
						uint32_t valueId = 0;
						if (c >= vgprCount)
						{
							valueId = m_module.constu32(0.0);
						}
						else
						{
							GcnInstOperand reg = ins.src[1];
							reg.code += c;

							auto value = emitVgprLoad(reg);
							value      = emitRegisterBitcast(value, dataType);
							valueId    = value.id;
						}

						m_module.opStore(dataPtr[c].id, valueId);
					}
					break;
				}
			}
		}

		// TODO:
		// Zero the extra vgpr
		if (isLoad && vgprCount > bufferFormat.channelCount)
		{
			uint32_t zeroCount = vgprCount - bufferFormat.channelCount;
		}
	}

	void GcnCompiler::emitTextureSample(const GcnShaderInstruction& ins)
	{
		auto                  mimg        = gcnInstructionAs<GcnShaderInstMIMG>(ins);
		const GcnInstOperand& texCoordReg = mimg.vaddr;
		const GcnInstOperand& textureReg  = mimg.srsrc;
		const GcnInstOperand& samplerReg  = mimg.ssamp;
		auto                  flags       = GcnMimgModifierFlags(mimg.control.mod);

		// Texture and sampler register IDs
		// These registers are 4-GPR aligned, so multiplied by 4
		const uint32_t textureId = textureReg.code * 4;
		const uint32_t samplerId = samplerReg.code * 4;

		// Image type, which stores the image dimensions etc.
		const GcnImageInfo imageType = m_textures.at(textureId).imageInfo;

		// Load the texture coordinates. SPIR-V allows these
		// to be float4 even if not all components are used.
		GcnRegisterValue coord = emitLoadTexCoord(ins);

		auto op             = ins.opcode;
		bool isDepthCompare = flags.test(GcnMimgModifier::Pcf);

		const GcnRegisterValue referenceValue = isDepthCompare
													? emitLoadAddrComponent(GcnImageAddrComponent::Zpcf, ins)
													: GcnRegisterValue();

		// LOD for certain sample operations
		const bool hasLod = flags.test(GcnMimgModifier::Lod);

		const GcnRegisterValue lod = hasLod
										 ? emitLoadAddrComponent(GcnImageAddrComponent::Lod, ins)
										 : GcnRegisterValue();

		// Accumulate additional image operands. These are
		// not part of the actual operand token in SPIR-V.
		SpirvImageOperands imageOperands = {};

		if (flags.test(GcnMimgModifier::Offset))
		{
			coord = emitApplyTexOffset(ins, coord, lod);
		}

		// Combine the texture and the sampler into a sampled image
		const uint32_t sampledImageId = emitLoadSampledImage(m_textures.at(textureId),
															 m_samplers.at(samplerId),
															 isDepthCompare);

		// Sampling an image always returns a four-component
		// vector, whereas depth-compare ops return a scalar.
		GcnRegisterValue result = {};
		result.type.ctype       = m_textures.at(textureId).sampledType;
		result.type.ccount      = isDepthCompare ? 1 : 4;

		switch (op)
		{
			// Simple image sample operation
			case GcnOpcode::IMAGE_SAMPLE:
			{
				result.id = m_module.opImageSampleImplicitLod(
					getVectorTypeId(result.type),
					sampledImageId, coord.id,
					imageOperands);
			}
				break;
			case GcnOpcode::IMAGE_SAMPLE_L:
			{
				imageOperands.flags |= spv::ImageOperandsLodMask;
				imageOperands.sLod = lod.id;

				result.id = m_module.opImageSampleExplicitLod(
					getVectorTypeId(result.type), sampledImageId, coord.id,
					imageOperands);
			}
				break;
			case GcnOpcode::IMAGE_SAMPLE_LZ:
			case GcnOpcode::IMAGE_SAMPLE_LZ_O:
			{
				imageOperands.flags |= spv::ImageOperandsLodMask;
				imageOperands.sLod = m_module.constf32(0.0);

				result.id = m_module.opImageSampleExplicitLod(
					getVectorTypeId(result.type), sampledImageId, coord.id,
					imageOperands);
			}
				break;
			case GcnOpcode::IMAGE_SAMPLE_C:
			{
				result.id = m_module.opImageSampleDrefImplicitLod(
					getVectorTypeId(result.type), sampledImageId, coord.id,
					referenceValue.id, imageOperands);
			}
				break;
			default:
				LOG_GCN_UNHANDLED_INST();
				break;
		}

		auto colorMask = GcnRegMask(mimg.control.dmask);
		result         = emitRegisterExtract(result, colorMask);
		emitVgprArrayStore(mimg.vdata,
						   result,
						   colorMask);
	}

	void GcnCompiler::emitStorageImageLoad(const GcnShaderInstruction& ins)
	{
		auto mimg = gcnInstructionAs<GcnShaderInstMIMG>(ins);

		const uint32_t    registerId = mimg.srsrc.code << 2;
		const GcnTexture& typeInfo   = m_textures.at(registerId);

		// Load texture coordinates
		GcnRegisterValue coord = emitLoadTexCoord(ins);

		// Additional image operands. This will store
		// the LOD and other information if present.
		SpirvImageOperands imageOperands;

		auto op = ins.opcode;
		switch (op)
		{
			case GcnOpcode::IMAGE_LOAD_MIP:
			{
				GcnRegisterValue imageLod =
					emitLoadAddrComponent(GcnImageAddrComponent::Lod, ins);

				imageOperands.flags |= spv::ImageOperandsLodMask;
				imageOperands.sLod = imageLod.id;
			}
			break;
		}

		// Load source value from the storage image.
		GcnRegisterValue result;
		result.type.ctype  = typeInfo.sampledType;
		result.type.ccount = 4;
		result.id          = m_module.opImageFetch(
					 getVectorTypeId(result.type),
					 m_module.opLoad(typeInfo.imageTypeId, typeInfo.varId),
					 coord.id, imageOperands);

		// Apply component swizzle and mask
		auto colorMask = GcnRegMask(mimg.control.dmask);
		result         = emitRegisterExtract(result, colorMask);
		emitVgprArrayStore(mimg.vdata,
						   result,
						   colorMask);
	}

	GcnRegisterValue GcnCompiler::emitCalcTexCoord(
		GcnRegisterValue    coordVector,
		const GcnImageInfo& imageInfo)
	{
		const uint32_t dim = getTexCoordDim(imageInfo);

		if (dim != coordVector.type.ccount)
		{
			coordVector = emitRegisterExtract(
				coordVector, GcnRegMask::firstN(dim));
		}

		return coordVector;
	}

	GcnRegisterValue GcnCompiler::emitLoadTexCoord(
		const GcnShaderInstruction& ins)
	{
		GcnInstOperand addrReg = ins.src[0];

		const GcnImageInfo imageInfo = getImageInfo(ins);

		uint32_t dim        = getTexCoordDim(imageInfo);
		uint32_t coordIndex = calcAddrComponentIndex(
			GcnImageAddrComponent::X, ins);

		addrReg.code += coordIndex;
		GcnRegisterValue coord = emitVgprArrayLoad(
			addrReg, GcnRegMask::firstN(dim));

		auto result = emitCalcTexCoord(coord, imageInfo);
		// Some non-sampling image instructions use
		// integer coordinate offset, we need to do a cast.
		result = emitRegisterBitcast(result, addrReg.type);

		if (imageInfo.dim == spv::DimCube)
		{
			// Why do we need recover?
			// See comments in emitCubeCalculate.
			result = emitRecoverCubeCoord(result);
		}
		return result;
	}

	GcnRegisterValue GcnCompiler::emitLoadTexOffset(const GcnShaderInstruction& ins)
	{
		const uint32_t typeId  = getScalarTypeId(GcnScalarType::Sint32);
		auto           offsets = emitLoadAddrComponent(GcnImageAddrComponent::Offsets, ins);

		const GcnImageInfo imageInfo = getImageInfo(ins);
		uint32_t           dim       = getTexCoordDim(imageInfo);

		util::static_vector<uint32_t, 3> components;
		for (uint32_t i = 0; i != dim; ++i)
		{
			uint32_t offsetId = m_module.opBitFieldSExtract(typeId,
															offsets.id,
															m_module.constu32(i * 8),
															m_module.constu32(6));
			components.push_back(offsetId);
		}

		GcnRegisterValue result = {};
		result.type.ctype       = GcnScalarType::Sint32;
		result.type.ccount      = components.size();
		result.id               = m_module.opCompositeConstruct(getVectorTypeId(result.type),
																components.size(), components.data());
		return result;
	}

	GcnRegisterValue GcnCompiler::emitCalcQueryLod(
		const GcnRegisterValue& lod)
	{
		GcnRegisterValue result;
		result.type.ctype  = GcnScalarType::Uint32;
		result.type.ccount = 1;
		if (result.id == 0)
		{
			// If instruction doesn't have lod component,
			// we need to feed a zero lod for image size query.
			result.id = m_module.constu32(0);
		}
		else
		{
			result.id = m_module.opConvertFtoU(
				getVectorTypeId(result.type),
				lod.id);
		}
		return result;
	}

	GcnRegisterValue GcnCompiler::emitApplyTexOffset(
		const GcnShaderInstruction& ins,
		const GcnRegisterValue&     coord,
		const GcnRegisterValue&     lod)
	{
		// Spir-v doesn't allow non-constant texture offset for OpImageSample*,
		// so we need to calculate the coordinate manually.

		GcnRegisterValue result;

		// Calculate lod used to query image size
		auto queryLod = emitCalcQueryLod(lod);

		// Unnormalized texel coordinate
		auto size_u = emitQueryTextureSize(ins, queryLod);

		result.type.ctype     = GcnScalarType::Float32;
		result.type.ccount    = size_u.type.ccount;
		const uint32_t typeId = getVectorTypeId(result.type);

		uint32_t size_f     = m_module.opConvertUtoF(typeId,
													 size_u.id);
		uint32_t coordUnorm = m_module.opFMul(typeId, coord.id, size_f);

		// Apply offset
		auto     offset_s      = emitLoadTexOffset(ins);
		uint32_t offset_f      = m_module.opConvertStoF(typeId,
														offset_s.id);
		uint32_t coordAdjusted = m_module.opFAdd(typeId,
												 coordUnorm,
												 offset_f);

		// Normalized texel coordinate
		result.id = m_module.opFDiv(typeId,
									coordAdjusted,
									size_f);

		return result;
	}

	GcnRegisterValue GcnCompiler::emitRecoverCubeCoord(
		const GcnRegisterValue& coord)
	{
		LOG_ASSERT(coord.type.ccount == 3, "cube coordinate must be vec3.");
		auto s = emitRegisterExtract(coord, GcnRegMask::select(0));
		auto t = emitRegisterExtract(coord, GcnRegMask::select(1));
		auto z = emitRegisterExtract(coord, GcnRegMask::select(2));

		const uint32_t typeId = getScalarTypeId(GcnScalarType::Float32);

		// We need to fix x and y coordinate,
		// because the s and t coordinate will be scaled and plus 1.5
		// by v_madak_f32.
		// We already force the scale value to be 1.0 when handling v_cubema_f32,
		// here we subtract 1.5 to recover the original value.
		auto x = m_module.opFSub(typeId, s.id, m_module.constf32(1.5));
		auto y = m_module.opFSub(typeId, t.id, m_module.constf32(1.5));

		std::array<uint32_t, 3> direction = { x, y, z.id };

		GcnRegisterValue result;
		result.type = coord.type;
		result.id   = m_module.opCompositeConstruct(getVectorTypeId(result.type),
													direction.size(),
													direction.data());
		return result;
	}

	GcnRegisterValue GcnCompiler::emitLoadAddrComponent(
		GcnImageAddrComponent       component,
		const GcnShaderInstruction& ins)
	{
		const GcnInstOperand& addrReg    = ins.src[0];
		const GcnInstOperand& textureReg = ins.src[2];
		auto                  flags      = GcnMimgModifierFlags(ins.control.mimg.mod);

		// These registers are 4-GPR aligned, so multiplied by 4
		const uint32_t textureId = textureReg.code * 4;

		// Image type, which stores the image dimensions etc.
		const GcnImageInfo imageInfo = m_textures.at(textureId).imageInfo;

		uint32_t index = calcAddrComponentIndex(component, ins);
		auto     reg   = addrReg;
		reg.code += index;

		GcnScalarType type = GcnScalarType::Float32;
		if (isImageAccessNoSampling(ins))
		{
			// Non-sampling instructions always use
			// integer components
			type = GcnScalarType::Uint32;
		}
		else
		{
			type = flags.test(GcnMimgModifier::Offset)
					   ? GcnScalarType::Sint32
					   : GcnScalarType::Float32;
		}
		return emitRegisterBitcast(emitVgprLoad(reg), type);
	}

	uint32_t GcnCompiler::emitLoadSampledImage(
		const GcnTexture& textureResource,
		const GcnSampler& samplerResource,
		bool              isDepthCompare)
	{
		const uint32_t sampledImageType = isDepthCompare
											  ? m_module.defSampledImageType(textureResource.depthTypeId)
											  : m_module.defSampledImageType(textureResource.colorTypeId);

		return m_module.opSampledImage(sampledImageType,
									   m_module.opLoad(textureResource.imageTypeId, textureResource.varId),
									   m_module.opLoad(samplerResource.typeId, samplerResource.varId));
	}


	GcnBufferInfo GcnCompiler::getBufferType(
		const GcnInstOperand& reg)
	{
		uint32_t regIdx = reg.code << 2;

		GcnBufferMeta* meta = nullptr;
		switch (m_programInfo.type())
		{
			case GcnProgramType::VertexShader: meta = &m_meta.vs.bufferInfos[regIdx]; break;
			case GcnProgramType::PixelShader: meta = &m_meta.ps.bufferInfos[regIdx]; break;
			case GcnProgramType::ComputeShader: meta = &m_meta.cs.bufferInfos[regIdx]; break;
			case GcnProgramType::GeometryShader: meta = &m_meta.gs.bufferInfos[regIdx]; break;
			case GcnProgramType::HullShader: meta = &m_meta.hs.bufferInfos[regIdx]; break;
			case GcnProgramType::DomainShader: meta = &m_meta.ds.bufferInfos[regIdx]; break;
		}

		auto& buffer = m_buffers[regIdx];

		GcnBufferInfo result = {};
		result.varId         = buffer.varId;
		result.isSsbo        = buffer.asSsbo;
		result.buffer        = *meta;
		result.image         = GcnImageInfo();

		return result;
	}

	GcnImageInfo GcnCompiler::getImageType(
		Gnm::TextureType textureType,
		bool             isUav,
		bool             isDepth) const
	{
		uint32_t     depth    = isDepth ? 1u : 0u;
		uint32_t     sampled  = isUav ? 2u : 1u;
		GcnImageInfo typeInfo = [textureType, depth, sampled]() -> GcnImageInfo
		{
			switch (textureType)
			{
				case Gnm::kTextureType1d:
					return { spv::Dim1D, depth, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_1D };
				case Gnm::kTextureType2d:
					return { spv::Dim2D, depth, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_2D };
				case Gnm::kTextureType3d:
					return { spv::Dim3D, depth, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_3D };
				case Gnm::kTextureTypeCubemap:
					return { spv::DimCube, depth, 0, 0, sampled, VK_IMAGE_VIEW_TYPE_CUBE };
				case Gnm::kTextureType1dArray:
					return { spv::Dim1D, depth, 1, 0, sampled, VK_IMAGE_VIEW_TYPE_1D_ARRAY };
				case Gnm::kTextureType2dArray:
					return { spv::Dim2D, depth, 1, 0, sampled, VK_IMAGE_VIEW_TYPE_2D_ARRAY };
				case Gnm::kTextureType2dMsaa:
					return { spv::Dim2D, depth, 0, 1, sampled, VK_IMAGE_VIEW_TYPE_2D };
				case Gnm::kTextureType2dArrayMsaa:
					return { spv::Dim2D, depth, 1, 1, sampled, VK_IMAGE_VIEW_TYPE_2D_ARRAY };
				default:
					Logger::exception(util::str::formatex("GcnCompiler: Unsupported resource type: ", textureType));
			}
		}();

		return typeInfo;
	}

	GcnImageInfo GcnCompiler::getImageInfo(const GcnShaderInstruction& ins) const
	{
		const GcnInstOperand& textureReg = ins.src[2];
		const uint32_t        textureId  = textureReg.code * 4;
		GcnImageInfo          imageInfo  = m_textures.at(textureId).imageInfo;
		return imageInfo;
	}

	uint32_t GcnCompiler::getTexSizeDim(const GcnImageInfo& imageType) const
	{
		switch (imageType.dim)
		{
			case spv::DimBuffer: return 1 + imageType.array;
			case spv::Dim1D: return 1 + imageType.array;
			case spv::Dim2D: return 2 + imageType.array;
			case spv::Dim3D: return 3 + imageType.array;
			case spv::DimCube: return 2 + imageType.array;
			default: Logger::exception("DxbcCompiler: getTexLayerDim: Unsupported image dimension");
		}
	}

	uint32_t GcnCompiler::getTexLayerDim(const GcnImageInfo& imageType) const
	{
		switch (imageType.dim)
		{
			case spv::DimBuffer: return 1;
			case spv::Dim1D: return 1;
			case spv::Dim2D: return 2;
			case spv::Dim3D: return 3;
			case spv::DimCube: return 3;
			default: Logger::exception("DxbcCompiler: getTexLayerDim: Unsupported image dimension");
		}
	}

	uint32_t GcnCompiler::getTexCoordDim(const GcnImageInfo& imageType) const
	{
		return getTexLayerDim(imageType) + imageType.array;
	}

	uint32_t GcnCompiler::calcAddrComponentIndex(
		GcnImageAddrComponent       component,
		const GcnShaderInstruction& ins)
	{
		int32_t index      = -1;
		auto    flags      = GcnMimgModifierFlags(ins.control.mimg.mod);
		auto    imageInfo  = getImageInfo(ins);
		auto    dim        = imageInfo.dim;
		auto    msaa       = imageInfo.ms;
		bool    noSampling = isImageAccessNoSampling(ins);

		// clang-format off
		switch (component)
		{
			case GcnImageAddrComponent::Clamp:
				if (flags.test(GcnMimgModifier::LodClamp)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Lod:
				if (flags.test(GcnMimgModifier::Lod)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::FragId:
				if (noSampling &&
					dim == spv::Dim2D &&
					msaa != 0) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::FaceId:
				if (dim == spv::DimCube) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Slice:
				if (ins.control.mimg.da != 0 &&
					dim != spv::DimCube) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Z:
				if (dim == spv::Dim3D) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Y:
				if (dim == spv::Dim2D ||
					dim == spv::Dim3D ||
					dim == spv::DimCube) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::X:
				++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DzDv:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative) &&
					dim == spv::Dim3D) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DyDv:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative) &&
				    (dim == spv::Dim2D ||
					 dim == spv::Dim3D ||
					 dim == spv::DimCube)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DxDv:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DzDh:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative) &&
					dim == spv::Dim3D) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DyDh:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative) &&
				    (dim == spv::Dim2D ||
					 dim == spv::Dim3D ||
					 dim == spv::DimCube)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::DxDh:
				if (flags.any(GcnMimgModifier::Derivative,
							  GcnMimgModifier::CoarseDerivative)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Zpcf:
				if (flags.test(GcnMimgModifier::Pcf)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Bias:
				if (flags.test(GcnMimgModifier::LodBias)) ++index;
				[[fallthrough]];
			case GcnImageAddrComponent::Offsets:
				if (flags.test(GcnMimgModifier::Offset)) ++index;
		}
		// clang-format on

		LOG_ASSERT(index != -1, "Get vaddr component failed.");
		return static_cast<uint32_t>(index);
	}

	GcnBufferFormat GcnCompiler::getBufferFormat(
		Gnm::BufferFormat dfmt, Gnm::BufferChannelType nfmt)
	{
		GcnBufferFormat format;
		switch (dfmt)
		{
			case Gnm::kBufferFormatInvalid:
			{
				format.sizeInBytes  = 0;
				format.channelCount = 0;
			}
			break;
			case Gnm::kBufferFormat8:
			{
				format.sizeInBytes  = 1;
				format.channelCount = 1;
			}
			break;
			case Gnm::kBufferFormat16:
			{
				format.sizeInBytes  = 2;
				format.channelCount = 1;
			}
			break;
			case Gnm::kBufferFormat8_8:
			{
				format.sizeInBytes  = 2;
				format.channelCount = 2;
			}
			break;
			case Gnm::kBufferFormat32:
			{
				format.sizeInBytes  = 4;
				format.channelCount = 1;
			}
			break;
			case Gnm::kBufferFormat16_16:
			{
				format.sizeInBytes  = 4;
				format.channelCount = 2;
			}
			break;
			case Gnm::kBufferFormat10_11_11:
			case Gnm::kBufferFormat11_11_10:
			{
				format.sizeInBytes  = 4;
				format.channelCount = 3;
			}
			break;
			case Gnm::kBufferFormat10_10_10_2:
			case Gnm::kBufferFormat2_10_10_10:
			case Gnm::kBufferFormat8_8_8_8:
			{
				format.sizeInBytes  = 4;
				format.channelCount = 4;
			}
			break;
			case Gnm::kBufferFormat32_32:
			{
				format.sizeInBytes  = 8;
				format.channelCount = 2;
			}
			break;
			case Gnm::kBufferFormat16_16_16_16:
			{
				format.sizeInBytes  = 8;
				format.channelCount = 4;
			}
			break;
			case Gnm::kBufferFormat32_32_32:
			{
				format.sizeInBytes  = 12;
				format.channelCount = 3;
			}
			break;
			case Gnm::kBufferFormat32_32_32_32:
			{
				format.sizeInBytes  = 16;
				format.channelCount = 4;
			}
			break;
			default:
				LOG_ASSERT(false, "error dfmt passed.");
				break;
		}

		switch (nfmt)
		{
			case Gnm::kBufferChannelTypeUNorm:
			case Gnm::kBufferChannelTypeUScaled:
			case Gnm::kBufferChannelTypeUInt:
				format.channelType = GcnScalarType::Uint32;
				break;
			case Gnm::kBufferChannelTypeSNorm:
			case Gnm::kBufferChannelTypeSNormNoZero:
			case Gnm::kBufferChannelTypeSScaled:
			case Gnm::kBufferChannelTypeSInt:
				format.channelType = GcnScalarType::Sint32;
				break;
			case Gnm::kBufferChannelTypeFloat:
				format.channelType = GcnScalarType::Float32;
				break;
			default:
				LOG_ASSERT(false, "error nfmt passed.");
				break;
		}
		return format;
	}

}  // namespace sce::gcn