mirror of
https://github.com/Inori/GPCS4.git
synced 2024-05-20 13:07:58 -04:00
265 lines
13 KiB
C++
265 lines
13 KiB
C++
/*
|
|
* CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
|
|
* Copyright (C) 2014-2018 Mateusz Szpakowski
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef __CLRX_GCNINTERNALS_H__
|
|
#define __CLRX_GCNINTERNALS_H__
|
|
|
|
#include <CLRX/Config.h>
|
|
#include <cstdint>
|
|
#include <string>
|
|
#include <CLRX/utils/Utilities.h>
|
|
#include <CLRX/utils/GPUId.h>
|
|
|
|
namespace CLRX
|
|
{
|
|
|
|
// enums for GCN encodings
|
|
enum : cxbyte
|
|
{
|
|
GCNENC_NONE,
|
|
GCNENC_SOPC, /* 0x17e<<23, opcode = (7bit)<<16 */
|
|
GCNENC_SOPP, /* 0x17f<<23, opcode = (7bit)<<16 */
|
|
GCNENC_SOP1, /* 0x17d<<23, opcode = (8bit)<<8 */
|
|
GCNENC_SOP2, /* 0x2<<30, opcode = (7bit)<<23 */
|
|
GCNENC_SOPK, /* 0xb<<28, opcode = (5bit)<<23 */
|
|
GCNENC_SMRD, /* 0x18<<27, opcode = (6bit)<<22 */
|
|
GCNENC_SMEM = GCNENC_SMRD, /* 0x18<<27, opcode = (6bit)<<22 */
|
|
GCNENC_VOPC, /* 0x3e<<25, opcode = (8bit)<<17 */
|
|
GCNENC_VOP1, /* 0x3f<<25, opcode = (8bit)<<9 */
|
|
GCNENC_VOP2, /* 0x0<<31, opcode = (6bit)<<25 */
|
|
GCNENC_VOP3A, /* 0x34<<26, opcode = (9bit)<<17 */
|
|
GCNENC_VOP3B, /* 0x34<<26, opcode = (9bit)<<17 */
|
|
GCNENC_VINTRP, /* 0x32<<26, opcode = (2bit)<<16 */
|
|
GCNENC_DS, /* 0x36<<26, opcode = (8bit)<<18 */
|
|
GCNENC_MUBUF, /* 0x38<<26, opcode = (7bit)<<18 */
|
|
GCNENC_MTBUF, /* 0x3a<<26, opcode = (3bit)<<16 */
|
|
GCNENC_MIMG, /* 0x3c<<26, opcode = (7bit)<<18 */
|
|
GCNENC_EXP, /* 0x3e<<26, opcode = none */
|
|
GCNENC_FLAT, /* 0x37<<26, opcode = (8bit)<<18 (???8bit) */
|
|
GCNENC_VOP3P,
|
|
GCNENC_MAXVAL = GCNENC_FLAT
|
|
};
|
|
|
|
|
|
typedef uint32_t GCNInsnMode;
|
|
|
|
// modes for GCN instructions
|
|
enum : GCNInsnMode
|
|
{
|
|
GCN_STDMODE = 0, /// standard mode
|
|
GCN_REG_ALL_64 = 15, /// all register operand is 64-bit
|
|
GCN_REG_DST_64 = 1, /// destination is 64-bit
|
|
GCN_REG_SRC0_64 = 2, /// source0 is 64-bit
|
|
GCN_REG_SRC1_64 = 4, /// source1 is 64-bit
|
|
GCN_REG_SRC2_64 = 8, /// source2 is 64-bit
|
|
GCN_REG_DS0_64 = 3, /// destination and source0 is 64-bit
|
|
GCN_REG_DS1_64 = 5, /// destination and source1 is 64-bit
|
|
GCN_REG_DS2_64 = 9, /// destination and source2 is 64-bit
|
|
/* SOP */
|
|
GCN_IMM_NONE = 0x10, // used in Scall insns
|
|
GCN_ARG_NONE = 0x20, /// no arguments (operands)
|
|
GCN_DST_NONE = 0x20, /// omit destination argument
|
|
GCN_IMM_REL = 0x30, /// SOPK, immediate as relative address
|
|
GCN_IMM_LOCKS = 0x40, /// SOPK, s_waitcnt locks
|
|
GCN_IMM_MSGS = 0x50, /// SOPK, s_sendmsg* message function
|
|
GCN_IMM_SREG = 0x60, /// SOPK, hwreg function
|
|
GCN_SRC_NONE = 0x70, /// SOP1, omit source
|
|
GCN_DST_SRC = 0x80, /// SOPK, SDST is first source
|
|
GCN_IMM_DST = 0x100, /// immediate is first (destplace), destination is second
|
|
GCN_SOPK_CONST = 0x200, /// constant instead destination
|
|
GCN_SOPK_SRIMM32 = 0x300, /// GCN_IMM_DST and GCN_SOPK_CONST together
|
|
/* SOPC */
|
|
GCN_SRC1_IMM = 0x10, /// treat source1 as immediate value
|
|
/* VOP */
|
|
GCN_SRC2_NONE = 0x10, /// omit source2
|
|
GCN_DS2_VCC = 0x20, /// extra sgpr or VCC in extra destination and in source2
|
|
GCN_SRC12_NONE = 0x30, /// omit source1 and source2
|
|
GCN_ARG1_IMM = 0x40, /// immediate value as second operand
|
|
GCN_ARG2_IMM = 0x50, /// immediate value as third operand
|
|
GCN_S0EQS12 = 0x60, /// source0 must be equal to source1 or source2
|
|
GCN_DST_VCC = 0x70, /// extra sgpr or VCC in extra destination
|
|
GCN_SRC2_VCC = 0x80, /// sgpr or VCC in source2
|
|
GCN_DST_VCC_VSRC2 = 0x90, /// sgpr or VCC as dest operand and source2
|
|
GCN_DS1_SGPR = 0xa0, /// destination and source1 is SGPR
|
|
GCN_SRC1_SGPR = 0xb0, /// source1 is SGPR
|
|
GCN_DST_SGPR = 0xc0, /// destination is SGPR
|
|
GCN_VOP_ARG_NONE = 0xd0, /// no argument for VOP encodings
|
|
GCN_NEW_OPCODE = 0xe0, /// unique opcode that doesn't exists in VINTRP
|
|
GCN_P0_P10_P20 = 0xf0, /// second operand is param (P0,P10,P20)
|
|
GCN_VOPC_NOVCC = 0x80, /// no VCC in VOPC instruction
|
|
GCN_VOP3_VOP2 = 0x100, /// VOP2 encoded as VOP3
|
|
GCN_VOP3_VOP1 = 0x200, /// VOP1 encoded as VOP3
|
|
GCN_VOP3_VINTRP = 0x300, /// VINTRP encoded as VOP3
|
|
GCN_VOP3_DS2_128 = 0x7000, /// VOP3 with DST 128 and SRC2 128
|
|
GCN_VOP3_VINTRP_NEW = 0x3e0, /// new VINTRP instructions encoded as VOP3
|
|
GCN_VOP3_VOP2_DS01 = 0x110, /// VOP2 in VOP3, destination and two sources
|
|
GCN_VOP3_VOP1_DS0 = 0x230, /// VOP1 in VOP3, destination and one source
|
|
GCN_VOP3_DST_SGPR = 0x400, /// VOPX in VOP3, and dst is SGPR (if mask1 used)
|
|
GCN_VOP3_SRC1_SGPR = 0x800, /// VOPX in VOP3, and src1 is SGPR (if mask1 used)
|
|
GCN_VOP3_DS1_SGPR = 0xc00, /// VOPX in VOP3, and dst and src1 is SGPR (if mask1 used)
|
|
GCN_VOP3_MASK2 = 0x8300, // mask for VOPx in VOP2 encodings
|
|
GCN_VINTRP_SRC2 = 0x1000, /// VOP3/VINTRP with source2 (third source)
|
|
GCN_VOP3_MASK3 = 0x3000, /// mask for VINTRP in VOP2 encodings
|
|
GCN_VOP3_VOP3P = 0x8000, /// VOP3P encoding
|
|
GCN_VOP3_NODST = 0x1000000, /// VOP3 - no DST
|
|
GCN_VOP_NODPP = 0x10000, /// VOP instruction can not have DPP
|
|
GCN_VOP_NOSDWA = 0x20000, /// VOP instruction can not have SDWA
|
|
GCN_VOP_NODPPSDWA = 0x30000, /// VOP instruction can not have DPP and SDWA
|
|
GCN_VOP_NOSDWAVEGA = 0x40000, /// VOP instruction can not have SDWA for VEGA
|
|
GCN_VCC_IMPL_READ = 0x80000, /// VOP instruction with implicit VCC read
|
|
GCN_VCC_IMPL_WRITE = 0x100000, /// VOP instruction with implicit VCC write
|
|
GCN_VOP_NOWVSZ = 0x200000, // VOP SDST/VCC size doesn't not depend on wavesize
|
|
// DS encoding modes
|
|
GCN_ADDR_STD = 0x0, /// standard place of address
|
|
GCN_ADDR_DST = 0x10, /// address operand in destination place
|
|
GCN_ADDR_SRC = 0x20, /// address operand in source
|
|
GCN_ADDR_DST64 = 0x1f, /// address operand in destination place and all is 64-bit
|
|
GCN_ADDR_SRC64 = 0x2f, /// address operand in source and all is 64-bit
|
|
GCN_ADDR_SRC_D64 = 0x21, /// address operand in source and dest is bit (not addr)
|
|
GCN_2OFFSETS = 0x100, /// two 8-bit offsets
|
|
GCN_VDATA2 = 0x140, /* two datas, two offsets */
|
|
GCN_NOSRC = 0x80, /* only address, no source */
|
|
GCN_2SRCS = 0x40, // two (source or dest) datas (DATA), DATA1)
|
|
GCN_NOSRC_2OFF = 0x180, /* only address */
|
|
GCN_SRC_ADDR2 = 0x200, /// for ds_XXX_src2_XXX instructions
|
|
GCN_SRC_ADDR2_64 = 0x20f, /// for ds_XXX_src2_XXX instructions, all is 64-bit
|
|
GCN_DS_96 = 0x800, /// 96-bit dest and source
|
|
GCN_DS_128 = 0x1000, /// 128-bit dest and source
|
|
GCN_ONLYDST = 0x400, /* only vdst */
|
|
GCN_DSMASK = 0x3f0, // mask for DS encoding
|
|
GCN_DSMASK2 = 0x3c0, // mask for DS encoding 2
|
|
GCN_SRCS_MASK = 0xc0, /// only srcs mask
|
|
GCN_ONLYGDS = 0x2000, /// instruction must have GDS
|
|
GCN_DST128 = 0x4000, // dest is 128-bit
|
|
GCN_ONLY_SRC = 0x8000,
|
|
// others
|
|
GCN_SBASE4 = 0x10, /// SBASE requires 4 registers
|
|
GCN_FLOATLIT = 0x40000000U, /// float literal
|
|
GCN_F16LIT = 0x80000000U, /// half literal
|
|
GCN_LITMASK = 0xc0000000U,
|
|
GCN_SMRD_ONLYDST = 0x30, // only destination (no other operands)
|
|
GCN_SMEM_SDATA_IMM = 0x40, // treat SDATA as immediate
|
|
GCN_SMEM_NOSDATA = 0x80, // no destination
|
|
GCN_MEMOP_MX1 = 0x0, /// sdst/sdata requires 1 register
|
|
GCN_MEMOP_MX2 = 0x100, /// sdst/sdata requires 2 registers
|
|
GCN_MEMOP_MX4 = 0x200, /// sdst/sdata requires 4 registers
|
|
GCN_MEMOP_MX8 = 0x300, /// sdst/sdata requires 8 registers
|
|
GCN_MEMOP_MX16 = 0x400, /// sdst/sdata requires 16 registers
|
|
GCN_MUBUF_X = 0x0, /// vdata requires 1 register
|
|
GCN_MUBUF_NOVAD = 0x10, // no vaddr and vdata
|
|
GCN_MUBUF_XY = 0x100, /// vdata requires 2 registers
|
|
GCN_MUBUF_XYZ = 0x200, /// vdata requires 3 registers
|
|
GCN_MUBUF_XYZW = 0x300, /// vdata requires 4 registers
|
|
GCN_MUBUF_MX1 = 0x0, /// vdata requires 1 register
|
|
GCN_MUBUF_MX2 = 0x100, /// vdata requires 2 registers
|
|
GCN_MUBUF_MX3 = 0x200, /// vdata requires 3 registers
|
|
GCN_MUBUF_MX4 = 0x300, /// vdata requires 4 registers
|
|
GCN_MUBUF_D16 = 0x800, /// vdata size depends on arch (GCN 1.4 - half of size)
|
|
GCN_MUBUF_X_D16 = 0x800, /// vdata requires 1 register
|
|
GCN_MUBUF_XY_D16 = 0x900, /// vdata requires 2 registers (or 1 for GCN 1.4)
|
|
GCN_MUBUF_XYZ_D16 = 0xa00, /// vdata requires 3 registers (or 2 for GCN 1.4)
|
|
GCN_MUBUF_XYZW_D16 = 0xb00, /// vdata requires 4 registers (or 2 for GCN 1.4)
|
|
GCN_MIMG_SAMPLE = 0x100, /// last operand is SSAMP (4 sregisters)
|
|
GCN_MIMG_VDATA4 = 0x200, /// gather requires 4 vdata registers
|
|
GCN_MIMG_GATHER = 0x300, /// gather requires 4 vdata registers
|
|
GCN_MIMG_VAGE1 = 0x0, /// vaddr requires 1 or more registers
|
|
GCN_MIMG_VAGE2 = 0x1, /// vaddr requires 2 or more registers
|
|
GCN_MIMG_VAGE3 = 0x2, /// vaddr requires 3 or more registers
|
|
GCN_MIMG_VAGE4 = 0x3, /// vaddr requires 4 or more registers
|
|
GCN_MIMG_VAGE5 = 0x4, /// vaddr requires 5 or more registers
|
|
GCN_MIMG_VAGE6 = 0x5, /// vaddr requires 6 or more registers
|
|
GCN_MIMG_VADERIV = 0x10, /// vaddr holds user derivatives
|
|
GCN_MIMG_VAGE2D = 0x11, /// vaddr requires 2 or more registers and holds user derivs
|
|
GCN_MIMG_VAGE3D = 0x12, /// vaddr requires 3 or more registers and holds user derivs
|
|
GCN_MIMG_VAGE4D = 0x13, /// vaddr requires 4 or more registers and holds user derivs
|
|
GCN_MIMG_VAGE5D = 0x14, /// vaddr requires 5 or more registers and holds user derivs
|
|
GCN_MIMG_VAGE6D = 0x15, /// vaddr requires 6 or more registers and holds user derivs
|
|
GCN_MIMG_VA_O = 0x20, // vaddr *O*
|
|
GCN_MIMG_VA_B = 0x40, // vaddr *B*
|
|
GCN_MIMG_VA_C = 0x80, // vaddr *C*
|
|
GCN_MIMG_VA_L = 0x400, // vaddr *L*
|
|
GCN_MIMG_VA_CL = 0x800, // vaddr *CL*
|
|
GCN_MIMG_VA_B_O = GCN_MIMG_VA_B|GCN_MIMG_VA_O, // vaddr *B* and *O*
|
|
GCN_MIMG_VA_B_CL = GCN_MIMG_VA_B|GCN_MIMG_VA_CL, // vaddr *B* and *CL*
|
|
// vaddr *B* and *CL* and *O*
|
|
GCN_MIMG_VA_B_CL_O = GCN_MIMG_VA_B|GCN_MIMG_VA_CL|GCN_MIMG_VA_O,
|
|
GCN_MIMG_VA_C_B = GCN_MIMG_VA_C|GCN_MIMG_VA_B, // vaddr *C* and *B*
|
|
// vaddr *C* and *B* and *CL*
|
|
GCN_MIMG_VA_C_B_CL = GCN_MIMG_VA_C|GCN_MIMG_VA_B|GCN_MIMG_VA_CL,
|
|
GCN_MIMG_VA_L_O = GCN_MIMG_VA_L|GCN_MIMG_VA_O, // vaddr *L* anc *O*
|
|
GCN_MIMG_VA_C_L = GCN_MIMG_VA_C|GCN_MIMG_VA_L, // vaddr *C* and *L*
|
|
GCN_MIMG_VA_C_CL = GCN_MIMG_VA_C|GCN_MIMG_VA_CL, // vaddr *C* and *CLL*
|
|
GCN_MIMG_VA_C_O = GCN_MIMG_VA_C|GCN_MIMG_VA_O, // vaddr *C* and *O*
|
|
GCN_MIMG_VA_CL_O = GCN_MIMG_VA_CL|GCN_MIMG_VA_O, // vaddr *CL* and *O*
|
|
// vaddr *C* and *CL* and *O*
|
|
GCN_MIMG_VA_C_CL_O = GCN_MIMG_VA_C|GCN_MIMG_VA_CL|GCN_MIMG_VA_O,
|
|
// vaddr *C* and *CL* and *B* and *O*
|
|
GCN_MIMG_VA_C_B_CL_O = GCN_MIMG_VA_C|GCN_MIMG_VA_CL|GCN_MIMG_VA_B|GCN_MIMG_VA_O,
|
|
// vaddr *C* and *L* and *O*
|
|
GCN_MIMG_VA_C_L_O = GCN_MIMG_VA_C|GCN_MIMG_VA_L|GCN_MIMG_VA_O,
|
|
// vaddr *C* and *B* and *O*
|
|
GCN_MIMG_VA_C_B_O = GCN_MIMG_VA_C|GCN_MIMG_VA_B|GCN_MIMG_VA_O,
|
|
GCN_MIMG_VA_MIP = 0x10000, // vaddr _MIP
|
|
GCN_MIMG_VA_MASK = 0xf,
|
|
GCN_MLOAD = 0x1000, // instruction load data to vgprs
|
|
GCN_MATOMIC = 0x2000, // instruction perform atomics and returns data if glc==1
|
|
GCN_MHALFWRITE = 0x4000,
|
|
GCN_MCMPSWAP = 0x6000,
|
|
GCN_FLAT_DDST = 0x00, // destination as first operand
|
|
GCN_FLAT_ADST = 0x10, /// first address, second is DST
|
|
GCN_FLAT_NODATA = 0x20, /// omit DATA
|
|
GCN_FLAT_NODST = 0x40, /// omit DST
|
|
GCN_FLAT_STORE = 0x50, /// store instruction
|
|
GCN_CMPSWAP = 0x80, /// ???
|
|
GCN_ACMPSWAP = 0x6080, /// ???
|
|
GCN_FLAT_FLAT = 0,
|
|
GCN_FLAT_SCRATCH = 1,
|
|
GCN_FLAT_GLOBAL = 2,
|
|
GCN_FLAT_MODEMASK = 7,
|
|
GCN_MASK1 = 0xf0,
|
|
GCN_MASK2 = 0xf00,
|
|
GCN_DSIZE_MASK = 0x700, /// dsize mask
|
|
GCN_SHIFT2 = 8
|
|
};
|
|
|
|
struct CLRX_INTERNAL GCNInstruction
|
|
{
|
|
const char* mnemonic;
|
|
cxbyte encoding;
|
|
GCNInsnMode mode;
|
|
uint16_t code;
|
|
GPUArchMask archMask; // mask of architectures whose have instruction
|
|
};
|
|
|
|
// version GCNInstruction for assembler (with two code: for VOPX and VOP3)
|
|
struct CLRX_INTERNAL GCNAsmInstruction
|
|
{
|
|
const char* mnemonic;
|
|
cxbyte encoding;
|
|
GCNInsnMode mode;
|
|
uint16_t code1, code2; // code1 - first code, code2 - VOP3 encoding code
|
|
GPUArchMask archMask; // mask of architectures whose have instruction
|
|
};
|
|
|
|
CLRX_INTERNAL extern const GCNInstruction gcnInstrsTable[];
|
|
|
|
};
|
|
|
|
#endif
|