Compare commits


5 commits

Author SHA1 Message Date
Radosław Gliński a3810ba43b Merge 79d2d3ba55 into a90f83d44c 2024-05-12 16:01:15 +02:00
Triang3l a90f83d44c [Vulkan] Non-seamless cube map filtering 2024-05-05 15:20:23 +03:00
Triang3l e9f7a8bd48 [Vulkan] Optional functionality usage improvements
Functional changes:
- Enable only actually used features, as drivers may take more optimal
  paths when certain features are disabled.
- Support VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE.
- Fix the separateStencilMaskRef check doing the opposite.
- Support shaderRoundingModeRTEFloat32.
- Fix vkGetDeviceBufferMemoryRequirements pointer not passed to the Vulkan
  Memory Allocator.

Stylistic changes:
- Move all device extensions, properties and features to one structure,
  especially simplifying portability subset feature checks, and also making
  it easier to request new extension functionality in the future.
- Remove extension suffixes from usage of promoted extensions.
2024-05-04 22:47:14 +03:00
Triang3l f87c6afdeb [Vulkan] Update headers to 1.3.278 2024-05-04 19:59:28 +03:00
Triang3l 9ebe25fd77 [GPU] Declare unused register fields explicitly 2024-05-02 23:31:13 +03:00
21 changed files with 1218 additions and 1083 deletions
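
The second commit's first functional change ("Enable only actually used features") refers to a standard Vulkan pattern; a minimal sketch of it, using Vulkan spec names rather than code from this change:

// Copy only the required fields from the queried supported set into a
// zero-initialized enabled set; everything else stays VK_FALSE, letting
// drivers take more optimal paths when a feature is known to be unused.
VkPhysicalDeviceFeatures supported;
vkGetPhysicalDeviceFeatures(physical_device, &supported);
VkPhysicalDeviceFeatures enabled = {};
enabled.tessellationShader = supported.tessellationShader;
enabled.independentBlend = supported.independentBlend;
VkDeviceCreateInfo device_create_info = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
device_create_info.pEnabledFeatures = &enabled;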

View file

@ -28,6 +28,13 @@ DEFINE_bool(
"the real reason why they're invalid is found.",
"GPU");
DEFINE_bool(
non_seamless_cube_map, true,
"Disable filtering between cube map faces near edges where possible "
"(Vulkan with VK_EXT_non_seamless_cube_map) to reproduce the Direct3D 9 "
"behavior.",
"GPU");
// Extremely bright screen borders in 4D5307E6.
// Reading between texels with half-pixel offset in 58410954.
DEFINE_bool(
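
The non_seamless_cube_map cvar defined above is consumed during sampler creation later in this diff; for context, a hedged sketch of the feature query that gates it on the Vulkan side (spec structure names, not this PR's DeviceInfo plumbing):

VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT non_seamless_features = {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT};
VkPhysicalDeviceFeatures2 features2 = {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &non_seamless_features};
vkGetPhysicalDeviceFeatures2(physical_device, &features2);
// Only when both the feature and the cvar are set does the sampler get
// VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT.
bool use_non_seamless = non_seamless_features.nonSeamlessCubeMap == VK_TRUE;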

View file

@ -20,6 +20,8 @@ DECLARE_bool(vsync);
DECLARE_bool(gpu_allow_invalid_fetch_constants);
DECLARE_bool(non_seamless_cube_map);
DECLARE_bool(half_pixel_offset);
DECLARE_int32(query_occlusion_fake_sample_count);

View file

@ -21,6 +21,9 @@
// Some registers were added on Adreno specifically and are not referenced in
// game .pdb files and never set by games.
// All unused bits are intentionally declared as named fields for stable
// comparisons when register values are constructed or modified by Xenia itself.
// Only 32-bit types (uint32_t, int32_t, float or enums with uint32_t / int32_t
// as the underlying type) are allowed in the bit fields here, as Visual C++
// restarts packing when a field requires different alignment than the previous
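
A tiny illustration of the packing rule the comment above describes (assumption: Visual C++ layout behavior; this snippet is not part of the diff):

#include <cstdint>
// All fields use a 32-bit type, so they share one allocation unit and the
// struct stays exactly one register wide, with the unused bits named.
struct Packed {
  uint32_t a : 24;
  uint32_t _pad_24 : 8;
};
static_assert(sizeof(Packed) == sizeof(uint32_t), "one register");
// On Visual C++, a field with a differently sized type restarts packing:
// this struct becomes 8 bytes there (the uint16_t starts a new allocation
// unit), which would break the static_assert_size checks on the register
// unions.
struct Repacked {
  uint32_t a : 24;
  uint16_t b : 8;
};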
@ -58,11 +61,11 @@ union alignas(uint32_t) COHER_STATUS_HOST {
uint32_t dest_base_5_ena : 1; // +14
uint32_t dest_base_6_ena : 1; // +15
uint32_t dest_base_7_ena : 1; // +16
uint32_t : 7; // +17
uint32_t _pad_17 : 7; // +17
uint32_t vc_action_ena : 1; // +24
uint32_t tc_action_ena : 1; // +25
uint32_t pglb_action_ena : 1; // +26
uint32_t : 4; // +27
uint32_t _pad_27 : 4; // +27
uint32_t status : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST;
@ -72,22 +75,23 @@ static_assert_size(COHER_STATUS_HOST, sizeof(uint32_t));
union alignas(uint32_t) WAIT_UNTIL {
uint32_t value;
struct {
uint32_t : 1; // +0
uint32_t _pad_0 : 1; // +0
uint32_t wait_re_vsync : 1; // +1
uint32_t wait_fe_vsync : 1; // +2
uint32_t wait_vsync : 1; // +3
uint32_t wait_dsply_id0 : 1; // +4
uint32_t wait_dsply_id1 : 1; // +5
uint32_t wait_dsply_id2 : 1; // +6
uint32_t : 3; // +7
uint32_t _pad_7 : 3; // +7
uint32_t wait_cmdfifo : 1; // +10
uint32_t : 3; // +11
uint32_t _pad_11 : 3; // +11
uint32_t wait_2d_idle : 1; // +14
uint32_t wait_3d_idle : 1; // +15
uint32_t wait_2d_idleclean : 1; // +16
uint32_t wait_3d_idleclean : 1; // +17
uint32_t : 2; // +18
uint32_t _pad_18 : 2; // +18
uint32_t cmdfifo_entries : 4; // +20
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL;
};
@ -129,7 +133,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC {
uint32_t inst_pred_optimize : 1; // +0
uint32_t sc_output_screen_xy : 1; // +1
xenos::SampleControl sc_sample_cntl : 2; // +2
uint32_t : 4; // +4
uint32_t _pad_4 : 4; // +4
// Pixel shader interpolator (according to the XNA microcode validator -
// limited to the interpolator count, 16, not the total register count of
// 64) index to write pixel parameters to.
@ -209,6 +213,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC {
uint32_t perfcounter_ref : 1; // +16
uint32_t yeild_optimize : 1; // +17 sic
uint32_t tx_cache_sel : 1; // +18
uint32_t _pad_19 : 13; // +19
};
static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC;
};
@ -229,10 +234,11 @@ static_assert_size(SQ_INTERPOLATOR_CNTL, sizeof(uint32_t));
union alignas(uint32_t) SQ_VS_CONST {
uint32_t value;
struct {
uint32_t base : 9; // +0
uint32_t : 3; // +9
uint32_t base : 9; // +0
uint32_t _pad_9 : 3; // +9
// Vec4 count minus one.
uint32_t size : 9; // 12
uint32_t size : 9; // +12
uint32_t _pad_21 : 11; // +21
};
static constexpr Register register_index = XE_GPU_REG_SQ_VS_CONST;
};
@ -242,10 +248,11 @@ static_assert_size(SQ_VS_CONST, sizeof(uint32_t));
union alignas(uint32_t) SQ_PS_CONST {
uint32_t value;
struct {
uint32_t base : 9; // +0
uint32_t : 3; // +9
uint32_t base : 9; // +0
uint32_t _pad_9 : 3; // +9
// Vec4 count minus one.
uint32_t size : 9; // 12
uint32_t size : 9; // +12
uint32_t _pad_21 : 11; // +21
};
static constexpr Register register_index = XE_GPU_REG_SQ_PS_CONST;
};
@ -273,7 +280,7 @@ union alignas(uint32_t) VGT_DMA_SIZE {
uint32_t value;
struct {
uint32_t num_words : 24; // +0
uint32_t : 6; // +24
uint32_t _pad_24 : 6; // +24
xenos::Endian swap_mode : 2; // +30
};
static constexpr Register register_index = XE_GPU_REG_VGT_DMA_SIZE;
@ -286,10 +293,10 @@ union alignas(uint32_t) VGT_DRAW_INITIATOR {
xenos::PrimitiveType prim_type : 6; // +0
xenos::SourceSelect source_select : 2; // +6
xenos::MajorMode major_mode : 2; // +8
uint32_t : 1; // +10
uint32_t _pad_10 : 1; // +10
xenos::IndexFormat index_size : 1; // +11
uint32_t not_eop : 1; // +12
uint32_t : 3; // +13
uint32_t _pad_13 : 3; // +13
uint32_t num_indices : 16; // +16
};
static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR;
@ -317,7 +324,8 @@ union alignas(uint32_t) VGT_MULTI_PRIM_IB_RESET_INDX {
// the restart index check likely only involves the lower 24 bit of the
// vertex index - therefore, if reset_indx is 0xFFFFFF, likely 0xFFFFFF,
// 0x1FFFFFF, 0xFFFFFFFF all cause primitive reset.
uint32_t reset_indx : 24;
uint32_t reset_indx : 24; // +0
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index =
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX;
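
A small sketch of the restart comparison the comment above hypothesizes (illustrative helper; the behavior is inferred from the comment, not verified hardware logic):

#include <cstdint>
// If only the low 24 bits participate, reset_indx == 0xFFFFFF matches the
// vertex indices 0xFFFFFF, 0x1FFFFFF and 0xFFFFFFFF alike.
bool IsPrimitiveResetIndex(uint32_t vertex_index, uint32_t reset_indx) {
  return (vertex_index & 0xFFFFFF) == (reset_indx & 0xFFFFFF);
}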
@ -334,7 +342,8 @@ union alignas(uint32_t) VGT_INDX_OFFSET {
// sign-extending on the host. Direct3D 9 just writes BaseVertexIndex as a
// signed int32 to the entire register, but the upper 8 bits are ignored
// anyway, and that has no effect on offsets that fit in 24 bits.
uint32_t indx_offset : 24;
uint32_t indx_offset : 24; // +0
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index = XE_GPU_REG_VGT_INDX_OFFSET;
};
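
The sign extension mentioned in the comment can be done with a shift pair over the 24-bit field; a sketch (hypothetical helper, not from the diff; relies on arithmetic right shift of negative values, which the supported compilers provide):

#include <cstdint>
// Widen the 24-bit indx_offset to a signed 32-bit base vertex index.
int32_t SignExtendIndexOffset(uint32_t indx_offset) {
  return int32_t(indx_offset << 8) >> 8;
}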
@ -343,7 +352,8 @@ static_assert_size(VGT_INDX_OFFSET, sizeof(uint32_t));
union alignas(uint32_t) VGT_MIN_VTX_INDX {
uint32_t value;
struct {
uint32_t min_indx : 24;
uint32_t min_indx : 24; // +0
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index = XE_GPU_REG_VGT_MIN_VTX_INDX;
};
@ -353,7 +363,8 @@ union alignas(uint32_t) VGT_MAX_VTX_INDX {
uint32_t value;
struct {
// Usually 0xFFFF or 0xFFFFFF.
uint32_t max_indx : 24;
uint32_t max_indx : 24; // +0
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index = XE_GPU_REG_VGT_MAX_VTX_INDX;
};
@ -363,6 +374,7 @@ union alignas(uint32_t) VGT_OUTPUT_PATH_CNTL {
uint32_t value;
struct {
xenos::VGTOutputPath path_select : 2; // +0
uint32_t _pad_2 : 30; // +2
};
static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL;
};
@ -372,6 +384,7 @@ union alignas(uint32_t) VGT_HOS_CNTL {
uint32_t value;
struct {
xenos::TessellationMode tess_mode : 2; // +0
uint32_t _pad_2 : 30; // +2
};
static constexpr Register register_index = XE_GPU_REG_VGT_HOS_CNTL;
};
@ -430,19 +443,20 @@ union alignas(uint32_t) PA_SU_SC_MODE_CNTL {
uint32_t poly_offset_front_enable : 1; // +11
uint32_t poly_offset_back_enable : 1; // +12
uint32_t poly_offset_para_enable : 1; // +13
uint32_t : 1; // +14
uint32_t _pad_14 : 1; // +14
uint32_t msaa_enable : 1; // +15
uint32_t vtx_window_offset_enable : 1; // +16
// LINE_STIPPLE_ENABLE was added on Adreno.
uint32_t : 2; // +17
uint32_t _pad_17 : 2; // +17
uint32_t provoking_vtx_last : 1; // +19
uint32_t persp_corr_dis : 1; // +20
uint32_t multi_prim_ib_ena : 1; // +21
uint32_t : 1; // +22
uint32_t _pad_22 : 1; // +22
uint32_t quad_order_enable : 1; // +23
uint32_t sc_one_quad_per_clock : 1; // +24
// WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on
// Adreno.
uint32_t _pad_25 : 7; // +25
};
static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL;
};
@ -455,6 +469,7 @@ union alignas(uint32_t) PA_SU_VTX_CNTL {
uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL).
uint32_t round_mode : 2; // +1
uint32_t quant_mode : 3; // +3
uint32_t _pad_6 : 26; // +6
};
static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
};
@ -464,7 +479,7 @@ union alignas(uint32_t) PA_SC_MPASS_PS_CNTL {
uint32_t value;
struct {
uint32_t mpass_pix_vec_per_pass : 20; // +0
uint32_t : 11; // +20
uint32_t _pad_20 : 11; // +20
uint32_t mpass_ps_ena : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL;
@ -482,6 +497,7 @@ union alignas(uint32_t) PA_SC_VIZ_QUERY {
uint32_t kill_pix_post_hi_z : 1; // +7
// not used with d3d
uint32_t kill_pix_post_detail_mask : 1; // +8
uint32_t _pad_9 : 23; // +9
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY;
};
@ -497,7 +513,7 @@ union alignas(uint32_t) PA_CL_CLIP_CNTL {
uint32_t ucp_ena_3 : 1; // +3
uint32_t ucp_ena_4 : 1; // +4
uint32_t ucp_ena_5 : 1; // +5
uint32_t : 8; // +6
uint32_t _pad_6 : 8; // +6
uint32_t ps_ucp_mode : 2; // +14
uint32_t clip_disable : 1; // +16
uint32_t ucp_cull_only_ena : 1; // +17
@ -508,6 +524,7 @@ union alignas(uint32_t) PA_CL_CLIP_CNTL {
uint32_t xy_nan_retain : 1; // +22
uint32_t z_nan_retain : 1; // +23
uint32_t w_nan_retain : 1; // +24
uint32_t _pad_25 : 7; // +25
};
struct {
uint32_t ucp_ena : 6;
@ -526,11 +543,12 @@ union alignas(uint32_t) PA_CL_VTE_CNTL {
uint32_t vport_y_offset_ena : 1; // +3
uint32_t vport_z_scale_ena : 1; // +4
uint32_t vport_z_offset_ena : 1; // +5
uint32_t : 2; // +6
uint32_t _pad_6 : 2; // +6
uint32_t vtx_xy_fmt : 1; // +8
uint32_t vtx_z_fmt : 1; // +9
uint32_t vtx_w0_fmt : 1; // +10
uint32_t perfcounter_ref : 1; // +11
uint32_t _pad_12 : 20; // +12
};
static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL;
};
@ -539,9 +557,10 @@ static_assert_size(PA_CL_VTE_CNTL, sizeof(uint32_t));
union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_TL {
uint32_t value;
struct {
int32_t tl_x : 15; // +0
uint32_t : 1; // +15
int32_t tl_y : 15; // +16
int32_t tl_x : 15; // +0
uint32_t _pad_15 : 1; // +15
int32_t tl_y : 15; // +16
uint32_t _pad_31 : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL;
};
@ -550,9 +569,10 @@ static_assert_size(PA_SC_SCREEN_SCISSOR_TL, sizeof(uint32_t));
union alignas(uint32_t) PA_SC_SCREEN_SCISSOR_BR {
uint32_t value;
struct {
int32_t br_x : 15; // +0
uint32_t : 1; // +15
int32_t br_y : 15; // +16
int32_t br_x : 15; // +0
uint32_t _pad_15 : 1; // +15
int32_t br_y : 15; // +16
uint32_t _pad_31 : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR;
};
@ -562,8 +582,9 @@ union alignas(uint32_t) PA_SC_WINDOW_OFFSET {
uint32_t value;
struct {
int32_t window_x_offset : 15; // +0
uint32_t : 1; // +15
uint32_t _pad_15 : 1; // +15
int32_t window_y_offset : 15; // +16
uint32_t _pad_31 : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET;
};
@ -573,9 +594,9 @@ union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_TL {
uint32_t value;
struct {
uint32_t tl_x : 14; // +0
uint32_t : 2; // +14
uint32_t _pad_14 : 2; // +14
uint32_t tl_y : 14; // +16
uint32_t : 1; // +30
uint32_t _pad_30 : 1; // +30
uint32_t window_offset_disable : 1; // +31
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL;
@ -585,9 +606,10 @@ static_assert_size(PA_SC_WINDOW_SCISSOR_TL, sizeof(uint32_t));
union alignas(uint32_t) PA_SC_WINDOW_SCISSOR_BR {
uint32_t value;
struct {
uint32_t br_x : 14; // +0
uint32_t : 2; // +14
uint32_t br_y : 14; // +16
uint32_t br_x : 14; // +0
uint32_t _pad_14 : 2; // +14
uint32_t br_y : 14; // +16
uint32_t _pad_30 : 2; // +30
};
static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR;
};
@ -610,6 +632,7 @@ union alignas(uint32_t) RB_MODECONTROL {
uint32_t value;
struct {
xenos::ModeControl edram_mode : 3; // +0
uint32_t _pad_3 : 29; // +3
};
static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL;
};
@ -619,7 +642,7 @@ union alignas(uint32_t) RB_SURFACE_INFO {
uint32_t value;
struct {
uint32_t surface_pitch : 14; // +0 in pixels.
uint32_t : 2; // +14
uint32_t _pad_14 : 2; // +14
xenos::MsaaSamples msaa_samples : 2; // +16
uint32_t hiz_pitch : 14; // +18
};
@ -634,7 +657,7 @@ union alignas(uint32_t) RB_COLORCONTROL {
uint32_t alpha_test_enable : 1; // +3
uint32_t alpha_to_mask_enable : 1; // +4
// Everything in between was added on Adreno.
uint32_t : 19; // +5
uint32_t _pad_5 : 19; // +5
// TODO(Triang3l): Redo these tests and possibly flip these vertically in
// the comment and in the actual implementation. It appears that
// gl_FragCoord.y is mirrored as opposed to the actual screen coordinates in
@ -689,9 +712,10 @@ union alignas(uint32_t) RB_COLOR_INFO {
// for convenience and to avoid mistakes.
uint32_t color_base : 11; // +0 in tiles.
uint32_t color_base_bit_11 : 1; // +11
uint32_t : 4; // +12
uint32_t _pad_12 : 4; // +12
xenos::ColorRenderTargetFormat color_format : 4; // +16
int32_t color_exp_bias : 6; // +20
uint32_t _pad_26 : 6; // +26
};
static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO;
// RB_COLOR[1-3]_INFO also use this format.
@ -718,6 +742,7 @@ union alignas(uint32_t) RB_COLOR_MASK {
uint32_t write_green3 : 1; // +13
uint32_t write_blue3 : 1; // +14
uint32_t write_alpha3 : 1; // +15
uint32_t _pad_16 : 16; // +16
};
static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK;
};
@ -729,11 +754,12 @@ union alignas(uint32_t) RB_BLENDCONTROL {
xenos::BlendFactor color_srcblend : 5; // +0
xenos::BlendOp color_comb_fcn : 3; // +5
xenos::BlendFactor color_destblend : 5; // +8
uint32_t : 3; // +13
uint32_t _pad_13 : 3; // +13
xenos::BlendFactor alpha_srcblend : 5; // +16
xenos::BlendOp alpha_comb_fcn : 3; // +21
xenos::BlendFactor alpha_destblend : 5; // +24
// BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno.
uint32_t _pad_29 : 3; // +29
};
// RB_BLENDCONTROL[0-3] use this format.
static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0;
@ -748,7 +774,7 @@ union alignas(uint32_t) RB_DEPTHCONTROL {
uint32_t z_enable : 1; // +1
uint32_t z_write_enable : 1; // +2
// EARLY_Z_ENABLE was added on Adreno.
uint32_t : 1; // +3
uint32_t _pad_3 : 1; // +3
xenos::CompareFunction zfunc : 3; // +4
uint32_t backface_enable : 1; // +7
xenos::CompareFunction stencilfunc : 3; // +8
@ -770,6 +796,7 @@ union alignas(uint32_t) RB_STENCILREFMASK {
uint32_t stencilref : 8; // +0
uint32_t stencilmask : 8; // +8
uint32_t stencilwritemask : 8; // +16
uint32_t _pad_24 : 8; // +24
};
static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK;
// RB_STENCILREFMASK_BF also uses this format.
@ -784,8 +811,9 @@ union alignas(uint32_t) RB_DEPTH_INFO {
// for convenience and to avoid mistakes.
uint32_t depth_base : 11; // +0 in tiles.
uint32_t depth_base_bit_11 : 1; // +11
uint32_t : 4; // +12
uint32_t _pad_12 : 4; // +12
xenos::DepthRenderTargetFormat depth_format : 1; // +16
uint32_t _pad_17 : 15; // +17
};
static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO;
};
@ -797,13 +825,14 @@ union alignas(uint32_t) RB_COPY_CONTROL {
uint32_t value;
struct {
uint32_t copy_src_select : 3; // +0 Depth is 4.
uint32_t : 1; // +3
uint32_t _pad_3 : 1; // +3
xenos::CopySampleSelect copy_sample_select : 3; // +4
uint32_t : 1; // +7
uint32_t _pad_7 : 1; // +7
uint32_t color_clear_enable : 1; // +8
uint32_t depth_clear_enable : 1; // +9
uint32_t : 10; // +10
uint32_t _pad_10 : 10; // +10
xenos::CopyCommand copy_command : 2; // +20
uint32_t _pad_22 : 10; // +22
};
static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL;
};
@ -818,8 +847,9 @@ union alignas(uint32_t) RB_COPY_DEST_INFO {
xenos::ColorFormat copy_dest_format : 6; // +7
xenos::SurfaceNumberFormat copy_dest_number : 3; // +13
int32_t copy_dest_exp_bias : 6; // +16
uint32_t : 2; // +22
uint32_t _pad_22 : 2; // +22
uint32_t copy_dest_swap : 1; // +24
uint32_t _pad_25 : 7; // +25
};
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO;
};
@ -829,8 +859,9 @@ union alignas(uint32_t) RB_COPY_DEST_PITCH {
uint32_t value;
struct {
uint32_t copy_dest_pitch : 14; // +0
uint32_t : 2; // +14
uint32_t _pad_14 : 2; // +14
uint32_t copy_dest_height : 14; // +16
uint32_t _pad_30 : 2; // +30
};
static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH;
};
@ -856,6 +887,7 @@ union alignas(uint32_t) DC_LUT_RW_INDEX {
// absolute index, without the lower or upper 10 bits selection in the
// bit 0. For PWL, the bit 7 is ignored.
uint32_t rw_index : 8; // +0
uint32_t _pad_8 : 24; // +8
};
static constexpr Register register_index = XE_GPU_REG_DC_LUT_RW_INDEX;
};
@ -865,6 +897,7 @@ union alignas(uint32_t) DC_LUT_SEQ_COLOR {
uint32_t value;
struct {
uint32_t seq_color : 16; // +0, bits 0:5 are hardwired to zero
uint32_t _pad_16 : 16; // +16
};
static constexpr Register register_index = XE_GPU_REG_DC_LUT_SEQ_COLOR;
};
@ -893,6 +926,7 @@ union alignas(uint32_t) DC_LUT_30_COLOR {
uint32_t color_10_blue : 10; // +0
uint32_t color_10_green : 10; // +10
uint32_t color_10_red : 10; // +20
uint32_t _pad_30 : 2; // +30
};
static constexpr Register register_index = XE_GPU_REG_DC_LUT_30_COLOR;
};

View file

@ -37,56 +37,32 @@ SpirvShaderTranslator::Features::Features(bool all)
full_draw_index_uint32(all),
image_view_format_swizzle(all),
signed_zero_inf_nan_preserve_float32(all),
denorm_flush_to_zero_float32(all) {}
denorm_flush_to_zero_float32(all),
rounding_mode_rte_float32(all) {}
SpirvShaderTranslator::Features::Features(
const ui::vulkan::VulkanProvider& provider)
: max_storage_buffer_range(
provider.device_properties().limits.maxStorageBufferRange),
clip_distance(provider.device_features().shaderClipDistance),
cull_distance(provider.device_features().shaderCullDistance),
demote_to_helper_invocation(
provider.device_extensions().ext_shader_demote_to_helper_invocation &&
provider.device_shader_demote_to_helper_invocation_features()
.shaderDemoteToHelperInvocation),
const ui::vulkan::VulkanProvider::DeviceInfo& device_info)
: max_storage_buffer_range(device_info.maxStorageBufferRange),
clip_distance(device_info.shaderClipDistance),
cull_distance(device_info.shaderCullDistance),
demote_to_helper_invocation(device_info.shaderDemoteToHelperInvocation),
fragment_shader_sample_interlock(
provider.device_extensions().ext_fragment_shader_interlock &&
provider.device_fragment_shader_interlock_features()
.fragmentShaderSampleInterlock),
full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) {
uint32_t device_version = provider.device_properties().apiVersion;
const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions =
provider.device_extensions();
if (device_version >= VK_MAKE_VERSION(1, 2, 0)) {
device_info.fragmentShaderSampleInterlock),
full_draw_index_uint32(device_info.fullDrawIndexUint32),
image_view_format_swizzle(device_info.imageViewFormatSwizzle),
signed_zero_inf_nan_preserve_float32(
device_info.shaderSignedZeroInfNanPreserveFloat32),
denorm_flush_to_zero_float32(device_info.shaderDenormFlushToZeroFloat32),
rounding_mode_rte_float32(device_info.shaderRoundingModeRTEFloat32) {
if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
spirv_version = spv::Spv_1_5;
} else if (device_extensions.khr_spirv_1_4) {
} else if (device_info.ext_1_2_VK_KHR_spirv_1_4) {
spirv_version = spv::Spv_1_4;
} else if (device_version >= VK_MAKE_VERSION(1, 1, 0)) {
} else if (device_info.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
spirv_version = spv::Spv_1_3;
} else {
spirv_version = spv::Spv_1_0;
}
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (device_portability_subset_features) {
image_view_format_swizzle =
bool(device_portability_subset_features->imageViewFormatSwizzle);
} else {
image_view_format_swizzle = true;
}
if (spirv_version >= spv::Spv_1_4 ||
device_extensions.khr_shader_float_controls) {
const VkPhysicalDeviceFloatControlsPropertiesKHR&
float_controls_properties = provider.device_float_controls_properties();
signed_zero_inf_nan_preserve_float32 =
bool(float_controls_properties.shaderSignedZeroInfNanPreserveFloat32);
denorm_flush_to_zero_float32 =
bool(float_controls_properties.shaderDenormFlushToZeroFloat32);
} else {
signed_zero_inf_nan_preserve_float32 = false;
denorm_flush_to_zero_float32 = false;
}
}
uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification(
@ -168,7 +144,8 @@ void SpirvShaderTranslator::StartTranslation() {
: spv::CapabilityShader);
if (features_.spirv_version < spv::Spv_1_4) {
if (features_.signed_zero_inf_nan_preserve_float32 ||
features_.denorm_flush_to_zero_float32) {
features_.denorm_flush_to_zero_float32 ||
features_.rounding_mode_rte_float32) {
builder_->addExtension("SPV_KHR_float_controls");
}
}
@ -724,6 +701,11 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
builder_->addExecutionMode(function_main_,
spv::ExecutionModeSignedZeroInfNanPreserve, 32);
}
if (features_.rounding_mode_rte_float32) {
builder_->addCapability(spv::CapabilityRoundingModeRTE);
builder_->addExecutionMode(function_main_,
spv::ExecutionModeRoundingModeRTE, 32);
}
spv::Instruction* entry_point =
builder_->addEntryPoint(execution_model, function_main_, "main");
for (spv::Id interface_id : main_interface_) {
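
For reference, a hedged sketch of where shaderRoundingModeRTEFloat32 originates on the API side (Vulkan spec names; the PR itself reads the value through its DeviceInfo structure):

VkPhysicalDeviceFloatControlsProperties float_controls = {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES};
VkPhysicalDeviceProperties2 properties2 = {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &float_controls};
vkGetPhysicalDeviceProperties2(physical_device, &properties2);
// Only when this is VK_TRUE may the translator declare
// spv::CapabilityRoundingModeRTE and set ExecutionModeRoundingModeRTE for
// 32-bit floats, as CompleteTranslation does above.
bool rte_float32 = float_controls.shaderRoundingModeRTEFloat32 == VK_TRUE;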

View file

@ -320,7 +320,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
static constexpr uint32_t kSpirvMagicToolId = 26;
struct Features {
explicit Features(const ui::vulkan::VulkanProvider& provider);
explicit Features(
const ui::vulkan::VulkanProvider::DeviceInfo& device_info);
explicit Features(bool all = false);
unsigned int spirv_version;
uint32_t max_storage_buffer_range;
@ -332,6 +333,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
bool image_view_format_swizzle;
bool signed_zero_inf_nan_preserve_float32;
bool denorm_flush_to_zero_float32;
bool rounding_mode_rte_float32;
};
SpirvShaderTranslator(const Features& features,

View file

@ -138,7 +138,8 @@ bool VulkanCommandProcessor::SetupContext() {
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
// The unconditional inclusion of the vertex shader stage also covers the case
// of manual index / factor buffer fetch (the system constants and the shared
@ -147,12 +148,12 @@ bool VulkanCommandProcessor::SetupContext() {
guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT;
if (device_features.tessellationShader) {
if (device_info.tessellationShader) {
guest_shader_pipeline_stages_ |=
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
guest_shader_vertex_stages_ |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
}
if (!device_features.vertexPipelineStoresAndAtomics) {
if (!device_info.vertexPipelineStoresAndAtomics) {
// For memory export from vertex shaders converted to compute shaders.
guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT;
@ -160,14 +161,11 @@ bool VulkanCommandProcessor::SetupContext() {
// 16384 is bigger than any single uniform buffer that Xenia needs, but is the
// minimum maxUniformBufferRange, thus the safe minimum amount.
VkDeviceSize uniform_buffer_alignment = std::max(
provider.device_properties().limits.minUniformBufferOffsetAlignment,
VkDeviceSize(1));
uniform_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize,
size_t(16384)),
size_t(uniform_buffer_alignment)));
size_t(device_info.minUniformBufferOffsetAlignment)));
// Descriptor set layouts that don't depend on the setup of other subsystems.
VkShaderStageFlags guest_shader_stages =
@ -201,10 +199,9 @@ bool VulkanCommandProcessor::SetupContext() {
[SpirvShaderTranslator::kConstantBufferSystem]
.stageFlags =
guest_shader_stages |
(device_features.tessellationShader
? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT
: 0) |
(device_features.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0);
(device_info.tessellationShader ? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT
: 0) |
(device_info.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0);
descriptor_set_layout_bindings_constants
[SpirvShaderTranslator::kConstantBufferFloatVertex]
.stageFlags = guest_shader_vertex_stages_;
@ -283,7 +280,7 @@ bool VulkanCommandProcessor::SetupContext() {
uint32_t shared_memory_binding_count_log2 =
SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2(
provider.device_properties().limits.maxStorageBufferRange);
device_info.maxStorageBufferRange);
uint32_t shared_memory_binding_count = UINT32_C(1)
<< shared_memory_binding_count_log2;
@ -487,14 +484,14 @@ bool VulkanCommandProcessor::SetupContext() {
&gamma_ramp_host_visible_buffer_memory_requirements);
uint32_t gamma_ramp_host_visible_buffer_memory_types =
gamma_ramp_host_visible_buffer_memory_requirements.memoryTypeBits &
(provider.memory_types_device_local() &
provider.memory_types_host_visible());
(device_info.memory_types_device_local &
device_info.memory_types_host_visible);
VkMemoryAllocateInfo gamma_ramp_host_visible_buffer_memory_allocate_info;
// Prefer a host-uncached (because it's write-only) memory type, but try a
// host-cached host-visible device-local one as well.
if (xe::bit_scan_forward(
gamma_ramp_host_visible_buffer_memory_types &
~provider.memory_types_host_cached(),
~device_info.memory_types_host_cached,
&(gamma_ramp_host_visible_buffer_memory_allocate_info
.memoryTypeIndex)) ||
xe::bit_scan_forward(
@ -509,16 +506,16 @@ bool VulkanCommandProcessor::SetupContext() {
gamma_ramp_host_visible_buffer_memory_allocate_info.pNext = nullptr;
gamma_ramp_host_visible_buffer_memory_allocate_info.allocationSize =
gamma_ramp_host_visible_buffer_memory_requirements.size;
VkMemoryDedicatedAllocateInfoKHR
VkMemoryDedicatedAllocateInfo
gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
if (device_info.ext_1_1_VK_KHR_dedicated_allocation) {
gamma_ramp_host_visible_buffer_memory_allocate_info_last->pNext =
&gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info;
gamma_ramp_host_visible_buffer_memory_allocate_info_last =
reinterpret_cast<VkMemoryAllocateInfo*>(
&gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info);
gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.pNext =
nullptr;
gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.image =
@ -2419,10 +2416,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
current_guest_graphics_pipeline_layout_ = pipeline_layout;
}
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
GetVulkanProvider().device_info();
bool host_render_targets_used = render_target_cache_->GetPath() ==
RenderTargetCache::Path::kHostRenderTargets;
@ -2446,8 +2441,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// interlocks case completely - apply the viewport and the scissor offset
// directly to pixel address and to things like ps_param_gen.
draw_util::GetHostViewportInfo(
regs, 1, 1, false, device_limits.maxViewportDimensions[0],
device_limits.maxViewportDimensions[1], true, normalized_depth_control,
regs, 1, 1, false, device_info.maxViewportDimensions[0],
device_info.maxViewportDimensions[1], true, normalized_depth_control,
false, host_render_targets_used,
pixel_shader && pixel_shader->writes_depth(), viewport_info);
@ -2461,7 +2456,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// indirectly in the vertex shader if full 32-bit indices are not supported by
// the host.
bool shader_32bit_index_dma =
!device_features.fullDrawIndexUint32 &&
!device_info.fullDrawIndexUint32 &&
primitive_processing_result.index_buffer_type ==
PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA &&
vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32 &&
@ -3315,21 +3310,16 @@ void VulkanCommandProcessor::UpdateDynamicState(
if (normalized_depth_control.stencil_enable) {
Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg;
if (primitive_polygonal && normalized_depth_control.backface_enable) {
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (!device_portability_subset_features ||
device_portability_subset_features->separateStencilMaskRef) {
if (GetVulkanProvider().device_info().separateStencilMaskRef) {
stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
} else {
// Choose the back face values only if drawing only back faces.
stencil_ref_mask_front_reg =
regs.Get<reg::PA_SU_SC_MODE_CNTL>().cull_front
? XE_GPU_REG_RB_STENCILREFMASK_BF
: XE_GPU_REG_RB_STENCILREFMASK;
stencil_ref_mask_back_reg = stencil_ref_mask_front_reg;
} else {
stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
}
} else {
stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
@ -3681,12 +3671,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
}
// Texture host swizzle in the shader.
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (device_portability_subset_features &&
!device_portability_subset_features->imageViewFormatSwizzle) {
if (!GetVulkanProvider().device_info().imageViewFormatSwizzle) {
uint32_t textures_remaining = used_texture_mask;
uint32_t texture_index;
while (xe::bit_scan_forward(textures_remaining, &texture_index)) {
@ -3968,8 +3953,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
kAllConstantBuffersMask) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants);
size_t uniform_buffer_alignment = size_t(
provider.device_properties().limits.minUniformBufferOffsetAlignment);
size_t uniform_buffer_alignment =
size_t(provider.device_info().minUniformBufferOffsetAlignment);
// System constants.
if (!(current_constant_buffers_up_to_date_ &
(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem))) {
@ -4348,8 +4333,7 @@ uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding(
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
uint8_t* mapping = uniform_buffer_pool_->Request(
frame_current_, size,
size_t(
provider.device_properties().limits.minUniformBufferOffsetAlignment),
size_t(provider.device_info().minUniformBufferOffsetAlignment),
descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
if (!mapping) {
return nullptr;
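
The minUniformBufferOffsetAlignment uses above rely on the limit being a nonzero power of two, which the Vulkan spec guarantees, so the old std::max with 1 was redundant. A sketch of the usual alignment step (hypothetical helper, not from the diff):

#include <cstddef>
// Round an offset up to the next multiple of a power-of-two alignment,
// e.g. AlignUp(100, 64) == 128.
size_t AlignUp(size_t offset, size_t alignment) {
  return (offset + alignment - 1) & ~(alignment - 1);
}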

View file

@ -59,7 +59,7 @@ bool VulkanPipelineCache::Initialize() {
RenderTargetCache::Path::kPixelShaderInterlock;
shader_translator_ = std::make_unique<SpirvShaderTranslator>(
SpirvShaderTranslator::Features(provider),
SpirvShaderTranslator::Features(provider.device_info()),
render_target_cache_.msaa_2x_attachments_supported(),
render_target_cache_.msaa_2x_no_attachments_supported(),
edram_fragment_shader_interlock);
@ -471,13 +471,9 @@ void VulkanPipelineCache::WritePipelineRenderTargetDescription(
render_target_out.dst_alpha_blend_factor =
kBlendFactorMap[uint32_t(blend_control.alpha_destblend)];
render_target_out.alpha_blend_op = blend_control.alpha_comb_fcn;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (device_portability_subset_features &&
!device_portability_subset_features->constantAlphaColorBlendFactors) {
if (!command_processor_.GetVulkanProvider()
.device_info()
.constantAlphaColorBlendFactors) {
if (blend_control.color_srcblend == xenos::BlendFactor::kConstantAlpha) {
render_target_out.src_color_blend_factor =
PipelineBlendFactor::kConstantColor;
@ -516,12 +512,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
PipelineDescription& description_out) const {
description_out.Reset();
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
const RegisterFile& regs = register_file_;
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
@ -556,8 +548,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
break;
case xenos::PrimitiveType::kTriangleFan:
// The check should be performed at primitive processing time.
assert_true(!device_portability_subset_features ||
device_portability_subset_features->triangleFans);
assert_true(device_info.triangleFans);
primitive_topology = PipelinePrimitiveTopology::kTriangleFan;
break;
case xenos::PrimitiveType::kTriangleStrip:
@ -581,8 +572,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
primitive_processing_result.host_primitive_reset_enabled;
description_out.depth_clamp_enable =
device_features.depthClamp &&
regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable;
device_info.depthClamp && regs.Get<reg::PA_CL_CLIP_CNTL>().clip_disable;
// TODO(Triang3l): Tessellation.
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
@ -597,7 +587,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
bool cull_back = pa_su_sc_mode_cntl.cull_back;
description_out.cull_front = cull_front;
description_out.cull_back = cull_back;
if (device_features.fillModeNonSolid) {
if (device_info.fillModeNonSolid) {
xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles;
if (!cull_front) {
polygon_type =
@ -614,11 +604,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
case xenos::PolygonType::kPoints:
// When points are not supported, use lines instead, preserving
// debug-like purpose.
description_out.polygon_mode =
(!device_portability_subset_features ||
device_portability_subset_features->pointPolygons)
? PipelinePolygonMode::kPoint
: PipelinePolygonMode::kLine;
description_out.polygon_mode = device_info.pointPolygons
? PipelinePolygonMode::kPoint
: PipelinePolygonMode::kLine;
break;
case xenos::PolygonType::kLines:
description_out.polygon_mode = PipelinePolygonMode::kLine;
@ -683,7 +671,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
// Color blending and write masks (filled only for the attachments present
// in the render pass object).
uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1;
if (device_features.independentBlend) {
if (device_info.independentBlend) {
uint32_t render_pass_color_rts_remaining = render_pass_color_rts;
uint32_t color_rt_index;
while (xe::bit_scan_forward(render_pass_color_rts_remaining,
@ -779,63 +767,35 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet(
return false;
}
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (device_portability_subset_features) {
if (description.primitive_topology ==
PipelinePrimitiveTopology::kTriangleFan &&
!device_portability_subset_features->triangleFans) {
return false;
}
if (description.polygon_mode == PipelinePolygonMode::kPoint &&
!device_portability_subset_features->pointPolygons) {
return false;
}
if (!device_portability_subset_features->constantAlphaColorBlendFactors) {
uint32_t color_rts_remaining =
description.render_pass_key.depth_and_color_used >> 1;
uint32_t color_rt_index;
while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) {
color_rts_remaining &= ~(uint32_t(1) << color_rt_index);
const PipelineRenderTarget& color_rt =
description.render_targets[color_rt_index];
if (color_rt.src_color_blend_factor ==
PipelineBlendFactor::kConstantAlpha ||
color_rt.src_color_blend_factor ==
PipelineBlendFactor::kOneMinusConstantAlpha ||
color_rt.dst_color_blend_factor ==
PipelineBlendFactor::kConstantAlpha ||
color_rt.dst_color_blend_factor ==
PipelineBlendFactor::kOneMinusConstantAlpha) {
return false;
}
}
}
}
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
if (!device_features.geometryShader &&
if (!device_info.geometryShader &&
description.geometry_shader != PipelineGeometryShader::kNone) {
return false;
}
if (!device_features.depthClamp && description.depth_clamp_enable) {
if (!device_info.triangleFans &&
description.primitive_topology ==
PipelinePrimitiveTopology::kTriangleFan) {
return false;
}
if (!device_features.fillModeNonSolid &&
if (!device_info.depthClamp && description.depth_clamp_enable) {
return false;
}
if (!device_info.pointPolygons &&
description.polygon_mode == PipelinePolygonMode::kPoint) {
return false;
}
if (!device_info.fillModeNonSolid &&
description.polygon_mode != PipelinePolygonMode::kFill) {
return false;
}
if (!device_features.independentBlend) {
if (!device_info.independentBlend) {
uint32_t color_rts_remaining =
description.render_pass_key.depth_and_color_used >> 1;
uint32_t first_color_rt_index;
@ -865,6 +825,27 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet(
}
}
if (!device_info.constantAlphaColorBlendFactors) {
uint32_t color_rts_remaining =
description.render_pass_key.depth_and_color_used >> 1;
uint32_t color_rt_index;
while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) {
color_rts_remaining &= ~(uint32_t(1) << color_rt_index);
const PipelineRenderTarget& color_rt =
description.render_targets[color_rt_index];
if (color_rt.src_color_blend_factor ==
PipelineBlendFactor::kConstantAlpha ||
color_rt.src_color_blend_factor ==
PipelineBlendFactor::kOneMinusConstantAlpha ||
color_rt.dst_color_blend_factor ==
PipelineBlendFactor::kConstantAlpha ||
color_rt.dst_color_blend_factor ==
PipelineBlendFactor::kOneMinusConstantAlpha) {
return false;
}
}
}
return true;
}
@ -1913,7 +1894,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
bool edram_fragment_shader_interlock =
render_target_cache_.GetPath() ==
@ -2222,7 +2204,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
}
color_blend_attachment.colorWriteMask =
VkColorComponentFlags(color_rt.color_write_mask);
if (!device_features.independentBlend) {
if (!device_info.independentBlend) {
// For non-independent blend, the pAttachments element for the first
// actually used color will be replicated into all.
break;
@ -2231,7 +2213,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
}
color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used);
color_blend_state.pAttachments = color_blend_attachments;
if (color_rts_used && !device_features.independentBlend) {
if (color_rts_used && !device_info.independentBlend) {
// "If the independent blending feature is not enabled, all elements of
// pAttachments must be identical."
uint32_t first_color_rt_index;
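
A condensed sketch of the replication rule quoted above (illustrative; it mirrors the loop this function sets up but is not the PR's exact code):

// With independentBlend unavailable, all pAttachments elements must be
// identical, so the first actually used attachment's state is copied into
// every element.
uint32_t first_rt_index;
if (xe::bit_scan_forward(color_rts_used, &first_rt_index)) {
  for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) {
    color_blend_attachments[i] = color_blend_attachments[first_rt_index];
  }
}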

View file

@ -27,18 +27,12 @@ namespace vulkan {
VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); }
bool VulkanPrimitiveProcessor::Initialize() {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (!InitializeCommon(device_features.fullDrawIndexUint32,
!device_portability_subset_features ||
device_portability_subset_features->triangleFans,
false, device_features.geometryShader,
device_features.geometryShader,
device_features.geometryShader)) {
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
if (!InitializeCommon(device_info.fullDrawIndexUint32,
device_info.triangleFans, false,
device_info.geometryShader, device_info.geometryShader,
device_info.geometryShader)) {
Shutdown();
return false;
}

View file

@ -213,8 +213,8 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
VkPhysicalDevice physical_device = provider.physical_device();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
if (cvars::render_target_path_vulkan == "fsi") {
path_ = Path::kPixelShaderInterlock;
@ -226,11 +226,6 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
// OpenGL ES 3.1. Thus, it's fine to demand a wide range of other optional
// features for the fragment shader interlock backend to work.
if (path_ == Path::kPixelShaderInterlock) {
const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT&
device_fragment_shader_interlock_features =
provider.device_fragment_shader_interlock_features();
const VkPhysicalDeviceFeatures& device_features =
provider.device_features();
// Interlocking between fragments with common sample coverage is enough, but
// interlocking more is acceptable too (fragmentShaderShadingRateInterlock
// would be okay too, but it's unlikely that an implementation would
@ -248,16 +243,13 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
// between, for instance, the ability to vfetch and memexport in fragment
// shaders, and the usage of fragment shader interlock, prefer the former
// for simplicity.
if (!provider.device_extensions().ext_fragment_shader_interlock ||
!(device_fragment_shader_interlock_features
.fragmentShaderSampleInterlock ||
device_fragment_shader_interlock_features
.fragmentShaderPixelInterlock) ||
!device_features.fragmentStoresAndAtomics ||
!device_features.sampleRateShading ||
!device_limits.standardSampleLocations ||
if (!(device_info.fragmentShaderSampleInterlock ||
device_info.fragmentShaderPixelInterlock) ||
!device_info.fragmentStoresAndAtomics ||
!device_info.sampleRateShading ||
!device_info.standardSampleLocations ||
shared_memory_binding_count >=
device_limits.maxDescriptorSetStorageBuffers) {
device_info.maxPerStageDescriptorStorageBuffers) {
path_ = Path::kHostRenderTargets;
}
}
@ -279,18 +271,17 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
if (cvars::native_2x_msaa) {
// Multisampled integer sampled images are optional in Vulkan and in Xenia.
msaa_2x_attachments_supported_ =
(device_limits.framebufferColorSampleCounts &
device_limits.framebufferDepthSampleCounts &
device_limits.framebufferStencilSampleCounts &
device_limits.sampledImageColorSampleCounts &
device_limits.sampledImageDepthSampleCounts &
device_limits.sampledImageStencilSampleCounts &
VK_SAMPLE_COUNT_2_BIT) &&
(device_limits.sampledImageIntegerSampleCounts &
(device_info.framebufferColorSampleCounts &
device_info.framebufferDepthSampleCounts &
device_info.framebufferStencilSampleCounts &
device_info.sampledImageColorSampleCounts &
device_info.sampledImageDepthSampleCounts &
device_info.sampledImageStencilSampleCounts & VK_SAMPLE_COUNT_2_BIT) &&
(device_info.sampledImageIntegerSampleCounts &
(VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) !=
VK_SAMPLE_COUNT_4_BIT;
msaa_2x_no_attachments_supported_ =
(device_limits.framebufferNoAttachmentsSampleCounts &
(device_info.framebufferNoAttachmentsSampleCounts &
VK_SAMPLE_COUNT_2_BIT) != 0;
} else {
msaa_2x_attachments_supported_ = false;
@ -847,10 +838,10 @@ bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
fsi_framebuffer_create_info.pAttachments = nullptr;
fsi_framebuffer_create_info.width = std::min(
xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_x(),
device_limits.maxFramebufferWidth);
device_info.maxFramebufferWidth);
fsi_framebuffer_create_info.height = std::min(
xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_y(),
device_limits.maxFramebufferHeight);
device_info.maxFramebufferHeight);
fsi_framebuffer_create_info.layers = 1;
if (dfn.vkCreateFramebuffer(device, &fsi_framebuffer_create_info, nullptr,
&fsi_framebuffer_.framebuffer) != VK_SUCCESS) {
@ -1680,17 +1671,17 @@ VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() {
}
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const {
const VkPhysicalDeviceLimits& device_limits =
command_processor_.GetVulkanProvider().device_properties().limits;
return std::min(device_limits.maxFramebufferWidth,
device_limits.maxImageDimension2D);
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
return std::min(device_info.maxFramebufferWidth,
device_info.maxImageDimension2D);
}
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const {
const VkPhysicalDeviceLimits& device_limits =
command_processor_.GetVulkanProvider().device_properties().limits;
return std::min(device_limits.maxFramebufferHeight,
device_limits.maxImageDimension2D);
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
return std::min(device_info.maxFramebufferHeight,
device_info.maxImageDimension2D);
}
RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget(
@ -2084,8 +2075,8 @@ VulkanRenderTargetCache::GetHostRenderTargetsFramebuffer(
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
VkRenderPass render_pass = GetHostRenderTargetsRenderPass(render_pass_key);
if (render_pass == VK_NULL_HANDLE) {
@ -2134,9 +2125,9 @@ VulkanRenderTargetCache::GetHostRenderTargetsFramebuffer(
// there's no limit imposed by the sizes of the attachments that have been
// created successfully.
host_extent.width = std::min(host_extent.width * draw_resolution_scale_x(),
device_limits.maxFramebufferWidth);
device_info.maxFramebufferWidth);
host_extent.height = std::min(host_extent.height * draw_resolution_scale_y(),
device_limits.maxFramebufferHeight);
device_info.maxFramebufferHeight);
framebuffer_create_info.width = host_extent.width;
framebuffer_create_info.height = host_extent.height;
framebuffer_create_info.layers = 1;
@ -2161,7 +2152,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
std::vector<spv::Id> id_vector_temp;
std::vector<unsigned int> uint_vector_temp;
@ -2249,7 +2241,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
// Outputs.
bool shader_uses_stencil_reference_output =
mode.output == TransferOutput::kDepth &&
provider.device_extensions().ext_shader_stencil_export;
provider.device_info().ext_VK_EXT_shader_stencil_export;
bool dest_color_is_uint = false;
uint32_t dest_color_component_count = 0;
spv::Id type_fragment_data_component = spv::NoResult;
@ -2485,7 +2477,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
spv::Id input_sample_id = spv::NoResult;
spv::Id spec_const_sample_id = spv::NoResult;
if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) {
if (device_features.sampleRateShading) {
if (device_info.sampleRateShading) {
// One draw for all samples.
builder.addCapability(spv::CapabilitySampleRateShading);
input_sample_id = builder.createVariable(
@ -2579,7 +2571,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
// Load the destination sample index.
spv::Id dest_sample_id = spv::NoResult;
if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) {
if (device_features.sampleRateShading) {
if (device_info.sampleRateShading) {
assert_true(input_sample_id != spv::NoResult);
dest_sample_id = builder.createUnaryOp(
spv::OpBitcast, type_uint,
@ -4242,12 +4234,13 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
uint32_t dest_sample_count = uint32_t(1)
<< uint32_t(key.shader_key.dest_msaa_samples);
bool dest_is_masked_sample =
dest_sample_count > 1 && !device_features.sampleRateShading;
dest_sample_count > 1 && !device_info.sampleRateShading;
VkPipelineShaderStageCreateInfo shader_stages[2];
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@ -4339,7 +4332,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
? VK_SAMPLE_COUNT_4_BIT
: VkSampleCountFlagBits(dest_sample_count);
if (dest_sample_count > 1) {
if (device_features.sampleRateShading) {
if (device_info.sampleRateShading) {
multisample_state.sampleShadingEnable = VK_TRUE;
multisample_state.minSampleShading = 1.0f;
if (dest_sample_count == 2 && !msaa_2x_attachments_supported_) {
@ -4370,7 +4363,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
: VK_COMPARE_OP_ALWAYS;
}
if ((mode.output == TransferOutput::kDepth &&
provider.device_extensions().ext_shader_stencil_export) ||
provider.device_info().ext_VK_EXT_shader_stencil_export) ||
mode.output == TransferOutput::kStencilBit) {
depth_stencil_state.stencilTestEnable = VK_TRUE;
depth_stencil_state.front.failOp = VK_STENCIL_OP_KEEP;
@ -4398,7 +4391,7 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
32 - xe::lzcnt(key.render_pass_key.depth_and_color_used >> 1);
color_blend_state.pAttachments = color_blend_attachments;
if (mode.output == TransferOutput::kColor) {
if (device_features.independentBlend) {
if (device_info.independentBlend) {
// State the intention more explicitly.
color_blend_attachments[key.shader_key.dest_color_rt_index]
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
@ -4505,13 +4498,8 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
const Transfer::Rectangle* resolve_clear_rectangle) {
assert_true(GetPath() == Path::kHostRenderTargets);
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
bool shader_stencil_export =
provider.device_extensions().ext_shader_stencil_export;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
uint64_t current_submission = command_processor_.GetCurrentSubmission();
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
@ -4826,7 +4814,7 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
// Gather shader keys and sort to reduce pipeline state and binding
// switches. Also gather stencil rectangles to clear if needed.
bool need_stencil_bit_draws =
dest_rt_key.is_depth && !shader_stencil_export;
dest_rt_key.is_depth && !device_info.ext_VK_EXT_shader_stencil_export;
current_transfer_invocations_.clear();
current_transfer_invocations_.reserve(
current_transfers.size() << uint32_t(need_stencil_bit_draws));
@ -5018,10 +5006,10 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
transfer_viewport.y = 0.0f;
transfer_viewport.width =
float(std::min(xe::next_pow2(transfer_framebuffer->host_extent.width),
device_limits.maxViewportDimensions[0]));
device_info.maxViewportDimensions[0]));
transfer_viewport.height = float(
std::min(xe::next_pow2(transfer_framebuffer->host_extent.height),
device_limits.maxViewportDimensions[1]));
device_info.maxViewportDimensions[1]));
transfer_viewport.minDepth = 0.0f;
transfer_viewport.maxDepth = 1.0f;
command_processor_.SetViewport(transfer_viewport);
@ -5072,7 +5060,7 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
kTransferPipelineLayoutInfos[size_t(
transfer_pipeline_layout_index)];
uint32_t transfer_sample_pipeline_count =
device_features.sampleRateShading
device_info.sampleRateShading
? 1
: uint32_t(1) << uint32_t(dest_rt_key.msaa_samples);
bool transfer_is_stencil_bit =
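
When sampleRateShading is unavailable, one transfer pipeline is created per destination sample; a hedged sketch of the per-sample masking this implies (an assumption based on the dest_is_masked_sample naming earlier in this file, using spec names):

// Each per-sample pipeline writes a single destination sample by masking
// out all the others in the multisample state.
VkSampleMask sample_mask = VkSampleMask(1) << dest_sample_index;
VkPipelineMultisampleStateCreateInfo multisample_state = {
    VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
multisample_state.pSampleMask = &sample_mask;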

View file

@ -51,7 +51,8 @@ bool VulkanSharedMemory::Initialize() {
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
const VkBufferCreateFlags sparse_flags =
VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
@ -69,16 +70,14 @@ bool VulkanSharedMemory::Initialize() {
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_create_info.queueFamilyIndexCount = 0;
buffer_create_info.pQueueFamilyIndices = nullptr;
if (cvars::vulkan_sparse_shared_memory &&
provider.IsSparseBindingSupported() &&
device_features.sparseResidencyBuffer) {
if (cvars::vulkan_sparse_shared_memory && device_info.sparseResidencyBuffer) {
if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
VK_SUCCESS) {
VkMemoryRequirements buffer_memory_requirements;
dfn.vkGetBufferMemoryRequirements(device, buffer_,
&buffer_memory_requirements);
if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
provider.memory_types_device_local(),
device_info.memory_types_device_local,
&buffer_memory_type_)) {
uint32_t allocation_size_log2;
xe::bit_scan_forward(
@ -131,7 +130,7 @@ bool VulkanSharedMemory::Initialize() {
dfn.vkGetBufferMemoryRequirements(device, buffer_,
&buffer_memory_requirements);
if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
provider.memory_types_device_local(),
device_info.memory_types_device_local,
&buffer_memory_type_)) {
XELOGE(
"Shared memory: Failed to get a device-local Vulkan memory type for "
@ -147,15 +146,15 @@ bool VulkanSharedMemory::Initialize() {
buffer_memory_allocate_info.allocationSize =
buffer_memory_requirements.size;
buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
VkMemoryDedicatedAllocateInfo buffer_memory_dedicated_allocate_info;
if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
buffer_memory_allocate_info_last->pNext =
&buffer_memory_dedicated_allocate_info;
buffer_memory_allocate_info_last =
reinterpret_cast<VkMemoryAllocateInfo*>(
&buffer_memory_dedicated_allocate_info);
buffer_memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
buffer_memory_dedicated_allocate_info.pNext = nullptr;
buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
buffer_memory_dedicated_allocate_info.buffer = buffer_;
@ -366,7 +365,7 @@ bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
if (provider.device_features().tessellationShader) {
if (provider.device_info().tessellationShader) {
bind_wait_stage_mask |=
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
}
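The memory-type scans above intersect the buffer's supported type bits with the provider's device-local mask and take the first set bit. The same selection as a standalone sketch, with C++20 <bit> standing in for xe::bit_scan_forward:

#include <bit>
#include <cstdint>
#include <optional>

// memory_type_bits comes from VkMemoryRequirements; device_local_mask has one
// bit per memory type, like device_info.memory_types_device_local.
std::optional<uint32_t> ChooseDeviceLocalType(uint32_t memory_type_bits,
                                              uint32_t device_local_mask) {
  uint32_t candidates = memory_type_bits & device_local_mask;
  if (!candidates) {
    return std::nullopt;
  }
  return uint32_t(std::countr_zero(candidates));
}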

View file

@ -17,6 +17,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/texture_util.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
@ -144,17 +145,17 @@ const VulkanTextureCache::HostFormatPair
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG,
true},
// k_Cr_Y1_Cb_Y0_REP
// VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in
// VK_FORMAT_G8B8G8R8_422_UNORM (added in
// VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is
// optional.
{{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true},
{{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM, true},
{kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM},
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_Y1_Cr_Y0_Cb_REP
// VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in
// VK_FORMAT_B8G8R8G8_422_UNORM (added in
// VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is
// optional.
{{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true},
{{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM, true},
{kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM},
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_16_16_EDRAM
@ -760,9 +761,11 @@ VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters,
// GetSamplerParameters.
VkSamplerCreateInfo sampler_create_info = {};
sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
// TODO(Triang3l): VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT if
// VK_EXT_non_seamless_cube_map and the nonSeamlessCubeMap feature are
// supported.
if (provider.device_info().nonSeamlessCubeMap &&
cvars::non_seamless_cube_map) {
sampler_create_info.flags |=
VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
}
sampler_create_info.magFilter =
parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
sampler_create_info.minFilter =
@ -778,15 +781,15 @@ VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters,
// kClampToEdge
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
// kMirrorClampToEdge
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
// kClampToHalfway
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
// kMirrorClampToHalfway
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
// kClampToBorder
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
// kMirrorClampToBorder
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR,
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE,
};
sampler_create_info.addressModeU =
kAddressModeMap[uint32_t(parameters.clamp_x)];
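Both sampler changes above follow the same pattern: optional create-time behavior is gated on the merged DeviceInfo flags rather than on raw extension structures. A condensed sketch of that pattern (feature names taken from the diff; the cvar check and the remaining sampler state elided; assumes Vulkan headers new enough for VK_EXT_non_seamless_cube_map):

#include <vulkan/vulkan.h>

struct SamplerFeatureFlags {
  bool nonSeamlessCubeMap;        // VK_EXT_non_seamless_cube_map feature.
  bool samplerMirrorClampToEdge;  // VK_KHR_sampler_mirror_clamp_to_edge.
};

VkSamplerCreateInfo MakeSamplerCreateInfo(const SamplerFeatureFlags& features,
                                          bool want_mirror_clamp_to_edge) {
  VkSamplerCreateInfo info = {};
  info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
  if (features.nonSeamlessCubeMap) {
    // Reproduces the Direct3D 9 cube filtering behavior when available.
    info.flags |= VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
  }
  // The suffixless enum needs the feature bit, not merely a Vulkan 1.2
  // device, hence the explicit check before using mirror clamp to edge.
  info.addressModeU =
      (want_mirror_clamp_to_edge && features.samplerMirrorClampToEdge)
          ? VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE
          : VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
  return info;
}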
@ -938,19 +941,17 @@ uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const {
uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight(
xenos::DataDimension dimension) const {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
switch (dimension) {
case xenos::DataDimension::k1D:
case xenos::DataDimension::k2DOrStacked:
// 1D and 2D are emulated as 2D arrays.
return device_limits.maxImageDimension2D;
return device_info.maxImageDimension2D;
case xenos::DataDimension::k3D:
return device_limits.maxImageDimension3D;
return device_info.maxImageDimension3D;
case xenos::DataDimension::kCube:
return device_limits.maxImageDimensionCube;
return device_info.maxImageDimensionCube;
default:
assert_unhandled_case(dimension);
return 0;
@ -959,17 +960,15 @@ uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight(
uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize(
xenos::DataDimension dimension) const {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
command_processor_.GetVulkanProvider().device_info();
switch (dimension) {
case xenos::DataDimension::k1D:
case xenos::DataDimension::k2DOrStacked:
// 1D and 2D are emulated as 2D arrays.
return device_limits.maxImageArrayLayers;
return device_info.maxImageArrayLayers;
case xenos::DataDimension::k3D:
return device_limits.maxImageDimension3D;
return device_info.maxImageDimension3D;
case xenos::DataDimension::kCube:
// Not requesting the imageCubeArray feature, and the Xenos doesn't
// support cube map arrays.
@ -1049,14 +1048,14 @@ std::unique_ptr<TextureCache::Texture> VulkanTextureCache::CreateTexture(
image_create_info.queueFamilyIndexCount = 0;
image_create_info.pQueueFamilyIndices = nullptr;
image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageFormatListCreateInfoKHR image_format_list_create_info;
VkImageFormatListCreateInfo image_format_list_create_info;
if (formats[1] != VK_FORMAT_UNDEFINED &&
provider.device_extensions().khr_image_format_list) {
provider.device_info().ext_1_2_VK_KHR_image_format_list) {
image_create_info_last->pNext = &image_format_list_create_info;
image_create_info_last =
reinterpret_cast<VkImageCreateInfo*>(&image_format_list_create_info);
image_format_list_create_info.sType =
VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO;
image_format_list_create_info.pNext = nullptr;
image_format_list_create_info.viewFormatCount = 2;
image_format_list_create_info.pViewFormats = formats;
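The chaining above appends VkImageFormatListCreateInfo to the image create info's pNext chain so the driver knows every format views may reinterpret the mutable-format image as. The pattern in isolation (a sketch; it prepends at the head, which is equivalent to the diff's tail-append when nothing else is chained yet):

#include <vulkan/vulkan.h>

void ChainFormatList(VkImageCreateInfo& image_create_info,
                     VkImageFormatListCreateInfo& format_list,
                     const VkFormat view_formats[2]) {
  format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO;
  format_list.viewFormatCount = 2;
  format_list.pViewFormats = view_formats;
  // Splice into the chain; format_list must outlive vkCreateImage.
  format_list.pNext = image_create_info.pNext;
  image_create_info.pNext = &format_list;
}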
@ -1635,11 +1634,7 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed,
const ui::vulkan::VulkanProvider& provider =
vulkan_texture_cache.command_processor_.GetVulkanProvider();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (device_portability_subset_features &&
!device_portability_subset_features->imageViewFormatSwizzle) {
if (!provider.device_info().imageViewFormatSwizzle) {
host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA;
}
view_key.host_swizzle = host_swizzle;
@ -1716,9 +1711,8 @@ bool VulkanTextureCache::Initialize() {
VkPhysicalDevice physical_device = provider.physical_device();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
const ui::vulkan::VulkanProvider::DeviceInfo& device_info =
provider.device_info();
// Vulkan Memory Allocator.
@ -2476,15 +2470,15 @@ bool VulkanTextureCache::Initialize() {
null_image_memory_requirements_2d_array_cube_.size;
null_image_memory_allocate_info.memoryTypeIndex =
null_image_memory_type_2d_array_cube_;
VkMemoryDedicatedAllocateInfoKHR null_image_memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
VkMemoryDedicatedAllocateInfo null_image_memory_dedicated_allocate_info;
if (device_info.ext_1_1_VK_KHR_dedicated_allocation) {
null_image_memory_allocate_info_last->pNext =
&null_image_memory_dedicated_allocate_info;
null_image_memory_allocate_info_last =
reinterpret_cast<VkMemoryAllocateInfo*>(
&null_image_memory_dedicated_allocate_info);
null_image_memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
null_image_memory_dedicated_allocate_info.pNext = nullptr;
null_image_memory_dedicated_allocate_info.image =
null_image_2d_array_cube_;
@ -2538,10 +2532,8 @@ bool VulkanTextureCache::Initialize() {
// constant components instead of the real texels. The image will be cleared
// to (0, 0, 0, 0) anyway.
VkComponentSwizzle null_image_view_swizzle =
(!device_portability_subset_features ||
device_portability_subset_features->imageViewFormatSwizzle)
? VK_COMPONENT_SWIZZLE_ZERO
: VK_COMPONENT_SWIZZLE_IDENTITY;
device_info.imageViewFormatSwizzle ? VK_COMPONENT_SWIZZLE_ZERO
: VK_COMPONENT_SWIZZLE_IDENTITY;
null_image_view_create_info.components.r = null_image_view_swizzle;
null_image_view_create_info.components.g = null_image_view_swizzle;
null_image_view_create_info.components.b = null_image_view_swizzle;
@ -2574,10 +2566,6 @@ bool VulkanTextureCache::Initialize() {
// Samplers.
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkPhysicalDeviceLimits& device_limits =
provider.device_properties().limits;
// Some MoltenVK devices have a maximum of 2048, 1024, or even 96 samplers,
// below Vulkan's minimum requirement of 4000.
// Assuming that the current VulkanTextureCache is the only one on this
@ -2585,15 +2573,14 @@ bool VulkanTextureCache::Initialize() {
// allocation slots exclusively.
// Also leaving a few slots for use by things like overlay applications.
sampler_max_count_ =
device_limits.maxSamplerAllocationCount -
device_info.maxSamplerAllocationCount -
uint32_t(ui::vulkan::VulkanProvider::HostSampler::kCount) - 16;
if (device_features.samplerAnisotropy) {
if (device_info.samplerAnisotropy) {
max_anisotropy_ = xenos::AnisoFilter(
uint32_t(xenos::AnisoFilter::kMax_1_1) +
(31 -
xe::lzcnt(uint32_t(std::min(
16.0f, std::max(1.0f, device_limits.maxSamplerAnisotropy))))));
(31 - xe::lzcnt(uint32_t(std::min(
16.0f, std::max(1.0f, device_info.maxSamplerAnisotropy))))));
} else {
max_anisotropy_ = xenos::AnisoFilter::kDisabled;
}
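The lzcnt expression above is a floor-log2 clamp of maxSamplerAnisotropy into the 1x-16x range the Xenos filter enum encodes. Spelled out, with a hypothetical enum standing in for xenos::AnisoFilter (values assumed: kDisabled = 0, then successive powers of two):

#include <algorithm>
#include <bit>
#include <cstdint>

enum class AnisoFilter : uint32_t {
  kDisabled, kMax_1_1, kMax_2_1, kMax_4_1, kMax_8_1, kMax_16_1,
};

AnisoFilter MaxAnisoFromLimit(float max_sampler_anisotropy) {
  uint32_t clamped =
      uint32_t(std::min(16.0f, std::max(1.0f, max_sampler_anisotropy)));
  // floor(log2(clamped)) == 31 - lzcnt(clamped): a limit of 16.0 yields 4
  // and thus kMax_16_1; a limit of 1.0 yields 0 and thus kMax_1_1.
  uint32_t log2_floor = 31u - uint32_t(std::countl_zero(clamped));
  return AnisoFilter(uint32_t(AnisoFilter::kMax_1_1) + log2_floor);
}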
@ -2656,10 +2643,12 @@ xenos::ClampMode VulkanTextureCache::NormalizeClampMode(
if (clamp_mode == xenos::ClampMode::kMirrorClampToEdge ||
clamp_mode == xenos::ClampMode::kMirrorClampToHalfway ||
clamp_mode == xenos::ClampMode::kMirrorClampToBorder) {
// TODO(Triang3l): VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR if
// VK_KHR_sampler_mirror_clamp_to_edge (or Vulkan 1.2) and the
// samplerMirrorClampToEdge feature are supported.
return xenos::ClampMode::kMirroredRepeat;
    // Vulkan has no equivalent for any mirror clamp mode other than
    // kMirrorClampToEdge.
return command_processor_.GetVulkanProvider()
.device_info()
.samplerMirrorClampToEdge
? xenos::ClampMode::kMirrorClampToEdge
: xenos::ClampMode::kMirroredRepeat;
}
return clamp_mode;
}

View file

@ -1067,8 +1067,9 @@ union alignas(uint32_t) LoopConstant {
// The resulting aL is `iterator * step + start`, 10-bit, and has the real
// range of [-256, 256], according to the IPR2015-00325 sequencer
// specification.
uint32_t start : 8; // +8
int32_t step : 8; // +16
uint32_t start : 8; // +8
int32_t step : 8; // +16
uint32_t _pad_24 : 8; // +24
};
};
static_assert_size(LoopConstant, sizeof(uint32_t));
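The aL formula in the comment above is straightforward to evaluate; a sketch that models just the start/step fields and applies a 10-bit sign-extended wrap (C++20 shift semantics assumed; the field offsets differ from the full register layout):

#include <cstdint>

struct LoopFieldsSketch {
  uint32_t start : 8;
  int32_t step : 8;
};

int32_t ComputeAL(LoopFieldsSketch loop, uint32_t iterator) {
  int32_t al = int32_t(iterator) * loop.step + int32_t(loop.start);
  // Keep 10 bits with sign extension; per the sequencer specification the
  // values actually produced stay within [-256, 256].
  return int32_t(uint32_t(al) << 22) >> 22;
}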
@ -1208,7 +1209,7 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
union { // dword_2
struct {
uint32_t width : 24;
uint32_t : 8;
uint32_t _pad_88 : 8;
} size_1d;
struct {
uint32_t width : 13;

View file

@ -866,9 +866,6 @@ bool VulkanImmediateDrawer::CreateTextureResource(
size_t& pending_upload_index_out) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider_.device_portability_subset_features();
// Create the image and the descriptor.
@ -913,8 +910,7 @@ bool VulkanImmediateDrawer::CreateTextureResource(
// data == nullptr is a special case for (1, 1, 1, 1), though the image will
// be cleared to (1, 1, 1, 1) anyway, just a micro-optimization.
VkComponentSwizzle swizzle =
(data || (device_portability_subset_features &&
!device_portability_subset_features->imageViewFormatSwizzle))
(data || !provider_.device_info().imageViewFormatSwizzle)
? VK_COMPONENT_SWIZZLE_IDENTITY
: VK_COMPONENT_SWIZZLE_ONE;
image_view_create_info.components.r = swizzle;
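The swizzle choice above is a micro-optimization: when no texel data will be uploaded, constant white can come from the view swizzle alone, unless the portability subset denies non-identity swizzles, in which case the image is cleared to (1, 1, 1, 1) anyway and identity gives the same result. The decision in isolation:

#include <vulkan/vulkan.h>

VkComponentSwizzle ChooseImmediateSwizzle(bool data_present,
                                          bool image_view_format_swizzle) {
  // image_view_format_swizzle mirrors DeviceInfo::imageViewFormatSwizzle.
  return (data_present || !image_view_format_swizzle)
             ? VK_COMPONENT_SWIZZLE_IDENTITY
             : VK_COMPONENT_SWIZZLE_ONE;
}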

View file

@ -27,8 +27,7 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
const VulkanProvider::DeviceFunctions& dfn = provider.dfn();
const VulkanProvider::InstanceExtensions& instance_extensions =
provider.instance_extensions();
const VulkanProvider::DeviceExtensions& device_extensions =
provider.device_extensions();
const VulkanProvider::DeviceInfo& device_info = provider.device_info();
VmaVulkanFunctions vma_vulkan_functions = {};
VmaAllocatorCreateInfo allocator_create_info = {};
@ -58,31 +57,33 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage;
vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage;
vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer;
if (device_extensions.khr_get_memory_requirements2) {
if (device_info.ext_1_1_VK_KHR_get_memory_requirements2) {
vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR =
dfn.vkGetBufferMemoryRequirements2KHR;
dfn.vkGetBufferMemoryRequirements2;
vma_vulkan_functions.vkGetImageMemoryRequirements2KHR =
dfn.vkGetImageMemoryRequirements2KHR;
if (device_extensions.khr_dedicated_allocation) {
dfn.vkGetImageMemoryRequirements2;
if (device_info.ext_1_1_VK_KHR_dedicated_allocation) {
allocator_create_info.flags |=
VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
}
}
if (device_extensions.khr_bind_memory2) {
vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR;
vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR;
if (device_info.ext_1_1_VK_KHR_bind_memory2) {
vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2;
vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2;
allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT;
}
if (instance_extensions.khr_get_physical_device_properties2) {
vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR =
ifn.vkGetPhysicalDeviceMemoryProperties2KHR;
if (device_extensions.ext_memory_budget) {
ifn.vkGetPhysicalDeviceMemoryProperties2;
if (device_info.ext_VK_EXT_memory_budget) {
allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
}
}
if (device_extensions.khr_maintenance4) {
if (device_info.ext_1_3_VK_KHR_maintenance4) {
vma_vulkan_functions.vkGetDeviceBufferMemoryRequirements =
dfn.vkGetDeviceBufferMemoryRequirements;
vma_vulkan_functions.vkGetDeviceImageMemoryRequirements =
dfn.vkGetDeviceImageMemoryRequirementsKHR;
dfn.vkGetDeviceImageMemoryRequirements;
}
if (externally_synchronized) {
@ -93,8 +94,7 @@ VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
allocator_create_info.device = provider.device();
allocator_create_info.pVulkanFunctions = &vma_vulkan_functions;
allocator_create_info.instance = provider.instance();
allocator_create_info.vulkanApiVersion =
provider.device_properties().apiVersion;
allocator_create_info.vulkanApiVersion = device_info.apiVersion;
VmaAllocator allocator;
if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) {
XELOGE("Failed to create a Vulkan Memory Allocator instance");

View file

@ -208,7 +208,7 @@ VulkanPresenter::~VulkanPresenter() {
}
Surface::TypeFlags VulkanPresenter::GetSupportedSurfaceTypes() const {
if (!provider_.device_extensions().khr_swapchain) {
if (!provider_.device_info().ext_VK_KHR_swapchain) {
return 0;
}
return GetSurfaceTypesSupportedByInstance(provider_.instance_extensions());

File diff suppressed because it is too large

View file

@ -57,6 +57,160 @@ namespace vulkan {
class VulkanProvider : public GraphicsProvider {
public:
struct DeviceInfo {
// "ext_1_X"-prefixed extension fields are set to true not only if the
// extension itself is actually exposed, but also if it was promoted to the
// device's API version. Therefore, merely the field being set to true
// doesn't imply that all the required features in the extension are
// supported - actual properties and features must be checked rather than
// the extension itself where they matter.
// Vulkan 1.0.
uint32_t memory_types_device_local;
uint32_t memory_types_host_visible;
uint32_t memory_types_host_coherent;
uint32_t memory_types_host_cached;
uint32_t apiVersion;
uint32_t maxImageDimension2D;
uint32_t maxImageDimension3D;
uint32_t maxImageDimensionCube;
uint32_t maxImageArrayLayers;
uint32_t maxStorageBufferRange;
uint32_t maxSamplerAllocationCount;
uint32_t maxPerStageDescriptorSamplers;
uint32_t maxPerStageDescriptorStorageBuffers;
uint32_t maxPerStageDescriptorSampledImages;
uint32_t maxPerStageResources;
uint32_t maxVertexOutputComponents;
uint32_t maxTessellationEvaluationOutputComponents;
uint32_t maxGeometryInputComponents;
uint32_t maxGeometryOutputComponents;
uint32_t maxGeometryTotalOutputComponents;
uint32_t maxFragmentInputComponents;
uint32_t maxFragmentCombinedOutputResources;
float maxSamplerAnisotropy;
uint32_t maxViewportDimensions[2];
float viewportBoundsRange[2];
VkDeviceSize minUniformBufferOffsetAlignment;
VkDeviceSize minStorageBufferOffsetAlignment;
uint32_t maxFramebufferWidth;
uint32_t maxFramebufferHeight;
VkSampleCountFlags framebufferColorSampleCounts;
VkSampleCountFlags framebufferDepthSampleCounts;
VkSampleCountFlags framebufferStencilSampleCounts;
VkSampleCountFlags framebufferNoAttachmentsSampleCounts;
VkSampleCountFlags sampledImageColorSampleCounts;
VkSampleCountFlags sampledImageIntegerSampleCounts;
VkSampleCountFlags sampledImageDepthSampleCounts;
VkSampleCountFlags sampledImageStencilSampleCounts;
    bool standardSampleLocations;
VkDeviceSize optimalBufferCopyOffsetAlignment;
VkDeviceSize optimalBufferCopyRowPitchAlignment;
VkDeviceSize nonCoherentAtomSize;
bool fullDrawIndexUint32;
bool independentBlend;
bool geometryShader;
bool tessellationShader;
bool sampleRateShading;
bool depthClamp;
bool fillModeNonSolid;
bool samplerAnisotropy;
bool vertexPipelineStoresAndAtomics;
bool fragmentStoresAndAtomics;
bool shaderClipDistance;
bool shaderCullDistance;
bool sparseBinding;
bool sparseResidencyBuffer;
// VK_KHR_swapchain (#2).
bool ext_VK_KHR_swapchain;
// VK_KHR_sampler_mirror_clamp_to_edge (#15, Vulkan 1.2).
bool ext_1_2_VK_KHR_sampler_mirror_clamp_to_edge;
bool samplerMirrorClampToEdge;
// VK_KHR_dedicated_allocation (#128, Vulkan 1.1).
bool ext_1_1_VK_KHR_dedicated_allocation;
// VK_EXT_shader_stencil_export (#141).
bool ext_VK_EXT_shader_stencil_export;
// VK_KHR_get_memory_requirements2 (#147, Vulkan 1.1).
bool ext_1_1_VK_KHR_get_memory_requirements2;
// VK_KHR_image_format_list (#148, Vulkan 1.2).
bool ext_1_2_VK_KHR_image_format_list;
// VK_KHR_sampler_ycbcr_conversion (#157, Vulkan 1.1).
bool ext_1_1_VK_KHR_sampler_ycbcr_conversion;
// VK_KHR_bind_memory2 (#158, Vulkan 1.1).
bool ext_1_1_VK_KHR_bind_memory2;
// VK_KHR_portability_subset (#164).
bool ext_VK_KHR_portability_subset;
bool constantAlphaColorBlendFactors;
bool imageViewFormatReinterpretation;
bool imageViewFormatSwizzle;
bool pointPolygons;
bool separateStencilMaskRef;
bool shaderSampleRateInterpolationFunctions;
bool triangleFans;
// VK_KHR_shader_float_controls (#198, Vulkan 1.2).
bool ext_1_2_VK_KHR_shader_float_controls;
bool shaderSignedZeroInfNanPreserveFloat32;
bool shaderDenormFlushToZeroFloat32;
bool shaderRoundingModeRTEFloat32;
// VK_KHR_spirv_1_4 (#237, Vulkan 1.2).
bool ext_1_2_VK_KHR_spirv_1_4;
// VK_EXT_memory_budget (#238).
bool ext_VK_EXT_memory_budget;
// VK_EXT_fragment_shader_interlock (#252).
bool ext_VK_EXT_fragment_shader_interlock;
bool fragmentShaderSampleInterlock;
bool fragmentShaderPixelInterlock;
// VK_EXT_shader_demote_to_helper_invocation (#277, Vulkan 1.3).
bool ext_1_3_VK_EXT_shader_demote_to_helper_invocation;
bool shaderDemoteToHelperInvocation;
// VK_KHR_maintenance4 (#414, Vulkan 1.3).
bool ext_1_3_VK_KHR_maintenance4;
// VK_EXT_non_seamless_cube_map (#423).
bool ext_VK_EXT_non_seamless_cube_map;
bool nonSeamlessCubeMap;
};
~VulkanProvider();
static std::unique_ptr<VulkanProvider> Create(bool is_surface_required);
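A hypothetical usage sketch of the convention the DeviceInfo comment describes: the promoted-extension flag gates the interface, the feature bit gates the semantics, and both must pass:

bool CanUseRTERounding(const VulkanProvider::DeviceInfo& device_info) {
  // ext_1_2_VK_KHR_shader_float_controls may be true purely because the
  // device reports Vulkan 1.2+, so the actual feature is checked as well.
  return device_info.ext_1_2_VK_KHR_shader_float_controls &&
         device_info.shaderRoundingModeRTEFloat32;
}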
@ -106,7 +260,7 @@ class VulkanProvider : public GraphicsProvider {
struct InstanceFunctions {
#define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
PFN_##extension_name extension_name;
PFN_##core_name core_name;
#include "xenia/ui/vulkan/functions/instance_1_0.inc"
#include "xenia/ui/vulkan/functions/instance_ext_debug_utils.inc"
#include "xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc"
@ -124,61 +278,9 @@ class VulkanProvider : public GraphicsProvider {
const InstanceFunctions& ifn() const { return ifn_; }
VkPhysicalDevice physical_device() const { return physical_device_; }
const VkPhysicalDeviceProperties& device_properties() const {
return device_properties_;
}
const VkPhysicalDeviceFeatures& device_features() const {
return device_features_;
}
struct DeviceExtensions {
bool ext_fragment_shader_interlock;
bool ext_memory_budget;
// Core since 1.3.0.
bool ext_shader_demote_to_helper_invocation;
bool ext_shader_stencil_export;
// Core since 1.1.0.
bool khr_bind_memory2;
// Core since 1.1.0.
bool khr_dedicated_allocation;
// Core since 1.1.0.
bool khr_get_memory_requirements2;
// Core since 1.2.0.
bool khr_image_format_list;
// Core since 1.3.0.
bool khr_maintenance4;
// Requires the VK_KHR_get_physical_device_properties2 instance extension.
bool khr_portability_subset;
// Core since 1.1.0.
bool khr_sampler_ycbcr_conversion;
// Core since 1.2.0.
bool khr_shader_float_controls;
// Core since 1.2.0.
bool khr_spirv_1_4;
bool khr_swapchain;
};
const DeviceExtensions& device_extensions() const {
return device_extensions_;
}
// Returns nullptr if the device is fully compliant with Vulkan 1.0.
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features() const {
if (!device_extensions_.khr_portability_subset) {
return nullptr;
}
return &device_portability_subset_features_;
}
uint32_t memory_types_device_local() const {
return memory_types_device_local_;
}
uint32_t memory_types_host_visible() const {
return memory_types_host_visible_;
}
uint32_t memory_types_host_coherent() const {
return memory_types_host_coherent_;
}
uint32_t memory_types_host_cached() const {
return memory_types_host_cached_;
}
const DeviceInfo& device_info() const { return device_info_; }
struct QueueFamily {
uint32_t queue_first_index = 0;
uint32_t queue_count = 0;
@ -196,18 +298,6 @@ class VulkanProvider : public GraphicsProvider {
uint32_t queue_family_sparse_binding() const {
return queue_family_sparse_binding_;
}
const VkPhysicalDeviceFloatControlsPropertiesKHR&
device_float_controls_properties() const {
return device_float_controls_properties_;
}
const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT&
device_fragment_shader_interlock_features() const {
return device_fragment_shader_interlock_features_;
}
const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT&
device_shader_demote_to_helper_invocation_features() const {
return device_shader_demote_to_helper_invocation_features_;
}
struct Queue {
VkQueue queue = VK_NULL_HANDLE;
@ -235,7 +325,7 @@ class VulkanProvider : public GraphicsProvider {
struct DeviceFunctions {
#define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
PFN_##extension_name extension_name;
PFN_##core_name core_name;
#include "xenia/ui/vulkan/functions/device_1_0.inc"
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
@ -261,10 +351,6 @@ class VulkanProvider : public GraphicsProvider {
ifn_.vkSetDebugUtilsObjectNameEXT(device_, &name_info);
}
bool IsSparseBindingSupported() const {
return queue_family_sparse_binding_ != UINT32_MAX;
}
// Samplers that may be useful for host needs. Only these samplers should be
// used in host, non-emulation contexts, because the total number of samplers
// is heavily limited (4000) on Nvidia GPUs - the rest of samplers are
@ -298,6 +384,12 @@ class VulkanProvider : public GraphicsProvider {
const VkDebugUtilsMessengerCallbackDataEXT* callback_data,
void* user_data);
// For the current `physical_device_`, sets up the members obtained from the
// physical device info, and tries to create a device and get the needed
// queues.
// The call is successful if `device_` is not VK_NULL_HANDLE as a result.
void TryCreateDevice();
bool is_surface_required_;
RenderdocApi renderdoc_api_;
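Under the TryCreateDevice contract documented above, device selection reduces to enumerate-and-try; a hedged sketch of such a loop (the callback stands in for the private method, which keeps the created VkDevice on success):

#include <vector>
#include <vulkan/vulkan.h>

VkPhysicalDevice PickFirstUsable(const std::vector<VkPhysicalDevice>& devices,
                                 bool (*try_create)(VkPhysicalDevice)) {
  for (VkPhysicalDevice candidate : devices) {
    // Mirrors "successful if device_ is not VK_NULL_HANDLE as a result".
    if (try_create(candidate)) {
      return candidate;
    }
  }
  return VK_NULL_HANDLE;
}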
@ -313,30 +405,21 @@ class VulkanProvider : public GraphicsProvider {
InstanceExtensions instance_extensions_;
VkInstance instance_ = VK_NULL_HANDLE;
InstanceFunctions ifn_;
VkDebugUtilsMessengerEXT debug_messenger_ = VK_NULL_HANDLE;
bool debug_names_used_ = false;
VkPhysicalDevice physical_device_ = VK_NULL_HANDLE;
VkPhysicalDeviceProperties device_properties_;
VkPhysicalDeviceFeatures device_features_;
DeviceExtensions device_extensions_;
VkPhysicalDevicePortabilitySubsetFeaturesKHR
device_portability_subset_features_;
uint32_t memory_types_device_local_;
uint32_t memory_types_host_visible_;
uint32_t memory_types_host_coherent_;
uint32_t memory_types_host_cached_;
DeviceInfo device_info_ = {};
std::vector<QueueFamily> queue_families_;
uint32_t queue_family_graphics_compute_;
uint32_t queue_family_sparse_binding_;
VkPhysicalDeviceFloatControlsPropertiesKHR device_float_controls_properties_;
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT
device_fragment_shader_interlock_features_;
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT
device_shader_demote_to_helper_invocation_features_;
VkDevice device_ = VK_NULL_HANDLE;
DeviceFunctions dfn_ = {};
// Queues contain a mutex, can't use std::vector.
std::unique_ptr<Queue[]> queues_;

View file

@ -138,13 +138,13 @@ VulkanUploadBufferPool::CreatePageImplementation() {
memory_allocate_info.pNext = nullptr;
memory_allocate_info.allocationSize = allocation_size_;
memory_allocate_info.memoryTypeIndex = memory_type_;
VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
if (provider_.device_extensions().khr_dedicated_allocation) {
VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
if (provider_.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
&memory_dedicated_allocate_info);
memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
memory_dedicated_allocate_info.pNext = nullptr;
memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
memory_dedicated_allocate_info.buffer = buffer;

View file

@ -27,8 +27,8 @@ void FlushMappedMemoryRange(const VulkanProvider& provider,
assert_false(size != VK_WHOLE_SIZE && memory_size == VK_WHOLE_SIZE);
assert_true(memory_size == VK_WHOLE_SIZE || offset <= memory_size);
assert_true(memory_size == VK_WHOLE_SIZE || size <= memory_size - offset);
if (!size ||
(provider.memory_types_host_coherent() & (uint32_t(1) << memory_type))) {
if (!size || (provider.device_info().memory_types_host_coherent &
(uint32_t(1) << memory_type))) {
return;
}
VkMappedMemoryRange range;
@ -38,7 +38,7 @@ void FlushMappedMemoryRange(const VulkanProvider& provider,
range.offset = offset;
range.size = size;
VkDeviceSize non_coherent_atom_size =
provider.device_properties().limits.nonCoherentAtomSize;
provider.device_info().nonCoherentAtomSize;
// On some Android implementations, nonCoherentAtomSize is 0, not 1.
if (non_coherent_atom_size > 1) {
range.offset = offset / non_coherent_atom_size * non_coherent_atom_size;
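The range adjustment above expands the flushed span outward to nonCoherentAtomSize granularity, as vkFlushMappedMemoryRanges requires, while keeping the end inside the allocation. Fully spelled out (a sketch assuming size is not VK_WHOLE_SIZE):

#include <algorithm>
#include <vulkan/vulkan.h>

void AlignFlushRange(VkDeviceSize atom, VkDeviceSize memory_size,
                     VkDeviceSize& offset, VkDeviceSize& size) {
  // An atom of 0 (seen on some Android implementations) behaves like 1.
  if (atom <= 1) {
    return;
  }
  VkDeviceSize end = offset + size;
  offset = offset / atom * atom;  // Round the start down.
  // Round the end up, but never past the end of the allocation.
  end = std::min((end + atom - 1) / atom * atom, memory_size);
  size = end - offset;
}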
@ -89,13 +89,13 @@ bool CreateDedicatedAllocationBuffer(
memory_allocate_info.pNext = nullptr;
memory_allocate_info.allocationSize = memory_requirements.size;
memory_allocate_info.memoryTypeIndex = memory_type;
VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
&memory_dedicated_allocate_info);
memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
memory_dedicated_allocate_info.pNext = nullptr;
memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
memory_dedicated_allocate_info.buffer = buffer;
@ -154,13 +154,13 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider,
memory_allocate_info.pNext = nullptr;
memory_allocate_info.allocationSize = memory_requirements.size;
memory_allocate_info.memoryTypeIndex = memory_type;
VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
VkMemoryDedicatedAllocateInfo memory_dedicated_allocate_info;
if (provider.device_info().ext_1_1_VK_KHR_dedicated_allocation) {
memory_allocate_info_last->pNext = &memory_dedicated_allocate_info;
memory_allocate_info_last = reinterpret_cast<VkMemoryAllocateInfo*>(
&memory_dedicated_allocate_info);
memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
memory_dedicated_allocate_info.pNext = nullptr;
memory_dedicated_allocate_info.image = image;
memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE;

View file

@ -50,7 +50,7 @@ enum class MemoryPurpose {
inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider,
VkDeviceSize size) {
VkDeviceSize non_coherent_atom_size =
provider.device_properties().limits.nonCoherentAtomSize;
provider.device_info().nonCoherentAtomSize;
// On some Android implementations, nonCoherentAtomSize is 0, not 1.
if (non_coherent_atom_size > 1) {
size = xe::round_up(size, non_coherent_atom_size, false);
@ -61,8 +61,8 @@ inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider,
inline uint32_t ChooseHostMemoryType(const VulkanProvider& provider,
uint32_t supported_types,
bool is_readback) {
supported_types &= provider.memory_types_host_visible();
uint32_t host_cached = provider.memory_types_host_cached();
supported_types &= provider.device_info().memory_types_host_visible;
uint32_t host_cached = provider.device_info().memory_types_host_cached;
uint32_t memory_type;
// For upload, uncached is preferred so writes do not pollute the CPU cache.
// For readback, cached is preferred so multiple CPU reads are fast.
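That preference can be expressed as a two-pass pick over the host-visible types; a sketch using C++20 <bit> for the scan (masks are per-type bit masks as in DeviceInfo; UINT32_MAX signals no usable type):

#include <bit>
#include <cstdint>

uint32_t ChooseHostTypeSketch(uint32_t supported_types,
                              uint32_t host_visible_mask,
                              uint32_t host_cached_mask, bool is_readback) {
  supported_types &= host_visible_mask;
  if (!supported_types) {
    return UINT32_MAX;
  }
  // Readback wants cached types; upload wants uncached ones.
  uint32_t preferred = is_readback ? (supported_types & host_cached_mask)
                                   : (supported_types & ~host_cached_mask);
  return uint32_t(std::countr_zero(preferred ? preferred : supported_types));
}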
@ -107,12 +107,12 @@ void FlushMappedMemoryRange(const VulkanProvider& provider,
VkDeviceSize size = VK_WHOLE_SIZE);
inline VkExtent2D GetMax2DFramebufferExtent(const VulkanProvider& provider) {
const VkPhysicalDeviceLimits& limits = provider.device_properties().limits;
const VulkanProvider::DeviceInfo& device_info = provider.device_info();
VkExtent2D max_extent;
max_extent.width =
std::min(limits.maxFramebufferWidth, limits.maxImageDimension2D);
max_extent.height =
std::min(limits.maxFramebufferHeight, limits.maxImageDimension2D);
max_extent.width = std::min(device_info.maxFramebufferWidth,
device_info.maxImageDimension2D);
max_extent.height = std::min(device_info.maxFramebufferHeight,
device_info.maxImageDimension2D);
return max_extent;
}

@ -1 +1 @@
Subproject commit b32da5329b50e3cb96229aaecba9ded032fe29cc
Subproject commit 31aa7f634b052d87ede4664053e85f3f4d1d50d3