diff options
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 7 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 13 |
2 files changed, 18 insertions, 2 deletions
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index c107d551138..ddf94625972 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -4831,8 +4831,8 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline) unsigned max_waves_per_sh = 0; uint64_t va; - pipeline->cs.buf = malloc(20 * 4); - pipeline->cs.max_dw = 20; + pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 22 : 20; + pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4); compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset; @@ -4844,6 +4844,9 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline) radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2); radeon_emit(&pipeline->cs, compute_shader->config.rsrc1); radeon_emit(&pipeline->cs, compute_shader->config.rsrc2); + if (device->physical_device->rad_info.chip_class >= GFX10) { + radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3); + } radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE, S_00B860_WAVES(pipeline->max_waves) | diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 2f3d85d7bf4..0561b46ae85 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -701,10 +701,16 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs); /* +3 for scratch wave offset and VCC */ unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3); + unsigned num_shared_vgprs = config_in->num_shared_vgprs; + /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */ + assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) + || (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0)); + unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8; *config_out = *config_in; config_out->num_vgprs = num_vgprs; config_out->num_sgprs = num_sgprs; + config_out->num_shared_vgprs = num_shared_vgprs; /* Enable 64-bit and 16-bit denormals, because there is no performance * cost. @@ -760,6 +766,7 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1); } + config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_TESS_CTRL: if (pdevice->rad_info.chip_class >= GFX9) { @@ -777,6 +784,7 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, } config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10); + config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_VERTEX: if (info->is_ngg) { @@ -808,14 +816,17 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, } config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); + config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); } break; case MESA_SHADER_FRAGMENT: config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10); + config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_GEOMETRY: config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10); + config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_COMPUTE: config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | @@ -828,6 +839,8 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, info->cs.uses_thread_id[1] ? 1 : 0) | S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | S_00B84C_LDS_SIZE(config_in->lds_size); + config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks); + break; default: unreachable("unsupported shader type"); |