diff options
author | Marek Olšák <[email protected]> | 2020-01-23 15:52:01 -0500 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-17 20:47:48 +0000 |
commit | 56cc10bd27b24d513de88bf7fa94a6c8f43e348f (patch) | |
tree | 3c3effae8af6a1424a4dbea85d567f7341ad4ce4 | |
parent | e4959add2f44517b2227521af5aaf2919aaa6c3b (diff) |
ac: unify denorm setting enforcement
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4196>
-rw-r--r-- | src/amd/common/ac_binary.c | 13 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_llvm.c | 19 |
3 files changed, 14 insertions, 32 deletions
diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
index 5f92a57d7bf..8761422bd6b 100644
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -58,11 +58,13 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
 			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
 			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+			/* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
 			conf->float_mode = G_00B028_FLOAT_MODE(value);
 			conf->rsrc1 = value;
 			break;
 		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
 			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+			/* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
 			conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
 			conf->rsrc2 = value;
 			break;
@@ -124,4 +126,15 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
 		/* sgprs spills aren't spilling */
 		conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
 	}
+
+	/* Enable 64-bit and 16-bit denormals, because there is no performance
+	 * cost.
+	 *
+	 * Don't enable denormals for 32-bit floats, because:
+	 * - denormals disable output modifiers
+	 * - denormals break v_mad_f32
+	 * - GFX6 & GFX7 would be very slow
+	 */
+	conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
+	conf->float_mode |= V_00B028_FP_64_DENORMS;
 }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 98c98db5665..70a51ee01d0 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -966,20 +966,6 @@ radv_shader_variant_create(struct radv_device *device,
 			return NULL;
 		}
-		/* Enable 64-bit and 16-bit denormals, because there is no performance
-		 * cost.
-		 *
-		 * If denormals are enabled, all floating-point output modifiers are
-		 * ignored.
-		 *
-		 * Don't enable denormals for 32-bit floats, because:
-		 * - Floating-point output modifiers would be ignored by the hw.
-		 * - Some opcodes don't support denormals, such as v_mad_f32. We would
-		 *   have to stop using those.
-		 * - GFX6 & GFX7 would be very slow.
-		 */
-		config.float_mode |= V_00B028_FP_64_DENORMS;
-
 		if (rtld_binary.lds_size > 0) {
 			unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
 			config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 12a6d846c35..dca604afe40 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -130,24 +130,7 @@ bool si_compile_llvm(struct si_screen *sscreen,
 	bool ok = ac_rtld_read_config(&rtld, conf);
 	ac_rtld_close(&rtld);
-	if (!ok)
-		return false;
-
-	/* Enable 64-bit and 16-bit denormals, because there is no performance
-	 * cost.
-	 *
-	 * If denormals are enabled, all floating-point output modifiers are
-	 * ignored.
-	 *
-	 * Don't enable denormals for 32-bit floats, because:
-	 * - Floating-point output modifiers would be ignored by the hw.
-	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
-	 *   have to stop using those.
-	 * - GFX6 & GFX7 would be very slow.
-	 */
-	conf->float_mode |= V_00B028_FP_64_DENORMS;
-
-	return true;
+	return ok;
 }
 void si_llvm_context_init(struct si_shader_context *ctx,