diff options
author | Ilia Mirkin <[email protected]> | 2019-08-06 21:59:44 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2019-08-07 22:31:56 -0400 |
commit | a2bb7b26a1c4ed1c00c9d81bcd0318ff0acb141a (patch) | |
tree | 7391990d9d065996ea99168fcb0af0637f000e96 /src | |
parent | 582c86346d77538009d993c4cf8c7aa4021c0ad0 (diff) |
gallium: redefine ATOMINC_WRAP to be more hardware-friendly
Both AMD and NVIDIA hardware define it this way. Instead of replicating
the logic everywhere, just fix it up in one place.
Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/docs/source/tgsi.rst | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 11 |
3 files changed, 11 insertions, 14 deletions
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 17ad097e85e..e72b047dbd5 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2846,7 +2846,7 @@ These atomic operations may only be used with 32-bit integer image formats. dst_x = resource[offset] + 1 - resource[offset] = dst_x < src_x ? dst_x : 0 + resource[offset] = dst_x <= src_x ? dst_x : 0 .. opcode:: ATOMDEC_WRAP - Atomic decrement + wrap around diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 4a4ba43780a..f79ed2c57e1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -828,18 +828,6 @@ static void atomic_emit( args.data[num_data++] = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0)); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMINC_WRAP) { - /* ATOMIC_INC instruction does: - * value = (value + 1) % (data + 1) - * but we want: - * value = (value + 1) % data - * So replace 'data' by 'data - 1'. - */ - args.data[0] = LLVMBuildSub(ctx->ac.builder, - args.data[0], - ctx->ac.i32_1, ""); - } - args.cache_policy = get_cache_policy(ctx, inst, true, false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ff2ec0726e8..9b982569490 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3938,9 +3938,18 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; break; - case ir_intrinsic_image_atomic_inc_wrap: + case ir_intrinsic_image_atomic_inc_wrap: { + /* There's a bit of disagreement between GLSL and the hardware. The + * hardware wants to wrap after the given wrap value, while GLSL + * wants to wrap at the value. Subtract 1 to make up the difference. + */ + st_src_reg wrap = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), + arg1, st_src_reg_for_int(-1)); + arg1 = wrap; opcode = TGSI_OPCODE_ATOMINC_WRAP; break; + } case ir_intrinsic_image_atomic_dec_wrap: opcode = TGSI_OPCODE_ATOMDEC_WRAP; break; |