summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2019-08-06 21:59:44 -0400
committer: Ilia Mirkin <[email protected]> 2019-08-07 22:31:56 -0400
commit: a2bb7b26a1c4ed1c00c9d81bcd0318ff0acb141a (patch)
tree: 7391990d9d065996ea99168fcb0af0637f000e96 /src
parent: 582c86346d77538009d993c4cf8c7aa4021c0ad0 (diff)
gallium: redefine ATOMINC_WRAP to be more hardware-friendly
Both AMD and NVIDIA hardware define it this way. Instead of replicating the
logic everywhere, just fix it up in one place.

Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--  src/gallium/docs/source/tgsi.rst                   |  2
-rw-r--r--  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  | 12
-rw-r--r--  src/mesa/state_tracker/st_glsl_to_tgsi.cpp         | 11
3 files changed, 11 insertions, 14 deletions
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 17ad097e85e..e72b047dbd5 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2846,7 +2846,7 @@ These atomic operations may only be used with 32-bit integer image formats.
dst_x = resource[offset] + 1
- resource[offset] = dst_x < src_x ? dst_x : 0
+ resource[offset] = dst_x <= src_x ? dst_x : 0
.. opcode:: ATOMDEC_WRAP - Atomic decrement + wrap around
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 4a4ba43780a..f79ed2c57e1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -828,18 +828,6 @@ static void atomic_emit(
args.data[num_data++] =
ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMINC_WRAP) {
- /* ATOMIC_INC instruction does:
- * value = (value + 1) % (data + 1)
- * but we want:
- * value = (value + 1) % data
- * So replace 'data' by 'data - 1'.
- */
- args.data[0] = LLVMBuildSub(ctx->ac.builder,
- args.data[0],
- ctx->ac.i32_1, "");
- }
-
args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ff2ec0726e8..9b982569490 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3938,9 +3938,18 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
case ir_intrinsic_image_atomic_comp_swap:
opcode = TGSI_OPCODE_ATOMCAS;
break;
- case ir_intrinsic_image_atomic_inc_wrap:
+ case ir_intrinsic_image_atomic_inc_wrap: {
+ /* There's a bit of disagreement between GLSL and the hardware. The
+ * hardware wants to wrap after the given wrap value, while GLSL
+ * wants to wrap at the value. Subtract 1 to make up the difference.
+ */
+ st_src_reg wrap = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap),
+ arg1, st_src_reg_for_int(-1));
+ arg1 = wrap;
opcode = TGSI_OPCODE_ATOMINC_WRAP;
break;
+ }
case ir_intrinsic_image_atomic_dec_wrap:
opcode = TGSI_OPCODE_ATOMDEC_WRAP;
break;