diff options
author | Gert Wollny <[email protected]> | 2018-05-26 18:48:31 +0200 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2018-05-28 02:57:46 +0100 |
commit | 42cd2810aa30dfd825d46a305134fbc8f239247c (patch) | |
tree | 4c239145f80d0bb759cb9adf52bc33332a47fd5d /src | |
parent | 58fb613a51994d111ee77a65bc7f3d60b155c687 (diff) |
r600: Correct IDIV if DST and SRC use the same temporary
In cases like
IDIV TEMP[0].xy TEMP[0].xx TEMP[1].yy
the result is written to a register that is also a source register.
Since the components are evaluated one by one, writing an earlier component
may overwrite a source value that a later component still needs. Work around
this by writing the result to an additional temporary, and copying it to the
destination afterwards, whenever the destination register index is equal to
one of the source register indices.
Fixes:
dEQP-GLES2.functional.shaders.operator.binary_operator.div.*
Signed-off-by: Gert Wollny <[email protected]>
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 52 |
1 file changed, 49 insertions, 3 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index bd511c76ac2..da89bb28e7a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5727,10 +5727,19 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) struct r600_bytecode_alu alu; int i, r, j; unsigned write_mask = inst->Dst[0].Register.WriteMask; + int lasti = tgsi_last_instruction(write_mask); int tmp0 = ctx->temp_reg; int tmp1 = r600_get_temp(ctx); int tmp2 = r600_get_temp(ctx); int tmp3 = r600_get_temp(ctx); + int tmp4 = 0; + + /* Use additional temp if dst register and src register are the same */ + if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index || + inst->Src[1].Register.Index == inst->Dst[0].Register.Index) { + tmp4 = r600_get_temp(ctx); + } + /* Unsigned path: * * we need to represent src1 as src2*q + r, where q - quotient, r - remainder @@ -6345,7 +6354,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) alu.dst.chan = 2; alu.dst.write = 1; } else { - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (tmp4 > 0) { + alu.dst.sel = tmp4; + alu.dst.chan = i; + alu.dst.write = 1; + } else { + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + } } alu.src[0].sel = tmp1; @@ -6387,7 +6402,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (tmp4 > 0) { + alu.dst.sel = tmp4; + alu.dst.chan = i; + alu.dst.write = 1; + } else { + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + } r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = tmp0; @@ -6423,7 +6444,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (tmp4 > 0) { + alu.dst.sel = tmp4; + alu.dst.chan = i; + alu.dst.write = 1; + } else { + tgsi_dst(ctx, 
&inst->Dst[0], i, &alu.dst); + } alu.src[0].sel = tmp2; alu.src[0].chan = 2; @@ -6438,6 +6465,25 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } } + + if (tmp4 > 0) { + for (i = 0; i <= lasti; ++i) { + if (!(write_mask & (1<<i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.src[0].sel = tmp4; + alu.src[0].chan = i; + + if (i == lasti) + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) + return r; + } + } + return 0; } |