aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
author: Nicolai Hähnle <[email protected]> 2017-09-15 18:34:48 +0200
committer: Nicolai Hähnle <[email protected]> 2017-09-29 12:07:50 +0200
commit: 3c78215a1cf4e3d58295a6d3171a2c34c51875d5 (patch)
tree: d5850687d051945e6580725268d6c71062414a4a /src/gallium/drivers/r600
parent: dbe7fc00d5715bcc08acc3141414da037938bbdd (diff)
tgsi: clarify the semantics of DFRACEXP
The status quo is quite the mess:

1. tgsi_exec will do a per-channel computation, and store the dst[0]
   result (significand) correctly for each channel. The dst[1] result
   (exponent) will be written to the first bit set in the writemask.
   So per-component calculation only works partially.

2. r600 will only do a single computation. It will replicate the
   exponent but not the significand.

3. The docs pretend that there's per-component calculation, but even
   get dst[0] and dst[1] confused.

4. Luckily, st_glsl_to_tgsi only ever emits single-component
   instructions, and kind-of assumes that everything is replicated,
   generating this for the dvec4 case:

     DFRACEXP TEMP[0].xy, TEMP[1].x, CONST[0][0].xyxy
     DFRACEXP TEMP[0].zw, TEMP[1].y, CONST[0][0].zwzw
     DFRACEXP TEMP[2].xy, TEMP[1].z, CONST[0][1].xyxy
     DFRACEXP TEMP[2].zw, TEMP[1].w, CONST[0][1].zwzw

Settle on the simplest behavior, which is single-component calculation
with replication, document it, and adjust tgsi_exec and r600.

Reviewed-by: Marek Olšák <[email protected]>
Tested-by: Dieter Nützel <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 14
1 file changed, 8 insertions, 6 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c9c922fc02b..188fbc9d47d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4045,7 +4045,6 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
struct r600_bytecode_alu alu;
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int i, j, r;
- int firsti = write_mask == 0xc ? 2 : 0;
for (i = 0; i <= 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -4066,15 +4065,18 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
return r;
}
- /* MOV first two channels to writemask dst0 */
- for (i = 0; i <= 1; i++) {
+ /* Replicate significand result across channels. */
+ for (i = 0; i <= 3; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].chan = i + 2;
+ alu.src[0].chan = (i & 1) + 2;
alu.src[0].sel = ctx->temp_reg;
- tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)