From 3c78215a1cf4e3d58295a6d3171a2c34c51875d5 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Fri, 15 Sep 2017 18:34:48 +0200 Subject: tgsi: clarify the semantics of DFRACEXP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The status quo is quite the mess: 1. tgsi_exec will do a per-channel computation, and store the dst[0] result (significand) correctly for each channel. The dst[1] result (exponent) will be written to the first bit set in the writemask. So per-component calculation only works partially. 2. r600 will only do a single computation. It will replicate the exponent but not the significand. 3. The docs pretend that there's per-component calculation, but even get dst[0] and dst[1] confused. 4. Luckily, st_glsl_to_tgsi only ever emits single-component instructions, and kind-of assumes that everything is replicated, generating this for the dvec4 case: DFRACEXP TEMP[0].xy, TEMP[1].x, CONST[0][0].xyxy DFRACEXP TEMP[0].zw, TEMP[1].y, CONST[0][0].zwzw DFRACEXP TEMP[2].xy, TEMP[1].z, CONST[0][1].xyxy DFRACEXP TEMP[2].zw, TEMP[1].w, CONST[0][1].zwzw Settle on the simplest behavior, which is single-component calculation with replication, document it, and adjust tgsi_exec and r600. Reviewed-by: Marek Olšák Tested-by: Dieter Nützel --- src/gallium/drivers/r600/r600_shader.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/r600') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c9c922fc02b..188fbc9d47d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4045,7 +4045,6 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; unsigned write_mask = inst->Dst[0].Register.WriteMask; int i, j, r; - int firsti = write_mask == 0xc ? 2 : 0; for (i = 0; i <= 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -4066,15 +4065,18 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx) return r; } - /* MOV first two channels to writemask dst0 */ - for (i = 0; i <= 1; i++) { + /* Replicate significand result across channels. */ + for (i = 0; i <= 3; i++) { + if (!(write_mask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; - alu.src[0].chan = i + 2; + alu.src[0].chan = (i & 1) + 2; alu.src[0].sel = ctx->temp_reg; - tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = 1; alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) -- cgit v1.2.3