summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2018-01-29 10:55:15 +1000
committerDave Airlie <[email protected]>2018-02-06 08:21:48 +1000
commit35b430157776e8a87a1879dbc136f3cb3f599df1 (patch)
tree51736cad32cee515d39e64a4efc871127fad93aa /src/gallium
parent0170ae1e2351d9ced0069f5cb48b58cabb23926b (diff)
r600/fp64: fix integer->double conversion
Doing a straight uint/int->fp32->fp64 conversion causes some precision issues. Roland suggested splitting the integer into two portions, doing two separate int->fp32->fp64 conversions, and then adding the results. This passes the tests in CTS and piglit.
[airlied: fix cypress conversion opcodes]
Reviewed-by: Roland Scheidegger <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 121
1 files changed, 93 insertions, 28 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index b0931a3a851..e3b832b04f7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4490,44 +4490,109 @@ static int egcm_int_to_double(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
- int i, r;
- int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int i, c, r;
+ int write_mask = inst->Dst[0].Register.WriteMask;
+ int temp_reg = r600_get_temp(ctx);
assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
inst->Instruction.Opcode == TGSI_OPCODE_U2D);
- for (i = 0; i <= (lasti+1)/2; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
-
- r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = i;
- alu.dst.write = 1;
- alu.last = 1;
+ for (c = 0; c < 2; c++) {
+ int dchan = c * 2;
+ if (write_mask & (0x3 << dchan)) {
+ /* split into 24-bit int and 8-bit int */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_AND_INT;
+ alu.dst.sel = temp_reg;
+ alu.dst.chan = dchan;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0xffffff00;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_AND_INT;
+ alu.dst.sel = temp_reg;
+ alu.dst.chan = dchan + 1;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0xff;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
- for (i = 0; i <= lasti; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_FLT32_TO_FLT64;
+ for (c = 0; c < 2; c++) {
+ int dchan = c * 2;
+ if (write_mask & (0x3 << dchan)) {
+ for (i = dchan; i <= dchan + 1; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = i == dchan ? ctx->inst_info->op : ALU_OP1_UINT_TO_FLT;
- alu.src[0].chan = i/2;
- if (i%2 == 0)
- alu.src[0].sel = ctx->temp_reg;
- else {
- alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[0].value = 0x0;
+ alu.src[0].sel = temp_reg;
+ alu.src[0].chan = i;
+ alu.dst.sel = temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (ctx->bc->chip_class == CAYMAN)
+ alu.last = i == dchan + 1;
+ else
+ alu.last = 1; /* trans only ops on evergreen */
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
}
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.last = i == lasti;
+ }
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ for (c = 0; c < 2; c++) {
+ int dchan = c * 2;
+ if (write_mask & (0x3 << dchan)) {
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_FLT32_TO_FLT64;
+
+ alu.src[0].chan = dchan + (i / 2);
+ if (i == 0 || i == 2)
+ alu.src[0].sel = temp_reg;
+ else {
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0x0;
+ }
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.last = i == 3;
+ alu.dst.write = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i <= 1; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_64;
+
+ alu.src[0].chan = fp64_switch(i);
+ alu.src[0].sel = ctx->temp_reg;
+
+ alu.src[1].chan = fp64_switch(i + 2);
+ alu.src[1].sel = ctx->temp_reg;
+ tgsi_dst(ctx, &inst->Dst[0], dchan + i, &alu.dst);
+ alu.last = i == 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
}
return 0;