diff options
author | Dave Airlie <[email protected]> | 2017-12-04 05:31:46 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2018-01-29 05:42:28 +1000 |
commit | 1c9ea24a19a28e87f6038281c516287f25ad88b5 (patch) | |
tree | f59e78bd4516fc09ee7133de40c2dfa6fe5eb7e3 /src/gallium/drivers/r600/r600_shader.c | |
parent | a7ec366e503cc2b05d6920fa5027b0f001ae9e58 (diff) |
r600: add ARB_query_buffer_object support
This uses a different shader than radeonsi, as we can't address non-256
aligned ssbos, which the radeonsi code does. This passes some extra
offsets into the shader.
It also contains a set of u64 instruction implementation that may
or may not be complete (at least the u64div is definitely not something
that works outside this use-case). If r600 grows 64-bit integers,
it will use the GLSL lowering for divmod.
Reviewed-by: Roland Scheidegger <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 707 |
1 files changed, 700 insertions, 7 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index cf669781202..3344bcb76a7 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -770,7 +770,7 @@ static int single_alu_op3(struct r600_shader_ctx *ctx, int op, int r; /* validate this for other ops */ - assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT); + assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT); memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = op; alu.src[0].sel = src0_sel; @@ -9457,7 +9457,8 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) return 0; } -static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type, + struct r600_bytecode_alu_src *src) { struct r600_bytecode_alu alu; int r; @@ -9471,7 +9472,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type alu.dst.write = 1; alu.dst.chan = 0; - r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + alu.src[0] = *src; alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; @@ -9697,7 +9698,8 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) } #endif -static int emit_if(struct r600_shader_ctx *ctx, int opcode) +static int emit_if(struct r600_shader_ctx *ctx, int opcode, + struct r600_bytecode_alu_src *src) { int alu_type = CF_OP_ALU_PUSH_BEFORE; @@ -9711,7 +9713,7 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode) alu_type = CF_OP_ALU; } - emit_logic_pred(ctx, opcode, alu_type); + emit_logic_pred(ctx, opcode, alu_type, src); r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); @@ -9723,12 +9725,17 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode) static int tgsi_if(struct r600_shader_ctx *ctx) { - return emit_if(ctx, ALU_OP2_PRED_SETNE); + struct r600_bytecode_alu_src alu_src; + r600_bytecode_src(&alu_src, &ctx->src[0], 0); + + return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src); } static int tgsi_uif(struct r600_shader_ctx *ctx) { - return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); + struct r600_bytecode_alu_src alu_src; + r600_bytecode_src(&alu_src, &ctx->src[0], 0); + return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); } static int tgsi_else(struct r600_shader_ctx *ctx) @@ -10077,6 +10084,684 @@ static int tgsi_clock(struct r600_shader_ctx *ctx) return 0; } +static int emit_u64add(struct r600_shader_ctx *ctx, int op, + int treg, + int src0_sel, int src0_chan, + int src1_sel, int src1_chan) +{ + struct r600_bytecode_alu alu; + int r; + int opc; + + if (op == ALU_OP2_ADD_INT) + opc = ALU_OP2_ADDC_UINT; + else + opc = ALU_OP2_SUBB_UINT; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; ; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.src[0].sel = src0_sel; + alu.src[0].chan = src0_chan + 0; + alu.src[1].sel = src1_sel; + alu.src[1].chan = src1_chan + 0; + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.src[0].sel = src0_sel; + alu.src[0].chan = src0_chan + 1; + alu.src[1].sel = src1_sel; + alu.src[1].chan = src1_chan + 1; + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = opc; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + alu.src[0].sel = src0_sel; + alu.src[0].chan = src0_chan + 0; + alu.src[1].sel = src1_sel; + alu.src[1].chan = src1_chan + 0; + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.src[1].sel = treg; + alu.src[1].chan = 2; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + +static int egcm_u64add(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int treg = ctx->temp_reg; + int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT; + + if (ctx->src[1].neg) { + op = ALU_OP2_SUB_INT; + opc = ALU_OP2_SUBB_UINT; + } + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; ; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = opc ; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.src[1].sel = treg; + alu.src[1].chan = 2; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + +/* result.y = mul_high a, b + result.x = mul a,b + result.y += a.x * b.y + a.y * b.x; +*/ +static int egcm_u64mul(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int treg = ctx->temp_reg; + + /* temp.x = mul_lo a.x, b.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_UINT; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; + + /* temp.y = mul_hi a.x, b.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULHI_UINT; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; + + /* temp.z = mul a.x, b.y */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_UINT; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; + + /* temp.w = mul a.y, b.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_UINT; + alu.dst.sel = treg; + alu.dst.chan = 3; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + r = emit_mul_int_op(ctx->bc, &alu); + if (r) + return r; + + /* temp.z = temp.z + temp.w */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_ADD_INT; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.src[0].sel = treg; + alu.src[0].chan = 2; + alu.src[1].sel = treg; + alu.src[1].chan = 3; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* temp.y = temp.y + temp.z */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_ADD_INT; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.src[1].sel = treg; + alu.src[1].chan = 2; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.x = temp.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.y = temp.y */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + return 0; +} + +static int emit_u64sge(struct r600_shader_ctx *ctx, + int treg, + int src0_sel, int src0_base_chan, + int src1_sel, int src1_base_chan) +{ + int r; + /* for 64-bit sge */ + /* result = (src0.y > src1.y) || ((src0.y == src1.y) && src0.x >= src1.x)) */ + r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT, + treg, 1, + src0_sel, src0_base_chan + 1, + src1_sel, src1_base_chan + 1); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, + treg, 0, + src0_sel, src0_base_chan, + src1_sel, src1_base_chan); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_SETE_INT, + treg, 2, + src0_sel, src0_base_chan + 1, + src1_sel, src1_base_chan + 1); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_AND_INT, + treg, 0, + treg, 0, + treg, 2); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_OR_INT, + treg, 0, + treg, 0, + treg, 1); + if (r) + return r; + return 0; +} + +/* this isn't a complete div it's just enough for qbo shader to work */ +static int egcm_u64div(struct r600_shader_ctx *ctx) +{ + struct r600_bytecode_alu alu; + struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src; + int r, i; + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + + /* make sure we are dividing my a const with 0 in the high bits */ + if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL) + return -1; + if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0) + return -1; + /* make sure we are doing one division */ + if (inst->Dst[0].Register.WriteMask != 0x3) + return -1; + + /* emit_if uses ctx->temp_reg so we can't */ + int treg = r600_get_temp(ctx); + int tmp_num = r600_get_temp(ctx); + int sub_tmp = r600_get_temp(ctx); + + /* tmp quot are tmp_num.zw */ + r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0); + r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1); + r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0); + r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1); + + /* MOV tmp_num.xy, numerator */ + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 0, + alu_num_lo.sel, alu_num_lo.chan, + 0, 0); + if (r) + return r; + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 1, + alu_num_hi.sel, alu_num_hi.chan, + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 2, + V_SQ_ALU_SRC_LITERAL, 0, + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 3, + V_SQ_ALU_SRC_LITERAL, 0, + 0, 0); + if (r) + return r; + + /* treg 0 is log2_denom */ + /* normally this gets the MSB for the denom high value + - however we know this will always be 0 here. */ + r = single_alu_op2(ctx, + ALU_OP1_MOV, + treg, 0, + V_SQ_ALU_SRC_LITERAL, 32, + 0, 0); + if (r) + return r; + + /* normally check demon hi for 0, but we know it is already */ + /* t0.z = num_hi >= denom_lo */ + r = single_alu_op2(ctx, + ALU_OP2_SETGE_UINT, + treg, 1, + alu_num_hi.sel, alu_num_hi.chan, + V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); + if (r) + return r; + + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.sel = treg; + alu_src.chan = 1; + r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); + if (r) + return r; + + /* for loops in here */ + /* get msb t0.x = msb(src[1].x) first */ + int msb_lo = util_last_bit(alu_denom_lo.value); + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg, 0, + V_SQ_ALU_SRC_LITERAL, msb_lo, + 0, 0); + if (r) + return r; + + /* unroll the asm here */ + for (i = 0; i < 31; i++) { + r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, + treg, 2, + V_SQ_ALU_SRC_LITERAL, i, + treg, 0); + if (r) + return r; + + /* we can do this on the CPU */ + uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i); + /* t0.z = tmp_num.y >= t0.z */ + r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, + treg, 1, + tmp_num, 1, + V_SQ_ALU_SRC_LITERAL, denom_lo_shl); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_AND_INT, + treg, 1, + treg, 1, + treg, 2); + if (r) + return r; + + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.sel = treg; + alu_src.chan = 1; + r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_SUB_INT, + tmp_num, 1, + tmp_num, 1, + V_SQ_ALU_SRC_LITERAL, denom_lo_shl); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_OR_INT, + tmp_num, 3, + tmp_num, 3, + V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); + if (r) + return r; + + r = tgsi_endif(ctx); + if (r) + return r; + } + + /* log2_denom is always <= 31, so manually peel the last loop + * iteration. + */ + r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, + treg, 1, + tmp_num, 1, + V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); + if (r) + return r; + + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.sel = treg; + alu_src.chan = 1; + r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_SUB_INT, + tmp_num, 1, + tmp_num, 1, + V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_OR_INT, + tmp_num, 3, + tmp_num, 3, + V_SQ_ALU_SRC_LITERAL, 1U); + if (r) + return r; + r = tgsi_endif(ctx); + if (r) + return r; + + r = tgsi_endif(ctx); + if (r) + return r; + + /* onto the second loop to unroll */ + for (i = 0; i < 31; i++) { + r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT, + treg, 1, + V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)), + treg, 0); + if (r) + return r; + + uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i); + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg, 2, + V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg, 3, + V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), + 0, 0); + if (r) + return r; + + r = emit_u64sge(ctx, sub_tmp, + tmp_num, 0, + treg, 2); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_AND_INT, + treg, 1, + treg, 1, + sub_tmp, 0); + if (r) + return r; + + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.sel = treg; + alu_src.chan = 1; + r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); + if (r) + return r; + + + r = emit_u64add(ctx, ALU_OP2_SUB_INT, + sub_tmp, + tmp_num, 0, + treg, 2); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 0, + sub_tmp, 0, + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + tmp_num, 1, + sub_tmp, 1, + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_OR_INT, + tmp_num, 2, + tmp_num, 2, + V_SQ_ALU_SRC_LITERAL, 1U << (31 - i)); + if (r) + return r; + + r = tgsi_endif(ctx); + if (r) + return r; + } + + /* log2_denom is always <= 63, so manually peel the last loop + * iteration. + */ + uint64_t denom_shl = (uint64_t)alu_denom_lo.value; + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg, 2, + V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff), + 0, 0); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP1_MOV, + treg, 3, + V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32), + 0, 0); + if (r) + return r; + + r = emit_u64sge(ctx, sub_tmp, + tmp_num, 0, + treg, 2); + if (r) + return r; + + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.sel = sub_tmp; + alu_src.chan = 0; + r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src); + if (r) + return r; + + r = emit_u64add(ctx, ALU_OP2_SUB_INT, + sub_tmp, + tmp_num, 0, + treg, 2); + if (r) + return r; + + r = single_alu_op2(ctx, ALU_OP2_OR_INT, + tmp_num, 2, + tmp_num, 2, + V_SQ_ALU_SRC_LITERAL, 1U); + if (r) + return r; + r = tgsi_endif(ctx); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = tmp_num; + alu.src[0].chan = 2; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.src[0].sel = tmp_num; + alu.src[0].chan = 3; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + +static int egcm_u64sne(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int treg = ctx->temp_reg; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_SETNE_INT; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_SETNE_INT; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); + r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_OR_INT; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 0; + alu.src[1].sel = treg; + alu.src[1].chan = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, @@ -10497,6 +11182,10 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, + [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, + [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, + [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, + [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; @@ -10719,5 +11408,9 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, + [TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne }, + [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, + [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, + [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; |