diff options
author | Samuel Pitoiset <[email protected]> | 2020-06-04 10:35:23 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2020-06-05 16:04:05 +0200 |
commit | e1523b34c2aeebdf2952bfad4f0e40326fb2cc7c (patch) | |
tree | b8022b59156af1a14bf1ca83eb16570820623837 | |
parent | ee4bc13de2aacb7bab24a3e55e44e7e50434df94 (diff) |
aco: fix sign-extend 8-bit subgroup operations on GFX6-GFX7
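SDWA is only available on GFX8+, so on GFX6-GFX7 extend the 8-bit value with v_bfe_i32 (signed) or v_bfe_u32 (unsigned) instead of an SDWA v_mov_b32.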
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5327>
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 30 |
1 file changed, 21 insertions, 9 deletions
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index a278b66ce82..480dd32e6ce 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -568,15 +568,27 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig } if (src.regClass() == v1b) { - aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; - sdwa->operands[0] = Operand(PhysReg{tmp}, v1); - sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); - if (reduce_op == imin8 || reduce_op == imax8) - sdwa->sel[0] = sdwa_sbyte; - else - sdwa->sel[0] = sdwa_ubyte; - sdwa->dst_sel = sdwa_udword; - bld.insert(std::move(sdwa)); + if (ctx->program->chip_class >= GFX8) { + aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; + sdwa->operands[0] = Operand(PhysReg{tmp}, v1); + sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); + if (reduce_op == imin8 || reduce_op == imax8) + sdwa->sel[0] = sdwa_sbyte; + else + sdwa->sel[0] = sdwa_ubyte; + sdwa->dst_sel = sdwa_udword; + bld.insert(std::move(sdwa)); + } else { + aco_opcode opcode; + + if (reduce_op == imin8 || reduce_op == imax8) + opcode = aco_opcode::v_bfe_i32; + else + opcode = aco_opcode::v_bfe_u32; + + bld.vop3(opcode, Definition(PhysReg{tmp}, v1), + Operand(PhysReg{tmp}, v1), Operand(0u), Operand(8u)); + } } else if (src.regClass() == v2b) { if (ctx->program->chip_class >= GFX10 && (reduce_op == iadd16 || reduce_op == imax16 || |