about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2020-06-04 10:35:23 +0200
committer Samuel Pitoiset <[email protected]> 2020-06-05 16:04:05 +0200
commit e1523b34c2aeebdf2952bfad4f0e40326fb2cc7c (patch)
tree b8022b59156af1a14bf1ca83eb16570820623837
parent ee4bc13de2aacb7bab24a3e55e44e7e50434df94 (diff)
aco: fix sign-extend 8-bit subgroup operations on GFX6-GFX7
SDWA is GFX8+.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5327>
-rw-r--r-- src/amd/compiler/aco_lower_to_hw_instr.cpp 30
1 file changed, 21 insertions, 9 deletions
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a278b66ce82..480dd32e6ce 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -568,15 +568,27 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
}
if (src.regClass() == v1b) {
- aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
- sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
- sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
- if (reduce_op == imin8 || reduce_op == imax8)
- sdwa->sel[0] = sdwa_sbyte;
- else
- sdwa->sel[0] = sdwa_ubyte;
- sdwa->dst_sel = sdwa_udword;
- bld.insert(std::move(sdwa));
+ if (ctx->program->chip_class >= GFX8) {
+ aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
+ sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
+ sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
+ if (reduce_op == imin8 || reduce_op == imax8)
+ sdwa->sel[0] = sdwa_sbyte;
+ else
+ sdwa->sel[0] = sdwa_ubyte;
+ sdwa->dst_sel = sdwa_udword;
+ bld.insert(std::move(sdwa));
+ } else {
+ aco_opcode opcode;
+
+ if (reduce_op == imin8 || reduce_op == imax8)
+ opcode = aco_opcode::v_bfe_i32;
+ else
+ opcode = aco_opcode::v_bfe_u32;
+
+ bld.vop3(opcode, Definition(PhysReg{tmp}, v1),
+ Operand(PhysReg{tmp}, v1), Operand(0u), Operand(8u));
+ }
} else if (src.regClass() == v2b) {
if (ctx->program->chip_class >= GFX10 &&
(reduce_op == iadd16 || reduce_op == imax16 ||