summaryrefslogtreecommitdiffstats
path: root/src/freedreno/ir3
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2019-03-20 08:53:44 -0400
committerRob Clark <[email protected]>2019-03-21 09:13:05 -0400
commit2e01c534f4c7909dd2ca766bc0086b3355ccf321 (patch)
tree579dc0f5d92f263d42997f0de93580d730c892c2 /src/freedreno/ir3
parent3d8349048bfb01f1e4240403c38c608d26aeb997 (diff)
freedreno/ir3: fix bit_count
Seems like it can only work 16b at a time. Fixes dEQP-GLES31.functional.shaders.builtin_functions.integer.bitcount.* TODO need to check if this limitation applies to a3xx as well. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 962dcdf1b88..b55f834920f 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -616,9 +616,30 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
dst[0] = ir3_SEL_B32(b, src[1], 0, cond, 0, src[2], 0);
break;
}
- case nir_op_bit_count:
- dst[0] = ir3_CBITS_B(b, src[0], 0);
+ case nir_op_bit_count: {
+ // TODO, we need to do this 16b at a time on a5xx+a6xx.. need to
+ // double check on earlier gen's. Once half-precision support is
+ // in place, this should probably move to a NIR lowering pass:
+ struct ir3_instruction *hi, *lo;
+
+ hi = ir3_COV(b, ir3_SHR_B(b, src[0], 0, create_immed(b, 16), 0),
+ TYPE_U32, TYPE_U16);
+ lo = ir3_COV(b, src[0], TYPE_U32, TYPE_U16);
+
+ hi = ir3_CBITS_B(b, hi, 0);
+ lo = ir3_CBITS_B(b, lo, 0);
+
+ // TODO maybe the builders should default to making dst half-precision
+ // if the src's were half precision, to make this less awkward.. otoh
+ // we should probably just do this lowering in NIR.
+ hi->regs[0]->flags |= IR3_REG_HALF;
+ lo->regs[0]->flags |= IR3_REG_HALF;
+
+ dst[0] = ir3_ADD_S(b, hi, 0, lo, 0);
+ dst[0]->regs[0]->flags |= IR3_REG_HALF;
+ dst[0] = ir3_COV(b, dst[0], TYPE_U16, TYPE_U32);
break;
+ }
case nir_op_ifind_msb: {
struct ir3_instruction *cmp;
dst[0] = ir3_CLZ_S(b, src[0], 0);