nvir: introduce OP_BMSK

This replaces the existing implementation without adding lowering for earlier GPUs, because the existing code isn't at all correct and can't be hit anyway. OP_BMSK will be required to support SM70 lowering passes. v2: - fixup source selection Signed-off-by: Ben Skeggs <[email protected]> Reviewed-by: Karol Herbst <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
author: Ben Skeggs <[email protected]> 2020-06-07 09:51:55 +1000
committer: Marge Bot <[email protected]> 2020-06-10 22:52:41 +0000
commit: 6fd41da1ef1ed39847ac15b4e582420756d1a5f8 (patch)
tree: a5a5af95320fd01e8c7ee2484ede5cceab67fa2e /src/gallium/drivers/nouveau
parent: e1e4d1d373aa3090ed3cd186fe3158cee38d1e31 (diff)
5 files changed, 11 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index b03cab82a03..baddd49d591 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -154,6 +154,7 @@ enum operation
    OP_EXTBF,  // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
    OP_BFIND,  // find highest/lowest set bit
    OP_BREV,   // bitfield reverse
+   OP_BMSK,   // bitfield mask
    OP_PERMT,  // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
    OP_ATOM,
    OP_BAR,    // execution barrier, sources = { id, thread count, predicate }
@@ -267,6 +268,8 @@ enum operation
       uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
       (uint8_t)(exp);                          \
 })
+#define NV50_IR_SUBOP_BMSK_C (0 << 0)
+#define NV50_IR_SUBOP_BMSK_W (1 << 0)
 
 #define NV50_IR_SUBOP_MINMAX_LOW  1
 #define NV50_IR_SUBOP_MINMAX_MED  2
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 42731f32f92..28d8cffcc53 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2774,7 +2774,7 @@ Converter::visit(nir_alu_instr *insn)
    case nir_op_bfm: {
       DEFAULT_CHECKS;
       LValues &newDefs = convert(&insn->dest);
-      mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
+      mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
       break;
    }
    case nir_op_bitfield_insert: {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d17e605a51e..e30e996499b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -558,6 +558,9 @@ ConstantFolding::expr(Instruction *i,
    memset(&res.data, 0, sizeof(res.data));
 
    switch (i->op) {
+   case OP_BMSK:
+      res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32;
+      break;
    case OP_MAD:
    case OP_FMA:
    case OP_MUL:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index c6550ca187f..8a8e95cd272 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -180,6 +180,7 @@ const char *operationStr[OP_LAST + 1] =
    "extbf",
    "bfind",
    "brev",
+   "bmsk",
    "permt",
    "atom",
    "bar",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 888dc7fb876..97b025cf7a7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] =
    0,                      // TEXBAR
    1, 1,                   // DFDX, DFDY
    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
-   2, 3, 2, 1, 1, 3,       // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
+   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
    2, 2,                   // ATOM, BAR
    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
@@ -120,9 +120,9 @@ const OpClass Target::operationClass[] =
    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
-   // POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
+   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT
    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
-   OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
    // ATOM, BAR
    OPCLASS_ATOMIC, OPCLASS_CONTROL,
    // VADD, VAVG, VMIN, VMAX
author	Ben Skeggs <[email protected]>	2020-06-07 09:51:55 +1000
committer	Marge Bot <[email protected]>	2020-06-10 22:52:41 +0000
commit	6fd41da1ef1ed39847ac15b4e582420756d1a5f8 (patch)
tree	a5a5af95320fd01e8c7ee2484ede5cceab67fa2e /src/gallium/drivers/nouveau
parent	e1e4d1d373aa3090ed3cd186fe3158cee38d1e31 (diff)