diff options
author | Ben Skeggs <[email protected]> | 2020-06-07 09:51:45 +1000 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-06-10 22:52:41 +0000 |
commit | 4b9b7e4dd33b06f696534327a3878de89659f2a6 (patch) | |
tree | da60fc2385c7724c055cf0836a263a1712dc1acc | |
parent | b80aff88fed60679d2b0ac7fcea29d6cd3ad95ce (diff) |
nvir: introduce OP_LOP3_LUT
Will be required to support SM70, but is also available on earlier GPUs.
v2:
- add convenience macro suggested by Karol
Signed-off-by: Ben Skeggs <[email protected]>
Reviewed-by: Karol Herbst <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
4 files changed, 22 insertions, 3 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 2718975b98a..a4ade030b00 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -67,6 +67,7 @@ enum operation
  OP_AND,
  OP_OR,
  OP_XOR,
+ OP_LOP3_LUT,
  OP_SHL,
  OP_SHR,
  OP_MAX,
@@ -254,6 +255,15 @@ enum operation
 #define NV50_IR_SUBOP_VOTE_ALL 0
 #define NV50_IR_SUBOP_VOTE_ANY 1
 #define NV50_IR_SUBOP_VOTE_UNI 2
+#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0
+#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc
+#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa
+#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \
+ uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \
+ uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \
+ uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
+ (uint8_t)(exp); \
+})
 #define NV50_IR_SUBOP_MINMAX_LOW 1
 #define NV50_IR_SUBOP_MINMAX_MED 2
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index e24e74a6463..5c3d15968cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -780,6 +780,14 @@ ConstantFolding::expr(Instruction *i,
  memset(&res.data, 0, sizeof(res.data));
  switch (i->op) {
+ case OP_LOP3_LUT:
+ for (int n = 0; n < 32; n++) {
+ uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
+ ((b->data.u32 >> n) & 1) << 1 |
+ ((c->data.u32 >> n) & 1);
+ res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
+ }
+ break;
  case OP_INSBF: {
  int offset = b->data.u32 & 0xff;
  int width = (b->data.u32 >> 8) & 0xff;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..74738221080 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -93,6 +93,7 @@ const char *operationStr[OP_LAST + 1] =
  "and",
  "or",
  "xor",
+ "lop3 lut",
  "shl",
  "shr",
  "max",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5c6d0570ae2..92ac7101cfc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
  2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
  3, 3, // SHLADD, XMAD
  1, 1, 1, // ABS, NEG, NOT
- 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
+ 2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR
  2, 2, 1, // MAX, MIN, SAT
  1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
  3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
@@ -75,9 +75,9 @@ const OpClass Target::operationClass[] =
  OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
- // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
+ // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR
  OPCLASS_CONVERT, OPCLASS_CONVERT,
- OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
+ OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
  OPCLASS_SHIFT, OPCLASS_SHIFT,
  // MAX, MIN
  OPCLASS_COMPARE, OPCLASS_COMPARE,