aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ben Skeggs <[email protected]> 2020-06-07 09:51:45 +1000
committer: Marge Bot <[email protected]> 2020-06-10 22:52:41 +0000
commit: 4b9b7e4dd33b06f696534327a3878de89659f2a6 (patch)
tree: da60fc2385c7724c055cf0836a263a1712dc1acc
parent: b80aff88fed60679d2b0ac7fcea29d6cd3ad95ce (diff)
nvir: introduce OP_LOP3_LUT
Will be required to support SM70, but is also available on earlier GPUs.

v2:
- add convenience macro suggested by Karol

Signed-off-by: Ben Skeggs <[email protected]>
Reviewed-by: Karol Herbst <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir.h 10
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 8
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 6
4 files changed, 22 insertions, 3 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 2718975b98a..a4ade030b00 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -67,6 +67,7 @@ enum operation
OP_AND,
OP_OR,
OP_XOR,
+ OP_LOP3_LUT,
OP_SHL,
OP_SHR,
OP_MAX,
@@ -254,6 +255,15 @@ enum operation
#define NV50_IR_SUBOP_VOTE_ALL 0
#define NV50_IR_SUBOP_VOTE_ANY 1
#define NV50_IR_SUBOP_VOTE_UNI 2
+#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0 /* 8-entry truth table "result = src0": bits 4-7, i.e. every index with bit 2 set */
+#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc /* truth table "result = src1": bits 2,3,6,7, i.e. every index with bit 1 set */
+#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa /* truth table "result = src2": bits 1,3,5,7, i.e. every index with bit 0 set */
+#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ /* GNU statement expression: builds the LUT byte for a boolean expression `exp` written in terms of a, b, c */ \
+ uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; /* `a` in exp stands for src0 */ \
+ uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; /* `b` in exp stands for src1 */ \
+ uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; /* `c` in exp stands for src2 */ \
+ (uint8_t)(exp); /* value of the macro; the cast truncates the int-promoted result (e.g. of ~a) back to 8 bits */ \
+}) /* example: NV50_IR_SUBOP_LOP3_LUT(a & b) evaluates to 0xf0 & 0xcc == 0xc0 */
#define NV50_IR_SUBOP_MINMAX_LOW 1
#define NV50_IR_SUBOP_MINMAX_MED 2
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index e24e74a6463..5c3d15968cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -780,6 +780,14 @@ ConstantFolding::expr(Instruction *i,
memset(&res.data, 0, sizeof(res.data));
switch (i->op) {
+ case OP_LOP3_LUT:
+ for (int n = 0; n < 32; n++) {
+ uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
+ ((b->data.u32 >> n) & 1) << 1 |
+ ((c->data.u32 >> n) & 1);
+ res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
+ }
+ break;
case OP_INSBF: {
int offset = b->data.u32 & 0xff;
int width = (b->data.u32 >> 8) & 0xff;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..74738221080 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -93,6 +93,7 @@ const char *operationStr[OP_LAST + 1] =
"and",
"or",
"xor",
+ "lop3 lut",
"shl",
"shr",
"max",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5c6d0570ae2..92ac7101cfc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
3, 3, // SHLADD, XMAD
1, 1, 1, // ABS, NEG, NOT
- 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
+ 2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR
2, 2, 1, // MAX, MIN, SAT
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
@@ -75,9 +75,9 @@ const OpClass Target::operationClass[] =
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
- // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
+ // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR
OPCLASS_CONVERT, OPCLASS_CONVERT,
- OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
+ OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
OPCLASS_SHIFT, OPCLASS_SHIFT,
// MAX, MIN
OPCLASS_COMPARE, OPCLASS_COMPARE,