From 5c3040e93a46fc00655e7f74b004f9f38d2f6471 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 7 Jun 2020 09:51:56 +1000 Subject: nvir: introduce OP_SGXT Required for SM70 EXTBF lowering. v2: - added constant folding Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Part-of: --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 ++++++++++ src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 5 +++-- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index baddd49d591..add0e270e56 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -156,6 +156,7 @@ enum operation OP_BREV, // bitfield reverse OP_BMSK, // bitfield mask OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) + OP_SGXT, OP_ATOM, OP_BAR, // execution barrier, sources = { id, thread count, predicate } OP_VADD, // byte/word vector operations diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index e30e996499b..669bfbcf548 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -558,6 +558,16 @@ ConstantFolding::expr(Instruction *i, memset(&res.data, 0, sizeof(res.data)); switch (i->op) { + case OP_SGXT: { + int bits = b->data.u32; + if (bits) { + uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits)); + if (bits < 32 && (data & (1 << (bits - 1)))) + data = data - (1 << bits); + res.data.u32 = data; + } + break; + } case OP_BMSK: res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 8a8e95cd272..a0fd9eb08bf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -182,6 +182,7 @@ const char *operationStr[OP_LAST + 1] = "brev", "bmsk", "permt", + "sgxt", "atom", "bar", "vadd", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 97b025cf7a7..3a772003271 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -52,6 +52,7 @@ const uint8_t Target::operationSrcNr[] = 1, 1, // DFDX, DFDY 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT + 2, // SGXT 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL @@ -120,9 +121,9 @@ const OpClass Target::operationClass[] = // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, - // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT + // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT + OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, - OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, // ATOM, BAR OPCLASS_ATOMIC, OPCLASS_CONTROL, // VADD, VAVG, VMIN, VMAX -- cgit v1.2.3