diff options
author | Samuel Pitoiset <[email protected]> | 2016-09-18 12:33:12 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2016-09-18 16:42:39 +0200 |
commit | 557a29b51fa3324cfbeecff100a54c7c6a6d87cd (patch) | |
tree | ced4f6588b2248781fe8ac18773b3c5e192d8e85 /src/gallium/drivers/nouveau/codegen | |
parent | d8b4f5fcca2ce299b8ef248b6f57896c7b85d18c (diff) |
nv50/ir: optimize SUB(a, b) to MOV(a - b)
This helps shaders in UE4 demos, especially Elemental
(+1% perf). The optimization reduces local memory (spill) usage
in one shader, which explains the small gain.
GF100/GK104:
total instructions in shared programs :2838551 -> 2838045 (-0.02%)
total gprs used in shared programs :396706 -> 396684 (-0.01%)
total local used in shared programs :34432 -> 34416 (-0.05%)
                local        gpr       inst      bytes
    helped           1         19        112        112
      hurt           0          0          0          0
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index a9172f834b2..74a5a854e77 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -576,6 +576,16 @@ ConstantFolding::expr(Instruction *i,
          return;
       }
       break;
+   case OP_SUB:
+      switch (i->dType) {
+      case TYPE_F32: res.data.f32 = a->data.f32 - b->data.f32; break;
+      case TYPE_F64: res.data.f64 = a->data.f64 - b->data.f64; break;
+      case TYPE_S32:
+      case TYPE_U32: res.data.u32 = a->data.u32 - b->data.u32; break;
+      default:
+         return;
+      }
+      break;
    case OP_POW:
       switch (i->dType) {
       case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;