diff options
author | Ilia Mirkin <[email protected]> | 2015-12-02 21:02:12 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-12-03 23:02:57 -0500 |
commit | a3722b81f534598f25d9d155a6d30bc59a6f4e59 (patch) | |
tree | f3fe179124ccbf026f7a44a3880b622e07134a8b /src/gallium/drivers/nouveau | |
parent | 2b98914fe01f1c7b2de8a096c8923b3ab0a69578 (diff) |
nv50/ir: fold fma/mad when all 3 args are immediates
All three arguments being immediates happens pretty rarely, but we might as well fold the fma/mad when it does.
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 30 |
1 file changed, 30 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index bb7f4911c21..b79e465b4fa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -670,6 +670,34 @@ ConstantFolding::expr(Instruction *i, res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask); break; } + case OP_MAD: + case OP_FMA: { + switch (i->dType) { + case TYPE_F32: + res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) + + c->data.f32; + break; + case TYPE_F64: + res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64; + break; + case TYPE_S32: + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { + res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32; + break; + } + /* fallthrough */ + case TYPE_U32: + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { + res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32; + break; + } + res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32; + break; + default: + return; + } + break; + } default: return; } @@ -684,6 +712,8 @@ ConstantFolding::expr(Instruction *i, i->setSrc(2, NULL); i->getSrc(0)->reg.data = res.data; + i->getSrc(0)->reg.type = i->dType; + i->getSrc(0)->reg.size = typeSizeof(i->dType); i->op = OP_MOV; } |