summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nouveau/codegen
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2015-12-02 21:02:12 -0500
committer: Ilia Mirkin <[email protected]> 2015-12-03 23:02:57 -0500
commit: a3722b81f534598f25d9d155a6d30bc59a6f4e59 (patch)
tree: f3fe179124ccbf026f7a44a3880b622e07134a8b /src/gallium/drivers/nouveau/codegen
parent: 2b98914fe01f1c7b2de8a096c8923b3ab0a69578 (diff)
nv50/ir: fold fma/mad when all 3 args are immediates
This happens pretty rarely, but might as well do it when it does.

Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index bb7f4911c21..b79e465b4fa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -670,6 +670,34 @@ ConstantFolding::expr(Instruction *i,
res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
break;
}
+ case OP_MAD:
+ case OP_FMA: {
+ switch (i->dType) {
+ case TYPE_F32:
+ res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) +
+ c->data.f32;
+ break;
+ case TYPE_F64:
+ res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64;
+ break;
+ case TYPE_S32:
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32;
+ break;
+ }
+ /* fallthrough */
+ case TYPE_U32:
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32;
+ break;
+ }
+ res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32;
+ break;
+ default:
+ return;
+ }
+ break;
+ }
default:
return;
}
@@ -684,6 +712,8 @@ ConstantFolding::expr(Instruction *i,
i->setSrc(2, NULL);
i->getSrc(0)->reg.data = res.data;
+ i->getSrc(0)->reg.type = i->dType;
+ i->getSrc(0)->reg.size = typeSizeof(i->dType);
i->op = OP_MOV;
}