diff options
author | Ilia Mirkin <[email protected]> | 2014-05-13 11:23:33 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2014-05-18 17:59:16 -0400 |
commit | 5b8f1a0f7c5b1412577a913d374192a2329fa615 (patch) | |
tree | d694105a8ad1ed05a01560e98c2f25ef6bfa45d3 /src/gallium | |
parent | 4ebaabcccb125e3d29ab6e6ac3d23897287d7574 (diff) |
nv50/ir: fix integer mul lowering for u32 x u32 -> high u32
UNION appears to expect that all of its sources are conditionally
defined. Otherwise it inserts an unpredicated mov instruction which
overwrites the desired result. This fixes tests that use UMUL_HI, and
much less directly, unsigned integer division by a constant, which uses
this functionality in a peephole pass.
Signed-off-by: Ilia Mirkin <[email protected]>
Cc: "10.1 10.2" <[email protected]>
Reviewed-by: Ben Skeggs <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 63db1d7554c..b17d57d0bfd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -75,16 +75,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
    i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
 
    if (highResult) {
-      Value *r[3];
+      Value *r[4];
       Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
       c[0] = bld->getSSA(1, FILE_FLAGS);
       c[1] = bld->getSSA(1, FILE_FLAGS);
-      for (int j = 0; j < 3; ++j)
+      for (int j = 0; j < 4; ++j)
          r[j] = bld->getSSA(fullSize);
 
       i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
       i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
-      bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]);
+      bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
+      bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
       i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
 
       // set carry defs / sources
```