diff options
author | Roland Scheidegger <[email protected]> | 2010-09-24 15:02:24 +0200 |
---|---|---|
committer | José Fonseca <[email protected]> | 2010-09-25 12:19:31 +0100 |
commit | 46d05d4ef99857e50d978247917f3e16574418f4 (patch) | |
tree | fdb7bd877ce5cac758448196873375a51d6934ef /src | |
parent | 26dc60d0a32f3e5b8084fda5991b762a721662e8 (diff) |
gallivm: don't use URem/UDiv when calculating offsets for blocks
While it's true that LLVM can and will indeed replace this with bit
arithmetic (since block height/width is a power of two), it does so (LLVM 2.7)
element by element, and hence extracts/shifts/reinserts each element individually.
This costs about 16 instructions (and extract is not really fast) vs. 1...
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 19e380a8dce..44f44ff1aa4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -655,11 +655,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, * Pixel blocks have power of two dimensions. LLVM should convert the * rem/div to bit arithmetic. * TODO: Verify this. + * It does indeed BUT it does transform it to scalar (and back) when doing so + * (using roughly extract, shift/and, mov, unpack) (llvm 2.7). + * The generated code looks seriously unfunny and is quite expensive. */ - +#if 0 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); +#else + unsigned logbase2 = util_unsigned_logbase2(block_length); + LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2); + LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1); + subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, ""); + coord = LLVMBuildLShr(bld->builder, coord, block_shift, ""); +#endif } offset = lp_build_mul(bld, coord, stride); |