diff options
author | Roland Scheidegger <[email protected]> | 2014-05-14 15:43:53 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2014-05-14 16:23:33 +0200 |
commit | 8a9f5ecdb116d0449d63f7b94efbfa8b205d826f (patch) | |
tree | 2be5cbce240d5ab6197c8c3e92d9e693e7b50196 | |
parent | 18c6454ad19d5d323af7b3318f18c409b84c7a12 (diff) |
gallivm: only fetch pointers to constant buffers once
In 1d35f77228ad540a551a8e09e062b764a6e31f5e support for multiple constant
buffers was introduced. This meant we had another indirection, and we did
resolve the indirection for each constant buffer access. This looks very
reasonable since llvm can figure out if it's the same pointer, however it
turns out that this can cause llvm compilation time to go through the roof
and beyond (I've seen cases in excess of factor 100, e.g. from 50 ms to more
than 10 seconds (!)), with all the additional time spent in IR optimization
passes (and in the end all of it in DominatorTree::dominate()).
I've been unable to narrow it down a bit more (only some shaders seem affected,
seemingly without much correlation to overall shader complexity or constant
usage) but it is easily avoidable by doing the buffer lookups themeselves just
once (at constant buffer declaration time).
Reviewed-by: Jose Fonseca <[email protected]>
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 100 |
2 files changed, 65 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index ffd6e874a89..88ac3c9b068 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -437,6 +437,8 @@ struct lp_build_tgsi_soa_context LLVMValueRef consts_ptr; LLVMValueRef const_sizes_ptr; + LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS]; + LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS]; const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 2b47fc28f9b..0eaa0204a7f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1213,7 +1213,6 @@ emit_fetch_constant( LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld_base->uint_bld; unsigned dimension = 0; - LLVMValueRef dimension_index; LLVMValueRef consts_ptr; LLVMValueRef num_consts; LLVMValueRef res; @@ -1227,11 +1226,8 @@ emit_fetch_constant( assert(dimension < LP_MAX_TGSI_CONST_BUFFERS); } - dimension_index = lp_build_const_int32(gallivm, dimension); - consts_ptr = - lp_build_array_get(gallivm, bld->consts_ptr, dimension_index); - num_consts = - lp_build_array_get(gallivm, bld->const_sizes_ptr, dimension_index); + consts_ptr = bld->consts[dimension]; + num_consts = bld->consts_sizes[dimension]; if (reg->Register.Indirect) { LLVMValueRef indirect_index; @@ -2677,56 +2673,86 @@ lp_emit_declaration_soa( const unsigned last = decl->Range.Last; unsigned idx, i; - for (idx = first; idx <= last; ++idx) { - assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); - switch (decl->Declaration.File) { - case TGSI_FILE_TEMPORARY: - if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { - assert(idx < LP_MAX_INLINED_TEMPS); + assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); + + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { + assert(last < LP_MAX_INLINED_TEMPS); + for (idx = first; idx <= last; ++idx) { for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); } - break; + } + break; - case TGSI_FILE_OUTPUT: - if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + case TGSI_FILE_OUTPUT: + if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + for (idx = first; idx <= last; ++idx) { for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->outputs[idx][i] = lp_build_alloca(gallivm, vec_type, "output"); } - break; + } + break; - case TGSI_FILE_ADDRESS: - /* ADDR registers are only allocated with an integer LLVM IR type, - * as they are guaranteed to always have integers. - * XXX: Not sure if this exception is worthwhile (or the whole idea of - * an ADDR register for that matter). - */ + case TGSI_FILE_ADDRESS: + /* ADDR registers are only allocated with an integer LLVM IR type, + * as they are guaranteed to always have integers. + * XXX: Not sure if this exception is worthwhile (or the whole idea of + * an ADDR register for that matter). + */ + assert(last < LP_MAX_TGSI_ADDRS); + for (idx = first; idx <= last; ++idx) { assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr"); - break; + } + break; - case TGSI_FILE_PREDICATE: - assert(idx < LP_MAX_TGSI_PREDS); + case TGSI_FILE_PREDICATE: + assert(last < LP_MAX_TGSI_PREDS); + for (idx = first; idx <= last; ++idx) { for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, "predicate"); - break; + } + break; - case TGSI_FILE_SAMPLER_VIEW: - /* - * The target stored here MUST match whatever there actually - * is in the set sampler views (what about return type?). - */ - assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS); + case TGSI_FILE_SAMPLER_VIEW: + /* + * The target stored here MUST match whatever there actually + * is in the set sampler views (what about return type?). + */ + assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); + for (idx = first; idx <= last; ++idx) { bld->sv[idx] = decl->SamplerView; - break; - - default: - /* don't need to declare other vars */ - break; } + break; + + case TGSI_FILE_CONSTANT: + { + /* + * We could trivially fetch the per-buffer pointer when fetching the + * constant, relying on llvm to figure out it's always the same pointer + * anyway. However, doing so results in a huge (more than factor of 10) + * slowdown in llvm compilation times for some (but not all) shaders + * (more specifically, the IR optimization spends way more time in + * DominatorTree::dominates). At least with llvm versions 3.1, 3.3. + */ + unsigned idx2D = decl->Dim.Index2D; + LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D); + assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS); + bld->consts[idx2D] = + lp_build_array_get(gallivm, bld->consts_ptr, index2D); + bld->consts_sizes[idx2D] = + lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D); + } + break; + + default: + /* don't need to declare other vars */ + break; } } |