diff options
author | Roland Scheidegger <[email protected]> | 2012-07-06 02:53:44 +0200 |
---|---|---|
committer | José Fonseca <[email protected]> | 2012-07-20 20:17:15 +0100 |
commit | 70a969f123c98cf6fca71a5fed4efed983edf6c8 (patch) | |
tree | f0ffa94d275d9ade9e8bfb8ffc16b99ec7fcf946 /src/gallium/drivers/llvmpipe/lp_bld_interp.h | |
parent | 542bd6941f5a56f7a3aa84b44d92591488b146bf (diff) |
llvmpipe: use runtime loop instead of static loop for looping over quads
This can potentially cut shader program size by a factor of 4 for 4-wide
execution, or by a factor of 2 for 8-wide execution. While these ratios aren't
quite reached for more complex shaders, it can be close.
Could not really measure a performance difference so far except for trivial
shaders (glxgears).
There seems to be a fair number of unnecessary moves generated, especially
at the beginning; it might be possible to optimize those away somehow.
Things aren't quite as clean, some additional stuff needs to be done for
keeping both paths working (though llvm might be able to optimize this away).
glxgears seems to lose about 5-10% of performance. Looking at the generated
shaders, this is actually less than I'd think it would be - both 4 and 8-wide
shaders, despite containing a loop, actually have about 10% more instructions
in total, and will have roughly 50% more executed instructions (though mostly
cheap ones). Need to figure out how to reduce overhead...
v2: keep complex interpolation for 4-wide mode, adapt to interface changes.
Reviewed-by: José Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_bld_interp.h')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_interp.h | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index f293b582318..d273e3f9b99 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -84,6 +84,8 @@ struct lp_build_interp_soa_context unsigned num_attribs; unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS]; + boolean simple_interp; + boolean dynamic_offsets; LLVMValueRef x; LLVMValueRef y; @@ -98,6 +100,9 @@ struct lp_build_interp_soa_context LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef xoffset_store; + LLVMValueRef yoffset_store; + /* * Convenience pointers. Callers may access this one. */ @@ -113,6 +118,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct lp_shader_input *inputs, LLVMBuilderRef builder, struct lp_type type, + boolean dynamic_offsets, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, @@ -129,5 +135,14 @@ lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, int quad__start_index); +void +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); + +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); #endif /* LP_BLD_INTERP_H */ |