diff options
author | Axel Davy <[email protected]> | 2015-01-02 14:57:00 +0100 |
---|---|---|
committer | Emil Velikov <[email protected]> | 2015-01-22 22:16:21 +0000 |
commit | dd055176cc3ef716c5ad10459a0d0073f2b8da71 (patch) | |
tree | bef2547f8c6f4d43d0c915c7d4b5405d93cc04f9 /src/gallium/state_trackers/nine | |
parent | 6a8e5e48be0bad4606b2d5d7ba736a3d2a277c55 (diff) |
st/nine: Match REP implementation to LOOP
The previous implementation behaved correctly, but this change improves it by:
. Improving the documentation
. Using a decreasing counter (comparing against 0 is likely to be faster than comparing against a constant)
. Moving the counter update to the end of the loop body, for better performance in shaders that
break out of the loop before the iteration count is reached.
Reviewed-by: Tiziano Bacocco <[email protected]>
Signed-off-by: Axel Davy <[email protected]>
Diffstat (limited to 'src/gallium/state_trackers/nine')
-rw-r--r-- | src/gallium/state_trackers/nine/nine_shader.c | 49 |
1 files changed, 30 insertions, 19 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index be930ef2fbc..101d0b9f0be 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -1561,43 +1561,54 @@ DECL_SPECIAL(REP) unsigned *label; struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); struct ureg_dst ctr; - struct ureg_dst tmp = tx_scratch_scalar(tx); - struct ureg_src imm = - tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f); + struct ureg_dst tmp; + struct ureg_src ctrx; label = tx_bgnloop(tx); - ctr = tx_get_loopctr(tx, FALSE); + ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); + ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); /* NOTE: rep must be constant, so we don't have to save the count */ assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); - ureg_MOV(ureg, ctr, imm); + /* rep: num_iterations - 0 - 0 - 0 */ + ureg_MOV(ureg, ctr, rep); ureg_BGNLOOP(ureg, label); - if (tx->native_integers) - { - ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep); - ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); - } - else - { - ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep); + tmp = tx_scratch_scalar(tx); + /* Initially ctr.x contains the number of iterations. + * We decrease ctr.x at the end of every iteration, + * and stop when it reaches 0. 
*/ + + if (!tx->native_integers) { + /* case src and ctr contain floats */ + /* to avoid precision issue, we stop when ctr <= 0.5 */ + ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); + } else { + /* case src and ctr contain integers */ + ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); + ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); } ureg_BRK(ureg); tx_endcond(tx); ureg_ENDIF(ureg); - if (tx->native_integers) { - ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1)); - } else { - ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f)); - } - return D3D_OK; } DECL_SPECIAL(ENDREP) { + struct ureg_program *ureg = tx->ureg; + struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); + struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); + struct ureg_src src_ctr = ureg_src(ctr); + + /* ctr.x -= 1 */ + if (!tx->native_integers) + ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); + else + ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); + ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); return D3D_OK; } |