summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author: Axel Davy <[email protected]> 2015-01-02 14:57:00 +0100
committer: Emil Velikov <[email protected]> 2015-01-22 22:16:21 +0000
commit: dd055176cc3ef716c5ad10459a0d0073f2b8da71 (patch)
tree: bef2547f8c6f4d43d0c915c7d4b5405d93cc04f9 /src/gallium
parent: 6a8e5e48be0bad4606b2d5d7ba736a3d2a277c55 (diff)
st/nine: Match REP implementation to LOOP
The previous implementation behaved correctly, but this change improves it by:
- improving the documentation;
- using a decreasing counter (comparing against 0 is likely to be faster than comparing against a constant);
- moving the counter update to the end of the loop body, for better performance in shaders that break out of the loop before the iteration count is reached.

Reviewed-by: Tiziano Bacocco <[email protected]>
Signed-off-by: Axel Davy <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/state_trackers/nine/nine_shader.c 49
1 files changed, 30 insertions, 19 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index be930ef2fbc..101d0b9f0be 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1561,43 +1561,54 @@ DECL_SPECIAL(REP)
unsigned *label;
struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
struct ureg_dst ctr;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- struct ureg_src imm =
- tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
+ struct ureg_dst tmp;
+ struct ureg_src ctrx;
label = tx_bgnloop(tx);
- ctr = tx_get_loopctr(tx, FALSE);
+ ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
+ ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
/* NOTE: rep must be constant, so we don't have to save the count */
assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
- ureg_MOV(ureg, ctr, imm);
+ /* rep: num_iterations - 0 - 0 - 0 */
+ ureg_MOV(ureg, ctr, rep);
ureg_BGNLOOP(ureg, label);
- if (tx->native_integers)
- {
- ureg_USGE(ureg, tmp, tx_src_scalar(ctr), rep);
- ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- }
- else
- {
- ureg_SGE(ureg, tmp, tx_src_scalar(ctr), rep);
+ tmp = tx_scratch_scalar(tx);
+ /* Initially ctr.x contains the number of iterations.
+ * We decrease ctr.x at the end of every iteration,
+ * and stop when it reaches 0. */
+
+ if (!tx->native_integers) {
+ /* case src and ctr contain floats */
+ /* to avoid precision issue, we stop when ctr <= 0.5 */
+ ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
+ } else {
+ /* case src and ctr contain integers */
+ ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
+ ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
}
ureg_BRK(ureg);
tx_endcond(tx);
ureg_ENDIF(ureg);
- if (tx->native_integers) {
- ureg_UADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1u(ureg, 1));
- } else {
- ureg_ADD(ureg, ctr, tx_src_scalar(ctr), ureg_imm1f(ureg, 1.0f));
- }
-
return D3D_OK;
}
DECL_SPECIAL(ENDREP)
{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
+ struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
+ struct ureg_src src_ctr = ureg_src(ctr);
+
+ /* ctr.x -= 1 */
+ if (!tx->native_integers)
+ ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
+ else
+ ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
+
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
return D3D_OK;
}