summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author Ilia Mirkin <[email protected]> 2016-06-19 00:43:06 -0400
committer Ilia Mirkin <[email protected]> 2016-06-21 21:50:16 -0400
commit 5b0d64886dfe9d42d02666ee1b07f2aa375197a5 (patch)
tree e60e31ec6aa46bfeb72ebd3ab02a503b56420405 /src/gallium
parent 35b53c8d47d3a0b53ee2549d73296d5db8e3cca0 (diff)
translate: fix start_instance parameter in sse version
The generic version already gets this right, but the SSE version was using an incorrect formula.

Signed-off-by: Ilia Mirkin <[email protected]>
Cc: "11.2 12.0" <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/translate/translate_sse.c 14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 162e555a873..298192f2875 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -1098,13 +1098,14 @@ init_inputs(struct translate_sse *p, unsigned index_size)
* base_ptr + stride * index, where index depends on instance divisor
*/
if (variant->instance_divisor) {
+ struct x86_reg tmp_EDX = p->tmp2_EDX;
+
/* Start with instance = instance_id
* which is true if divisor is 1.
*/
x86_mov(p->func, tmp_EAX, instance_id);
if (variant->instance_divisor != 1) {
- struct x86_reg tmp_EDX = p->tmp2_EDX;
struct x86_reg tmp_ECX = p->src_ECX;
/* TODO: Add x86_shr() to rtasm and use it whenever
@@ -1113,14 +1114,13 @@ init_inputs(struct translate_sse *p, unsigned index_size)
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
-
- /* instance = (instance_id - start_instance) / divisor +
- * start_instance
- */
- x86_mov(p->func, tmp_EDX, start_instance);
- x86_add(p->func, tmp_EAX, tmp_EDX);
}
+ /* instance = (instance_id / divisor) + start_instance
+ */
+ x86_mov(p->func, tmp_EDX, start_instance);
+ x86_add(p->func, tmp_EAX, tmp_EDX);
+
/* XXX we need to clamp the index here too, but to a
* per-array max value, not the draw->pt.max_index value
* that's being given to us via translate->set_buffer().