summary refs log tree commit diff stats
path: root/src/gallium/auxiliary/translate
diff options
context:
space:
mode:
author Zack Rusin <[email protected]> 2013-06-27 20:40:10 -0400
committer Zack Rusin <[email protected]> 2013-06-28 05:21:20 -0400
commit 1c2e5c223da28cdffe156b6b430fcdf638909021 (patch)
tree f86833cf8b5b43134231309cc75932da000de080 /src/gallium/auxiliary/translate
parent df4ab7974a825bf686f9dfa3474f3648e9a3ca66 (diff)
draw/translate: fix instancing
We were incorrectly computing the buffer offset when using instancing. The buffer offset is always equal to: start_instance * stride + (instance_num / instance_divisor) * stride, where instance_num = instance_id - start_instance. We were completely ignoring the start instance, quite often producing instances that were completely wrong, e.g. if start instance = 5, instance divisor = 2, then on the first iteration it should be: 5 * stride, not (5/2) * stride as we'd have currently, and if start instance = 1, instance divisor = 3, then on the first iteration it should be: 1 * stride, not 0 as we'd have. This fixes it and adjusts all the code to the changes. Signed-off-by: Zack Rusin <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/translate')
-rw-r--r-- src/gallium/auxiliary/translate/translate.h 4
-rw-r--r-- src/gallium/auxiliary/translate/translate_generic.c 17
-rw-r--r-- src/gallium/auxiliary/translate/translate_sse.c 32
3 files changed, 42 insertions, 11 deletions
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 850ef39ef21..1132114de9d 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -74,24 +74,28 @@ struct translate;
typedef void (PIPE_CDECL *run_elts_func)(struct translate *,
const unsigned *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer);
typedef void (PIPE_CDECL *run_elts16_func)(struct translate *,
const uint16_t *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer);
typedef void (PIPE_CDECL *run_elts8_func)(struct translate *,
const uint8_t *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer);
typedef void (PIPE_CDECL *run_func)(struct translate *,
unsigned start,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer);
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 894c1684813..96e35b0eb41 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -607,6 +607,7 @@ static emit_func get_emit_func( enum pipe_format format )
static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
unsigned elt,
+ unsigned start_instance,
unsigned instance_id,
void *vert )
{
@@ -623,7 +624,9 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *
int copy_size;
if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
+ index = start_instance;
+ index += (instance_id - start_instance) /
+ tg->attrib[attr].instance_divisor;
/* XXX we need to clamp the index here too, but to a
* per-array max value, not the draw->pt.max_index value
* that's being given to us via translate->set_buffer().
@@ -674,6 +677,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *
static void PIPE_CDECL generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
@@ -682,7 +686,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
unsigned i;
for (i = 0; i < count; i++) {
- generic_run_one(tg, *elts++, instance_id, vert);
+ generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -690,6 +694,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
static void PIPE_CDECL generic_run_elts16( struct translate *translate,
const uint16_t *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
@@ -698,7 +703,7 @@ static void PIPE_CDECL generic_run_elts16( struct translate *translate,
unsigned i;
for (i = 0; i < count; i++) {
- generic_run_one(tg, *elts++, instance_id, vert);
+ generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -706,6 +711,7 @@ static void PIPE_CDECL generic_run_elts16( struct translate *translate,
static void PIPE_CDECL generic_run_elts8( struct translate *translate,
const uint8_t *elts,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
@@ -714,7 +720,7 @@ static void PIPE_CDECL generic_run_elts8( struct translate *translate,
unsigned i;
for (i = 0; i < count; i++) {
- generic_run_one(tg, *elts++, instance_id, vert);
+ generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -722,6 +728,7 @@ static void PIPE_CDECL generic_run_elts8( struct translate *translate,
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
+ unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
@@ -730,7 +737,7 @@ static void PIPE_CDECL generic_run( struct translate *translate,
unsigned i;
for (i = 0; i < count; i++) {
- generic_run_one(tg, start + i, instance_id, vert);
+ generic_run_one(tg, start + i, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index c2dd42db96e..a4f7b243c13 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -112,6 +112,7 @@ struct translate_sse {
boolean use_instancing;
unsigned instance_id;
+ unsigned start_instance;
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
@@ -1061,6 +1062,8 @@ static boolean init_inputs( struct translate_sse *p,
unsigned i;
struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
+ struct x86_reg start_instance = x86_make_disp(p->machine_EDI,
+ get_offset(p, &p->start_instance));
for (i = 0; i < p->nr_buffer_variants; i++) {
struct translate_buffer_variant *variant = &p->buffer_variant[i];
@@ -1082,7 +1085,8 @@ static boolean init_inputs( struct translate_sse *p,
* base_ptr + stride * index, where index depends on instance divisor
*/
if (variant->instance_divisor) {
- /* Our index is instance ID divided by instance divisor.
+ /* Start with instance = instance_id
+ * which is true if divisor is 1.
*/
x86_mov(p->func, tmp_EAX, instance_id);
@@ -1090,13 +1094,22 @@ static boolean init_inputs( struct translate_sse *p,
struct x86_reg tmp_EDX = p->tmp2_EDX;
struct x86_reg tmp_ECX = p->src_ECX;
+ /* instance_num = instance_id - start_instance */
+ x86_mov(p->func, tmp_EDX, start_instance);
+ x86_sub(p->func, tmp_EAX, tmp_EDX);
+
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
-
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
+
+ /* instance = (instance_id - start_instance) / divisor +
+ * start_instance
+ */
+ x86_mov(p->func, tmp_EDX, start_instance);
+ x86_add(p->func, tmp_EAX, tmp_EDX);
}
/* XXX we need to clamp the index here too, but to a
@@ -1312,17 +1325,24 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
if(x86_target(p->func) != X86_32)
- x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
else
- x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
/* Load instance ID.
*/
- if (p->use_instancing) {
+ if (p->use_instancing) {
x86_mov(p->func,
- p->tmp_EAX,
+ p->tmp2_EDX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
+ x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)),
+ p->tmp2_EDX);
+
+ x86_mov(p->func,
+ p->tmp_EAX,
+ x86_fn_arg(p->func, 5));
+ x86_mov(p->func,
x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}