diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 18 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_private.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt.c | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_emit.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_so_emit.c | 23 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_variant.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/translate/translate.h | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/translate/translate_generic.c | 17 | ||||
-rw-r--r-- | src/gallium/auxiliary/translate/translate_sse.c | 32 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_vbuf.c | 8 |
13 files changed, 93 insertions, 24 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 97b463f4ff8..f9bcadc6cfa 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -674,6 +674,7 @@ generate_vs(struct draw_llvm_variant *variant, static void generate_fetch(struct gallivm_state *gallivm, + struct draw_context *draw, LLVMValueRef vbuffers_ptr, LLVMValueRef *res, struct pipe_vertex_element *velem, @@ -704,10 +705,17 @@ generate_fetch(struct gallivm_state *gallivm, struct lp_build_if_state if_ctx; if (velem->instance_divisor) { - /* array index = instance_id / instance_divisor */ - index = LLVMBuildUDiv(builder, instance_id, - lp_build_const_int32(gallivm, velem->instance_divisor), - "instance_divisor"); + /* Index is equal to the start instance plus the number of current + * instance divided by the divisor. In this case we compute it as: + * index = start_instance + ((instance_id - start_instance) / divisor) + */ + LLVMValueRef current_instance; + index = lp_build_const_int32(gallivm, draw->start_instance); + current_instance = LLVMBuildSub(builder, instance_id, index, ""); + current_instance = LLVMBuildUDiv(builder, current_instance, + lp_build_const_int32(gallivm, velem->instance_divisor), + "instance_divisor"); + index = LLVMBuildAdd(builder, index, current_instance, "instance"); } stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit); @@ -1697,7 +1705,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index); LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); - generate_fetch(gallivm, vbuffers_ptr, + generate_fetch(gallivm, draw, vbuffers_ptr, &aos_attribs[j][i], velem, vb, true_index, system_values.instance_id); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 578433c2006..d3b38eb2df6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0); - vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index fd52c2d6b4c..f42cded118a 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -306,6 +306,7 @@ struct draw_context } extra_shader_outputs; unsigned instance_id; + unsigned start_instance; #ifdef HAVE_LLVM struct draw_llvm *llvm; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c4d06de84bb..e89ccd25401 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -533,6 +533,7 @@ draw_vbo(struct draw_context *draw, for (instance = 0; instance < info->instance_count; instance++) { draw->instance_id = instance + info->start_instance; + draw->start_instance = info->start_instance; /* check for overflow */ if (draw->instance_id < instance || draw->instance_id < info->start_instance) { diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 4c96d7414ee..fc64048be1b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -171,6 +171,7 @@ draw_pt_emit(struct pt_emit *emit, translate->run(translate, 0, vertex_count, + draw->start_instance, draw->instance_id, hw_verts ); @@ -234,6 +235,7 @@ draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->start_instance, draw->instance_id, hw_verts); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 3740deab5a8..8716f657777 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -168,6 +168,7 @@ draw_pt_fetch_run(struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->start_instance, draw->instance_id, verts ); } @@ -195,6 +196,7 @@ draw_pt_fetch_run_linear(struct pt_fetch *fetch, translate->run( translate, start, count, + draw->start_instance, draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index dc6decba70e..22ec8d6b2b5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -210,6 +210,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->start_instance, draw->instance_id, hw_verts ); @@ -267,6 +268,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->start_instance, draw->instance_id, hw_verts ); @@ -326,6 +328,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->start_instance, draw->instance_id, hw_verts ); diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index d624a990bc6..a6d1da4eb2e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -182,12 +182,29 @@ static void so_emit_prim(struct pt_so_emit *so, buffer = (float *)((char *)draw->so.targets[ob]->mapping + draw->so.targets[ob]->target.buffer_offset + - draw->so.targets[ob]->internal_offset) + state->output[slot].dst_offset; + draw->so.targets[ob]->internal_offset) + + state->output[slot].dst_offset; if (idx == so->pos_idx && pcp_ptr) - memcpy(buffer, &pre_clip_pos[start_comp], num_comps * sizeof(float)); + memcpy(buffer, &pre_clip_pos[start_comp], + num_comps * sizeof(float)); else - memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float)); + memcpy(buffer, &input[idx][start_comp], + num_comps * sizeof(float)); +#if 0 + { + int j; + debug_printf("VERT[%d], offset = %d, slot[%d] sc = %d, num_c = %d, idx = %d = [", + i + draw->so.targets[ob]->emitted_vertices, + draw->so.targets[ob]->internal_offset, + slot, start_comp, num_comps, idx); + for (j = 0; j < num_comps; ++j) { + unsigned *ubuffer = (unsigned*)buffer; + debug_printf("%d (0x%x), ", ubuffer[j], ubuffer[j]); + } + debug_printf("]\n"); + } +#endif } for (ob = 0; ob < draw->so.num_targets; ++ob) { struct draw_so_target *target = draw->so.targets[ob]; diff --git a/src/gallium/auxiliary/draw/draw_vs_variant.c b/src/gallium/auxiliary/draw/draw_vs_variant.c index 152c1303183..37500c7db8d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_variant.c +++ b/src/gallium/auxiliary/draw/draw_vs_variant.c @@ -168,6 +168,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->start_instance, vsvg->draw->instance_id, temp_buffer ); @@ -211,6 +212,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->start_instance, vsvg->draw->instance_id, output_buffer ); @@ -234,6 +236,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->start_instance, vsvg->draw->instance_id, temp_buffer ); @@ -274,6 +277,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->start_instance, vsvg->draw->instance_id, output_buffer ); diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 850ef39ef21..1132114de9d 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -74,24 +74,28 @@ struct translate; typedef void (PIPE_CDECL *run_elts_func)(struct translate *, const unsigned *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer); typedef void (PIPE_CDECL *run_elts16_func)(struct translate *, const uint16_t *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer); typedef void (PIPE_CDECL *run_elts8_func)(struct translate *, const uint8_t *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer); typedef void (PIPE_CDECL *run_func)(struct translate *, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer); diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 894c1684813..96e35b0eb41 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -607,6 +607,7 @@ static emit_func get_emit_func( enum pipe_format format ) static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg, unsigned elt, + unsigned start_instance, unsigned instance_id, void *vert ) { @@ -623,7 +624,9 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * int copy_size; if (tg->attrib[attr].instance_divisor) { - index = instance_id / tg->attrib[attr].instance_divisor; + index = start_instance; + index += (instance_id - start_instance) / + tg->attrib[attr].instance_divisor; /* XXX we need to clamp the index here too, but to a * per-array max value, not the draw->pt.max_index value * that's being given to us via translate->set_buffer(). @@ -674,6 +677,7 @@ static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic * static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer ) { @@ -682,7 +686,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert); vert += tg->translate.key.output_stride; } } @@ -690,6 +694,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run_elts16( struct translate *translate, const uint16_t *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer ) { @@ -698,7 +703,7 @@ static void PIPE_CDECL generic_run_elts16( struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert); vert += tg->translate.key.output_stride; } } @@ -706,6 +711,7 @@ static void PIPE_CDECL generic_run_elts16( struct translate *translate, static void PIPE_CDECL generic_run_elts8( struct translate *translate, const uint8_t *elts, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer ) { @@ -714,7 +720,7 @@ static void PIPE_CDECL generic_run_elts8( struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert); vert += tg->translate.key.output_stride; } } @@ -722,6 +728,7 @@ static void PIPE_CDECL generic_run_elts8( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_id, void *output_buffer ) { @@ -730,7 +737,7 @@ static void PIPE_CDECL generic_run( struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, start + i, instance_id, vert); + generic_run_one(tg, start + i, start_instance, instance_id, vert); vert += tg->translate.key.output_stride; } } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index c2dd42db96e..a4f7b243c13 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -112,6 +112,7 @@ struct translate_sse { boolean use_instancing; unsigned instance_id; + unsigned start_instance; /* these are actually known values, but putting them in a struct * like this is helpful to keep them in sync across the file. @@ -1061,6 +1062,8 @@ static boolean init_inputs( struct translate_sse *p, unsigned i; struct x86_reg instance_id = x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)); + struct x86_reg start_instance = x86_make_disp(p->machine_EDI, + get_offset(p, &p->start_instance)); for (i = 0; i < p->nr_buffer_variants; i++) { struct translate_buffer_variant *variant = &p->buffer_variant[i]; @@ -1082,7 +1085,8 @@ static boolean init_inputs( struct translate_sse *p, * base_ptr + stride * index, where index depends on instance divisor */ if (variant->instance_divisor) { - /* Our index is instance ID divided by instance divisor. + /* Start with instance = instance_id + * which is true if divisor is 1. */ x86_mov(p->func, tmp_EAX, instance_id); @@ -1090,13 +1094,22 @@ static boolean init_inputs( struct translate_sse *p, struct x86_reg tmp_EDX = p->tmp2_EDX; struct x86_reg tmp_ECX = p->src_ECX; + /* instance_num = instance_id - start_instance */ + x86_mov(p->func, tmp_EDX, start_instance); + x86_sub(p->func, tmp_EAX, tmp_EDX); + /* TODO: Add x86_shr() to rtasm and use it whenever * instance divisor is power of two. */ - x86_xor(p->func, tmp_EDX, tmp_EDX); x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor); x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + + /* instance = (instance_id - start_instance) / divisor + + * start_instance + */ + x86_mov(p->func, tmp_EDX, start_instance); + x86_add(p->func, tmp_EAX, tmp_EDX); } /* XXX we need to clamp the index here too, but to a @@ -1312,17 +1325,24 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3)); if(x86_target(p->func) != X86_32) - x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5)); + x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); else - x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5)); + x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); /* Load instance ID. */ - if (p->use_instancing) { + if (p->use_instancing) { x86_mov(p->func, - p->tmp_EAX, + p->tmp2_EDX, x86_fn_arg(p->func, 4)); x86_mov(p->func, + x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)), + p->tmp2_EDX); + + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 5)); + x86_mov(p->func, x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)), p->tmp_EAX); } diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 5936f74a039..52b360ed7aa 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -403,13 +403,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, switch (ib->index_size) { case 4: - tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map); + tr->run_elts(tr, (unsigned*)map, num_indices, 0, 0, out_map); break; case 2: - tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, out_map); + tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, 0, out_map); break; case 1: - tr->run_elts8(tr, map, num_indices, 0, out_map); + tr->run_elts8(tr, map, num_indices, 0, 0, out_map); break; } @@ -428,7 +428,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, out_offset -= key->output_stride * start_vertex; - tr->run(tr, 0, num_vertices, 0, out_map); + tr->run(tr, 0, num_vertices, 0, 0, out_map); } /* Unmap all buffers. */ |