diff options
Diffstat (limited to 'src/gallium')
176 files changed, 4178 insertions, 1846 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index e3af41c6e04..8f937e3b4e9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -111,6 +111,7 @@ C_SOURCES = \ util/u_math.c \ util/u_mm.c \ util/u_rect.c \ + util/u_ringbuffer.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ util/u_stream_stdc.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 782eb533863..f957090b5fb 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -147,6 +147,7 @@ source = [ 'util/u_math.c', 'util/u_mm.c', 'util/u_rect.c', + 'util/u_ringbuffer.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', 'util/u_stream_stdc.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 2b16332e143..fdfb5faa59e 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -539,6 +539,38 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. + */ +enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = cso_single_vertex_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_vertex_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } + + cso_single_vertex_sampler_done( ctx ); + + return error; +} + void cso_save_vertex_samplers(struct cso_context *ctx) { diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index b9e313e32d6..d2089b1c883 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,6 +84,10 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +enum pipe_error cso_set_vertex_samplers(struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states); + void cso_save_vertex_samplers(struct cso_context *cso); diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 667aa46b208..e90dfc5aec4 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -95,6 +95,7 @@ void draw_destroy( struct draw_context *draw ) draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); draw_vs_destroy( draw ); + draw_gs_destroy( draw ); FREE( draw ); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index b716209df29..8a64c06efcd 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -164,6 +164,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count); +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + void draw_flush(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 5db2e755423..daf8d071f12 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -59,6 +59,15 @@ draw_gs_init( struct draw_context *draw ) return TRUE; } +void draw_gs_destroy( struct draw_context *draw ) +{ + if (!draw->gs.machine) + return; + + align_free(draw->gs.machine->Primitives); + + tgsi_exec_machine_destroy(draw->gs.machine); +} void draw_gs_set_constants( struct draw_context *draw, const float (*constants)[4], diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 205cda5eabe..51a6115ebf5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -55,7 +55,7 @@ -struct clipper { +struct clip_stage { struct draw_stage stage; /**< base class */ /* Basically duplicate some of the flatshading logic here: @@ -70,9 +70,9 @@ struct clipper { /* This is a bit confusing: */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { - return (struct clipper *)stage; + return (struct clip_stage *)stage; } @@ -92,11 +92,12 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } + static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) { - const struct clipper *clipper = clipper_stage(stage); + const struct clip_stage *clipper = clip_stage(stage); uint i; for (i = 0; i < clipper->num_color_attribs; i++) { const uint attr = clipper->color_attribs[i]; @@ -108,7 +109,7 @@ static void copy_colors( struct draw_stage *stage, /* Interpolate between two vertices to produce a third. */ -static void interp( const struct clipper *clip, +static void interp( const struct clip_stage *clip, struct vertex_header *dst, float t, const struct vertex_header *out, @@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ if (i == n-1) - header.flags |= edge_last; + header.flags |= edge_last; if (0) { const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; @@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage, } } + static INLINE float dot4(const float *a, const float *b) { - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); + return (a[0] * b[0] + + a[1] * b[1] + + a[2] * b[2] + + a[3] * b[3]); } @@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *a[MAX_CLIPPED_VERTICES]; struct vertex_header *b[MAX_CLIPPED_VERTICES]; struct vertex_header **inlist = a; @@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage, dp_prev = dp; } + /* swap in/out lists */ { struct vertex_header **tmp = inlist; inlist = outlist; @@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy color to new provoking vertex. */ if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } - - /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - const struct clipper *clipper = clipper_stage( stage ); + const struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; const float *pos0 = v0->clip; @@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage, } } + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ static void clip_init_state( struct draw_stage *stage ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; @@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage ) */ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { - struct clipper *clipper = CALLOC_STRUCT(clipper); + struct clip_stage *clipper = CALLOC_STRUCT(clip_stage); if (clipper == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1a5269c0de9..d40c0352401 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf, /* Note: we really do want data[0] here, not data[pos]: */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,10 +271,12 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e49041556bd..ef49e575366 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -172,6 +172,8 @@ struct draw_context boolean force_passthrough; /**< never clip or shade */ + boolean dump_vs; + double mrd; /**< minimum resolvable depth value, for polygon offset */ /* pipe state that we need: */ @@ -239,6 +241,8 @@ struct draw_context unsigned reduced_prim; + unsigned instance_id; + void *driver_private; }; @@ -265,6 +269,7 @@ boolean draw_gs_init( struct draw_context *draw ); void draw_gs_set_constants( struct draw_context *, const float (*constants)[4], unsigned size ); +void draw_gs_destroy( struct draw_context *draw ); /******************************************************************************* * Common shading code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2801dbafe47..a5ddec52863 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -280,20 +280,33 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - unsigned reduced_prim = u_reduced_prim(prim); + draw_arrays_instanced(draw, prim, start, count, 0, 1); +} + +void +draw_arrays_instanced(struct draw_context *draw, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + unsigned reduced_prim = u_reduced_prim(mode); + unsigned instance; + if (reduced_prim != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, prim, start, MIN2(count, 20)); + draw_print_arrays(draw, mode, start, MIN2(count, 20)); #if 0 { int i; - debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", - prim, start, count); + debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", + mode, start, count); tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { @@ -311,6 +324,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } #endif - /* drawing done here: */ - draw_pt_arrays(draw, prim, start, count); + for (instance = 0; instance < instanceCount; instance++) { + draw->instance_id = instance + startInstance; + draw_pt_arrays(draw, mode, start, count); + } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 20edf7a227e..d5e0d92a605 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vertex_input_count, - unsigned vertex_size ); + unsigned vertex_size, + unsigned instance_id_index ); void draw_pt_fetch_run( struct pt_fetch *fetch, const unsigned *elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 064e16c295c..4fb53276bbe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -121,10 +121,12 @@ void draw_pt_emit_prepare( struct pt_emit *emit, emit_sz = 0; break; } - + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; hw_key.element[i].input_buffer = src_buffer; hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; hw_key.element[i].output_format = output_format; hw_key.element[i].output_offset = dst_offset; @@ -204,6 +206,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->run( translate, 0, vertex_count, + draw->instance_id, hw_verts ); render->unmap_vertices( render, @@ -263,6 +266,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, count, + draw->instance_id, hw_verts); if (0) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 305bfef4352..55e7a7b81ad 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -58,12 +58,14 @@ struct pt_fetch { */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned vs_input_count, - unsigned vertex_size ) + unsigned vertex_size, + unsigned instance_id_index ) { struct draw_context *draw = fetch->draw; unsigned nr_inputs; - unsigned i, nr = 0; + unsigned i, nr = 0, ei = 0; unsigned dst_offset = 0; + unsigned num_extra_inputs = 0; struct translate_key key; fetch->vertex_size = vertex_size; @@ -78,9 +80,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { /* Need to set header->vertex_id = 0xffff somehow. */ + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; + key.element[nr].instance_divisor = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; dst_offset += 1 * sizeof(float); @@ -91,19 +95,36 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, */ dst_offset += 4 * sizeof(float); } - - assert( draw->pt.nr_vertex_elements >= vs_input_count ); - nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements ); + if (instance_id_index != ~0) { + num_extra_inputs++; + } + + assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count); + + nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs); for (i = 0; i < nr_inputs; i++) { - key.element[nr].input_format = draw->pt.vertex_element[i].src_format; - key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; - key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - key.element[nr].output_offset = dst_offset; + if (i == instance_id_index) { + key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID; + key.element[nr].input_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_format = PIPE_FORMAT_R32_USCALED; + key.element[nr].output_offset = dst_offset; + + dst_offset += sizeof(uint); + } else { + key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; + key.element[nr].input_format = draw->pt.vertex_element[ei].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset; + key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + ei++; + dst_offset += 4 * sizeof(float); + } - dst_offset += 4 * sizeof(float); nr++; } @@ -158,6 +179,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, translate->run_elts( translate, elts, count, + draw->instance_id, verts ); } @@ -183,6 +205,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, translate->run( translate, start, count, + draw->instance_id, verts ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e7fe6b3b768..2a604470e9a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -166,9 +166,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, continue; } + key.element[i].type = TRANSLATE_ELEMENT_NORMAL; key.element[i].input_format = input_format; key.element[i].input_buffer = input_buffer; key.element[i].input_offset = input_offset; + key.element[i].instance_divisor = src->instance_divisor; key.element[i].output_format = output_format; key.element[i].output_offset = dst_offset; @@ -256,6 +258,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, feme->translate->run_elts( feme->translate, fetch_elts, fetch_count, + draw->instance_id, hw_verts ); if (0) { @@ -314,6 +317,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); if (0) { @@ -374,6 +378,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->run( feme->translate, start, count, + draw->instance_id, hw_verts ); draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 1a9df4cac5d..23da556f797 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -59,6 +59,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; + unsigned i; + unsigned instance_id_index = ~0; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -66,6 +68,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned nr = MAX2( vs->info.num_inputs, vs->info.num_outputs + 1 ); + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + fpme->prim = prim; fpme->opt = opt; @@ -79,7 +90,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, vs->info.num_inputs, - fpme->vertex_size ); + fpme->vertex_size, + instance_id_index ); /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 35536895326..e03ac8c2291 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -43,11 +43,11 @@ #include "translate/translate.h" #include "translate/translate_cache.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" - void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) @@ -83,6 +83,10 @@ draw_create_vertex_shader(struct draw_context *draw, { struct draw_vertex_shader *vs; + if (draw->dump_vs) { + tgsi_dump(shader->tokens, 0); + } + vs = draw_create_vs_llvm( draw, shader ); if (!vs) { vs = draw_create_vs_sse( draw, shader ); @@ -152,6 +156,8 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { + draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e3b807ebd0e..00036cfe68b 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -43,6 +43,7 @@ struct draw_varient_input enum pipe_format format; unsigned buffer; unsigned offset; + unsigned instance_divisor; }; struct draw_varient_output diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696d..da9f3e3d35c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d16692584e5..9f40030f39f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -142,6 +142,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -181,6 +182,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -203,6 +205,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->fetch->run( vsvg->fetch, start, count, + vsvg->draw->instance_id, temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, @@ -239,6 +242,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->run( vsvg->emit, 0, count, + vsvg->draw->instance_id, output_buffer ); FREE(temp_buffer); @@ -281,9 +285,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, fetch.nr_elements = key->nr_inputs; fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL; fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].instance_divisor = 0; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; fetch.element[i].output_offset = i * 4 * sizeof(float); assert(fetch.element[i].output_offset < fetch.output_stride); @@ -295,17 +301,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, for (i = 0; i < key->nr_outputs; i++) { if (key->element[i].out.format != EMIT_1F_PSIZE) { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].instance_divisor = 0; emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; assert(emit.element[i].input_offset <= fetch.output_stride); } else { + emit.element[i].type = TRANSLATE_ELEMENT_NORMAL; emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].input_buffer = 1; emit.element[i].input_offset = 0; + emit.element[i].instance_divisor = 0; emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; emit.element[i].output_offset = key->element[i].out.offset; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ba6f7b15f9e..a4b78f14943 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -80,27 +80,11 @@ struct fenced_buffer_list */ struct fenced_buffer { - /* - * Immutable members. - */ - struct pb_buffer base; + struct pb_buffer *buffer; - struct fenced_buffer_list *list; - - /** - * Protected by fenced_buffer_list::mutex - */ - struct list_head head; - /** - * Following members are mutable and protected by this mutex. - * - * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex - * held, but in order to prevent deadlocks you must never lock - * fenced_buffer_list::mutex with this mutex held. - */ - pipe_mutex mutex; + /* FIXME: protect access with mutex */ /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -112,6 +96,9 @@ struct fenced_buffer struct pb_validate *vl; unsigned validation_flags; struct pipe_fence_handle *fence; + + struct list_head head; + struct fenced_buffer_list *list; }; @@ -123,24 +110,15 @@ fenced_buffer(struct pb_buffer *buf) } -/** - * Add the buffer to the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order, before calling this function. - * - * Reference count should be incremented before calling this function. - */ static INLINE void -fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_add(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); - /* TODO: Move the reference count increment here */ - #ifdef DEBUG LIST_DEL(&fenced_buf->head); assert(fenced_list->numUnfenced); @@ -152,16 +130,32 @@ fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, /** - * Remove the buffer from the fenced list. - * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. - * - * Reference count should be decremented after calling this function. + * Actually destroy the buffer. */ static INLINE void -fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(!fenced_buf->fence); +#ifdef DEBUG + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_list->numUnfenced); + --fenced_list->numUnfenced; +#else + (void)fenced_list; +#endif + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); +} + + +static INLINE void +_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { struct pb_fence_ops *ops = fenced_list->ops; @@ -183,56 +177,37 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, ++fenced_list->numUnfenced; #endif - /* TODO: Move the reference count decrement and destruction here */ + /** + * FIXME!!! + */ + + if(!pipe_is_referenced(&fenced_buf->base.base.reference)) + _fenced_buffer_destroy(fenced_buf); } -/** - * Wait for the fence to expire, and remove it from the fenced list. - * - * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will be acquired internally. - */ static INLINE enum pipe_error -fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +_fenced_buffer_finish(struct fenced_buffer *fenced_buf) { + struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - - /* - * Acquire the global lock. Must release buffer mutex first to preserve - * lock order. - */ - pipe_mutex_unlock(fenced_buf->mutex); - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { - /* Remove from the fenced list */ - /* TODO: remove consequents */ - fenced_buffer_remove_locked(fenced_list, fenced_buf); - - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - - ret = PIPE_OK; + if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { + return PIPE_ERROR; } + /* Remove from the fenced list */ + /* TODO: remove consequents */ + _fenced_buffer_remove(fenced_list, fenced_buf); } - pipe_mutex_unlock(fenced_list->mutex); - - return ret; + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return PIPE_OK; } @@ -240,8 +215,8 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, * Free as many fenced buffers from the list head as possible. */ static void -fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, - int wait) +_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) { struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; @@ -254,29 +229,21 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, while(curr != &fenced_list->delayed) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); - if(fenced_buf->fence != prev_fence) { int signaled; if (wait) signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) { - pipe_mutex_unlock(fenced_buf->mutex); + if (signaled != 0) break; - } prev_fence = fenced_buf->fence; } else { assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - fenced_buffer_remove_locked(fenced_list, fenced_buf); - pipe_mutex_unlock(fenced_buf->mutex); - - pb_buf = &fenced_buf->base; - pb_reference(&pb_buf, NULL); + _fenced_buffer_remove(fenced_list, fenced_buf); curr = next; next = curr->next; @@ -290,25 +257,30 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); - -#ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); - assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + if (fenced_buf->fence) { + struct pb_fence_ops *ops = fenced_list->ops; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + struct list_head *curr, *prev; + curr = &fenced_buf->head; + prev = curr->prev; + do { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); + _fenced_buffer_remove(fenced_list, fenced_buf); + curr = prev; + prev = curr->prev; + } while (curr != &fenced_list->delayed); + } + else { + /* delay destruction */ + } + } + else { + _fenced_buffer_destroy(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); -#else - (void)fenced_list; -#endif - - pb_reference(&fenced_buf->buffer, NULL); - - pipe_mutex_destroy(fenced_buf->mutex); - FREE(fenced_buf); } @@ -319,23 +291,24 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - void *map = NULL; - - pipe_mutex_lock(fenced_buf->mutex); + void *map; assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); /* Serialize writes */ if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && - ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { /* Don't wait for the GPU to finish writing */ - goto done; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) + _fenced_buffer_remove(fenced_list, fenced_buf); + else + return NULL; + } + else { + /* Wait for the GPU to finish writing */ + _fenced_buffer_finish(fenced_buf); } - - /* Wait for the GPU to finish writing */ - fenced_buffer_finish_locked(fenced_list, fenced_buf); } #if 0 @@ -352,9 +325,6 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } -done: - pipe_mutex_unlock(fenced_buf->mutex); - return map; } @@ -363,9 +333,6 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - - pipe_mutex_lock(fenced_buf->mutex); - assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { pb_unmap(fenced_buf->buffer); @@ -373,8 +340,6 @@ fenced_buffer_unmap(struct pb_buffer *buf) if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } - - pipe_mutex_unlock(fenced_buf->mutex); } @@ -386,14 +351,11 @@ fenced_buffer_validate(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); enum pipe_error ret; - pipe_mutex_lock(fenced_buf->mutex); - if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - ret = PIPE_OK; - goto done; + return PIPE_OK; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -401,17 +363,14 @@ fenced_buffer_validate(struct pb_buffer *buf, flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) { - ret = PIPE_ERROR_RETRY; - goto done; - } + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; #if 0 /* Do not validate if buffer is still mapped */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ - ret = PIPE_ERROR_RETRY; - goto done; + return PIPE_ERROR_RETRY; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -421,21 +380,17 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - ret = PIPE_OK; - goto done; + return PIPE_OK; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - goto done; + return ret; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; -done: - pipe_mutex_unlock(fenced_buf->mutex); - - return ret; + return PIPE_OK; } @@ -450,36 +405,29 @@ fenced_buffer_fence(struct pb_buffer *buf, fenced_buf = fenced_buffer(buf); fenced_list = fenced_buf->list; ops = fenced_list->ops; - - pipe_mutex_lock(fenced_list->mutex); - pipe_mutex_lock(fenced_buf->mutex); - - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - - if(fence != fenced_buf->fence) { - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - if (fenced_buf->fence) { - fenced_buffer_remove_locked(fenced_list, fenced_buf); - p_atomic_dec(&fenced_buf->base.base.reference.count); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - } - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - p_atomic_inc(&fenced_buf->base.base.reference.count); - fenced_buffer_add_locked(fenced_list, fenced_buf); - } - - pb_fence(fenced_buf->buffer, fence); - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; } - pipe_mutex_unlock(fenced_buf->mutex); + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; } @@ -489,7 +437,6 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - /* NOTE: accesses immutable members only -- mutex not necessary */ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); } @@ -529,8 +476,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, buf->buffer = buffer; buf->list = fenced_list; - pipe_mutex_init(buf->mutex); - #ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); @@ -572,7 +517,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, wait); + _fenced_buffer_list_check_free(fenced_list, wait); pipe_mutex_unlock(fenced_list->mutex); } @@ -594,13 +539,11 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) next = curr->next; while(curr != &fenced_list->unfenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); assert(!fenced_buf->fence); debug_printf("%10p %7u %7u\n", (void *) fenced_buf, fenced_buf->base.base.size, p_atomic_read(&fenced_buf->base.base.reference.count)); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -610,7 +553,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - pipe_mutex_lock(fenced_buf->mutex); signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %7u %10p %s\n", (void *) fenced_buf, @@ -618,7 +560,6 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) p_atomic_read(&fenced_buf->base.base.reference.count), (void *) fenced_buf->fence, signaled == 0 ? "y" : "n"); - pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -639,8 +580,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif + _fenced_buffer_list_check_free(fenced_list, 1); pipe_mutex_lock(fenced_list->mutex); - fenced_buffer_list_check_free_locked(fenced_list, 1); } #ifdef DEBUG @@ -648,7 +589,6 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #endif pipe_mutex_unlock(fenced_list->mutex); - pipe_mutex_destroy(fenced_list->mutex); fenced_list->ops->destroy(fenced_list->ops); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 1acf3c373eb..f675427d987 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -673,6 +673,13 @@ void x86_and( struct x86_function *p, emit_op_modrm( p, 0x23, 0x21, dst, src ); } +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 731a6517968..f7612d416a0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -244,6 +244,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); void x86_cdecl_caller_push_regs( struct x86_function *p ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index e2e5394f86f..c254a7274f1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -123,7 +123,8 @@ static const char *semantic_names[] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *immediate_type_names[] = @@ -218,8 +219,13 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { + ENM(src->Register.File, file_names); + if (src->Register.Dimension) { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } if (src->Register.Indirect) { - ENM( src->Register.File, file_names ); CHR( '[' ); ENM( src->Indirect.File, file_names ); CHR( '[' ); @@ -233,16 +239,10 @@ _dump_register_src( } CHR( ']' ); } else { - ENM( src->Register.File, file_names ); CHR( '[' ); SID( src->Register.Index ); CHR( ']' ); } - if (src->Register.Dimension) { - CHR( '[' ); - SID( src->Dimension.Index ); - CHR( ']' ); - } } static void diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 2bcb33392a8..83646b73c1e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1041,11 +1041,19 @@ fetch_src_file_channel( default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } break; default: assert( 0 ); + chan->u[0] = 0; + chan->u[1] = 0; + chan->u[2] = 0; + chan->u[3] = 0; } } @@ -1121,11 +1129,14 @@ fetch_source(const struct tgsi_exec_machine *mach, * subscript to a register file. Effectively it means that * the register file is actually a 2D array of registers. * - * file[1][3] == file[1*sizeof(file[1])+3], + * file[3][1] == file[3*sizeof(file[1])+1], * where: * [3] = Dimension.Index */ if (reg->Register.Dimension) { + int array_size; + union tgsi_exec_channel dim_index; + /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. @@ -1133,32 +1144,27 @@ fetch_source(const struct tgsi_exec_machine *mach, switch (reg->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: - index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + array_size = TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; + array_size = TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); + array_size = 0; } - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; + dim_index.i[0] = + dim_index.i[1] = + dim_index.i[2] = + dim_index.i[3] = reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. * Nothing stops us from indirectly addressing the indirect register, * but there is no need for that, so we won't exercise it. * - * file[1][ind[4].y+3], + * file[ind[4].y+3][1], * where: * ind = DimIndirect.File * [4] = DimIndirect.Index @@ -1183,20 +1189,25 @@ fetch_source(const struct tgsi_exec_machine *mach, &index2, &indir_index ); - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + dim_index.i[0] += indir_index.i[0]; + dim_index.i[1] += indir_index.i[1]; + dim_index.i[2] += indir_index.i[2]; + dim_index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ for (i = 0; i < QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) - index.i[i] = 0; + dim_index.i[i] = 0; } } + index.i[0] += dim_index.i[0] * array_size; + index.i[1] += dim_index.i[1] * array_size; + index.i[2] += dim_index.i[2] * array_size; + index.i[3] += dim_index.i[3] * array_size; + /* If by any chance there was a need for a 3D array of register * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..ad553c71a57 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 7f1c8e5dd68..431c3ffb142 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -335,13 +335,9 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } check_register_usage( ctx, - reg, + ind_reg, "indirect", FALSE ); } @@ -412,7 +408,7 @@ iter_declaration( uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register2d(reg, file, vert, i); + fill_scan_register2d(reg, file, i, vert); check_and_declare(ctx, reg); } } else { diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9fcffeda368..7fe5dad5ffb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -933,7 +933,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index e64e2b731df..ab557a23f94 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -40,6 +40,8 @@ union tgsi_any_token { struct tgsi_header header; struct tgsi_processor processor; struct tgsi_token token; + struct tgsi_property prop; + struct tgsi_property_data prop_data; struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; struct tgsi_declaration_semantic decl_semantic; @@ -64,6 +66,7 @@ struct ureg_tokens { }; #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS +#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 #define UREG_MAX_IMMEDIATE 32 @@ -95,6 +98,13 @@ struct ureg_program unsigned nr_gs_inputs; struct { + unsigned index; + unsigned semantic_name; + unsigned semantic_index; + } system_value[UREG_MAX_SYSTEM_VALUE]; + unsigned nr_system_values; + + struct { unsigned semantic_name; unsigned semantic_index; } output[UREG_MAX_OUTPUT]; @@ -123,6 +133,8 @@ struct ureg_program } constant_range[UREG_MAX_CONSTANT_RANGE]; unsigned nr_constant_ranges; + unsigned property_gs_input_prim; + unsigned nr_addrs; unsigned nr_preds; unsigned nr_loops; @@ -234,19 +246,29 @@ ureg_src_register( unsigned file, src.SwizzleY = TGSI_SWIZZLE_Y; src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = index; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned gs_input_prim) +{ + ureg->property_gs_input_prim = gs_input_prim; +} + + struct ureg_src ureg_DECL_fs_input( struct ureg_program *ureg, @@ -304,6 +326,25 @@ ureg_DECL_gs_input(struct ureg_program *ureg, } +struct ureg_src +ureg_DECL_system_value(struct ureg_program *ureg, + unsigned index, + unsigned semantic_name, + unsigned semantic_index) +{ + if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { + ureg->system_value[ureg->nr_system_values].index = index; + ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; + ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; + ureg->nr_system_values++; + } else { + set_bad(ureg); + } + + return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); +} + + struct ureg_dst ureg_DECL_output( struct ureg_program *ureg, unsigned name, @@ -616,6 +657,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg, struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + uint index; + uint i; + + if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { + set_bad(ureg); + return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); + } + + index = ureg->nr_immediates; + ureg->nr_immediates += (nr + 3) / 4; + + for (i = index; i < ureg->nr_immediates; i++) { + ureg->immediate[i].type = TGSI_IMM_UINT32; + ureg->immediate[i].nr = nr > 4 ? 4 : nr; + memcpy(ureg->immediate[i].value.u, + &v[(i - index) * 4], + ureg->immediate[i].nr * sizeof(uint)); + nr -= 4; + } + + return ureg_src_register(TGSI_FILE_IMMEDIATE, index); +} + + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *ureg, const int *v, unsigned nr ) @@ -628,7 +698,7 @@ void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -651,7 +721,7 @@ ureg_emit_src( struct ureg_program *ureg, if (src.Indirect) { out[0].src.Indirect = 1; out[n].value = 0; - out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.File = src.IndirectFile; out[n].src.SwizzleX = src.IndirectSwizzle; out[n].src.SwizzleY = src.IndirectSwizzle; out[n].src.SwizzleZ = src.IndirectSwizzle; @@ -660,6 +730,15 @@ ureg_emit_src( struct ureg_program *ureg, n++; } + if (src.Dimension) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + } + assert(n == size); } @@ -1027,13 +1106,34 @@ emit_immediate( struct ureg_program *ureg, out[4].imm_data.Uint = v[3]; } +static void +emit_property(struct ureg_program *ureg, + unsigned name, + unsigned data) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); + + out[0].value = 0; + out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; + out[0].prop.NrTokens = 2; + out[0].prop.PropertyName = name; + out[1].prop_data.Data = data; +} static void emit_decls( struct ureg_program *ureg ) { unsigned i; + if (ureg->property_gs_input_prim != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_INPUT_PRIM, + ureg->property_gs_input_prim); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { @@ -1058,6 +1158,15 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < ureg->nr_system_values; i++) { + emit_decl(ureg, + TGSI_FILE_SYSTEM_VALUE, + ureg->system_value[i].index, + ureg->system_value[i].semantic_name, + ureg->system_value[i].semantic_index, + TGSI_INTERPOLATE_CONSTANT); + } + for (i = 0; i < ureg->nr_outputs; i++) { emit_decl( ureg, TGSI_FILE_OUTPUT, @@ -1234,6 +1343,7 @@ struct ureg_program *ureg_create( unsigned processor ) return NULL; ureg->processor = processor; + ureg->property_gs_input_prim = ~0; return ureg; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 6f11273320a..8c8a6bbce66 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -47,13 +47,15 @@ struct ureg_src unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Pad : 1; /* BOOL */ unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ - int Index : 16; /* SINT */ unsigned Negate : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ int IndirectIndex : 16; /* SINT */ - int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -118,6 +120,14 @@ ureg_create_shader_and_destroy( struct ureg_program *p, } +/*********************************************************************** + * Build shader properties: + */ + +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned gs_input_prim); + /*********************************************************************** * Build shader declarations: @@ -137,6 +147,12 @@ struct ureg_src ureg_DECL_gs_input(struct ureg_program *, unsigned index); +struct ureg_src +ureg_DECL_system_value(struct ureg_program *, + unsigned index, + unsigned semantic_name, + unsigned semantic_index); + struct ureg_dst ureg_DECL_output( struct ureg_program *, unsigned semantic_name, @@ -153,6 +169,11 @@ ureg_DECL_immediate_uint( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); @@ -753,18 +774,30 @@ static INLINE struct ureg_src ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == TGSI_FILE_ADDRESS); + assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; + reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; reg.IndirectSwizzle = addr.SwizzleX; return reg; } +static INLINE struct ureg_src +ureg_src_dimension( struct ureg_src reg, int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimensionIndex = index; + return reg; +} + static INLINE struct ureg_dst ureg_dst( struct ureg_src src ) { struct ureg_dst dst; + assert(!src.Indirect || src.IndirectFile == TGSI_FILE_ADDRESS); + dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; @@ -792,13 +825,15 @@ ureg_src( struct ureg_dst dst ) src.SwizzleY = TGSI_SWIZZLE_Y; src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; - src.Pad = 0; src.Indirect = dst.Indirect; + src.IndirectFile = TGSI_FILE_ADDRESS; src.IndirectIndex = dst.IndirectIndex; src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; src.Index = dst.Index; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } @@ -837,13 +872,15 @@ ureg_src_undef( void ) src.SwizzleY = 0; src.SwizzleZ = 0; src.SwizzleW = 0; - src.Pad = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; src.Index = 0; src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; return src; } diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 34526eb0617..54ed2c1a4be 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -44,12 +44,19 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + struct translate_element { + enum translate_element_type type; enum pipe_format input_format; enum pipe_format output_format; unsigned input_buffer:8; unsigned input_offset:24; + unsigned instance_divisor; unsigned output_offset; }; @@ -74,11 +81,13 @@ struct translate { void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer); void (PIPE_CDECL *run)( struct translate *, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer); }; @@ -103,8 +112,13 @@ static INLINE int translate_keysize( const struct translate_key *key ) static INLINE int translate_key_compare( const struct translate_key *a, const struct translate_key *b ) { - int keysize = translate_keysize(a); - return memcmp(a, b, keysize); + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 266e7ee81e6..24727d49888 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -46,9 +46,12 @@ struct translate_generic { struct translate translate; struct { + enum translate_element_type type; + fetch_func fetch; unsigned buffer; unsigned input_offset; + unsigned instance_divisor; emit_func emit; unsigned output_offset; @@ -568,6 +571,7 @@ static emit_func get_emit_func( enum pipe_format format ) static void PIPE_CDECL generic_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -583,13 +587,20 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); + const char *src; char *dst = (vert + tg->attrib[attr].output_offset); + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -607,6 +618,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, static void PIPE_CDECL generic_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); @@ -623,13 +635,25 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src = (tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt); - char *dst = (vert + tg->attrib[attr].output_offset); - tg->attrib[attr].fetch( src, data ); + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const char *src; + + if (tg->attrib[attr].instance_divisor) { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * + (instance_id / tg->attrib[attr].instance_divisor); + } else { + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt; + } + + tg->attrib[attr].fetch( src, data ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", i, attr, data[0], data[1], data[2], data[3]); @@ -683,10 +707,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + tg->attrib[i].type = key->element[i].type; tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); tg->attrib[i].output_offset = key->element[i].output_offset; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index b62db8d8f33..c13e7427387 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -49,19 +49,29 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate, unsigned start, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, const unsigned *elts, unsigned count, - void *output_buffer ); + unsigned instance_id, + void *output_buffer); struct translate_buffer { const void *base_ptr; unsigned stride; - void *ptr; /* updated per vertex */ }; +struct translate_buffer_varient { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ +}; + + +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + struct translate_sse { struct translate translate; @@ -81,6 +91,16 @@ struct translate_sse { struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; unsigned nr_buffers; + /* Multiple buffer varients can map to a single buffer. */ + struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_varients; + + /* Multiple elements can map to a single buffer varient. */ + unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + run_func gen_run; run_elts_func gen_run_elts; @@ -359,32 +379,61 @@ static boolean init_inputs( struct translate_sse *p, boolean linear ) { unsigned i; - if (linear) { - for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg instance_id = x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; + struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; + + if (linear || varient->instance_divisor) { struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &buffer->stride)); struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].base_ptr)); + get_offset(p, &buffer->base_ptr)); struct x86_reg elt = p->idx_EBX; - struct x86_reg tmp = p->tmp_EAX; - + struct x86_reg tmp_EAX = p->tmp_EAX; /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor */ - x86_mov(p->func, tmp, buf_stride); - x86_imul(p->func, tmp, elt); - x86_add(p->func, tmp, buf_base_ptr); + if (varient->instance_divisor) { + /* Our index is instance ID divided by instance divisor. + */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (varient->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->machine_EDX; + struct x86_reg tmp_ECX = p->outbuf_ECX; + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. + */ + + x86_push(p->func, tmp_EDX); + x86_push(p->func, tmp_ECX); + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + x86_pop(p->func, tmp_ECX); + x86_pop(p->func, tmp_EDX); + } + } else { + x86_mov(p->func, tmp_EAX, elt); + } + x86_imul(p->func, tmp_EAX, buf_stride); + x86_add(p->func, tmp_EAX, buf_base_ptr); /* In the linear case, keep the buffer pointer instead of the * index number. */ - if (p->nr_buffers == 1) - x86_mov( p->func, elt, tmp ); + if (linear && p->nr_buffer_varients == 1) + x86_mov(p->func, elt, tmp_EAX); else - x86_mov( p->func, buf_ptr, tmp ); + x86_mov(p->func, buf_ptr, tmp_EAX); } } @@ -394,31 +443,36 @@ static boolean init_inputs( struct translate_sse *p, static struct x86_reg get_buffer_ptr( struct translate_sse *p, boolean linear, - unsigned buf_idx, + unsigned var_idx, struct x86_reg elt ) { - if (linear && p->nr_buffers == 1) { + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDX, + get_offset(p, &p->instance_id)); + } + if (linear && p->nr_buffer_varients == 1) { return p->idx_EBX; } - else if (linear) { + else if (linear || p->buffer_varient[var_idx].instance_divisor) { struct x86_reg ptr = p->tmp_EAX; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].ptr)); + get_offset(p, &p->buffer_varient[var_idx].ptr)); x86_mov(p->func, ptr, buf_ptr); return ptr; } else { struct x86_reg ptr = p->tmp_EAX; + const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[buf_idx].base_ptr)); + get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); @@ -436,28 +490,33 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p, static boolean incr_inputs( struct translate_sse *p, boolean linear ) { - if (linear && p->nr_buffers == 1) { + if (linear && p->nr_buffer_varients == 1) { struct x86_reg stride = x86_make_disp(p->machine_EDX, get_offset(p, &p->buffer[0].stride)); - x86_add(p->func, p->idx_EBX, stride); - sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + if (p->buffer_varient[0].instance_divisor == 0) { + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } } else if (linear) { unsigned i; /* Is this worthwhile?? */ - for (i = 0; i < p->nr_buffers; i++) { + for (i = 0; i < p->nr_buffer_varients; i++) { + struct translate_buffer_varient *varient = &p->buffer_varient[i]; struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].ptr)); + get_offset(p, &varient->ptr)); struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, - get_offset(p, &p->buffer[i].stride)); + get_offset(p, &p->buffer[varient->buffer_index].stride)); - x86_mov(p->func, p->tmp_EAX, buf_ptr); - x86_add(p->func, p->tmp_EAX, buf_stride); - if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); - x86_mov(p->func, buf_ptr, p->tmp_EAX); + if (varient->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } } } else { @@ -514,7 +573,18 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); - x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); + + /* Load instance ID. + */ + if (p->use_instancing) { + x86_mov(p->func, + p->tmp_EAX, + x86_fn_arg(p->func, 4)); + x86_mov(p->func, + x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), + p->tmp_EAX); + } /* Get vertex count, compare to zero */ @@ -531,17 +601,18 @@ static boolean build_vertex_emit( struct translate_sse *p, label = x86_get_label(p->func); { struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); - int last_vb = -1; + int last_varient = -1; struct x86_reg vb; for (j = 0; j < p->translate.key.nr_elements; j++) { const struct translate_element *a = &p->translate.key.element[j]; + unsigned varient = p->element_to_buffer_varient[j]; /* Figure out source pointer address: */ - if (a->input_buffer != last_vb) { - last_vb = a->input_buffer; - vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + if (varient != last_varient) { + last_varient = varient; + vb = get_buffer_ptr(p, linear, varient, elt); } if (!translate_attr( p, a, @@ -624,6 +695,7 @@ static void translate_sse_release( struct translate *translate ) static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -631,12 +703,14 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, p->gen_run_elts( translate, elts, count, - output_buffer ); + instance_id, + output_buffer); } static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, + unsigned instance_id, void *output_buffer ) { struct translate_sse *p = (struct translate_sse *)translate; @@ -644,7 +718,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, p->gen_run( translate, start, count, - output_buffer ); + instance_id, + output_buffer); } @@ -666,8 +741,37 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; - for (i = 0; i < key->nr_elements; i++) - p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + for (i = 0; i < key->nr_elements; i++) { + if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { + unsigned j; + + p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); + + if (key->element[i].instance_divisor) { + p->use_instancing = TRUE; + } + + /* + * Map vertex element to vertex buffer varient. + */ + for (j = 0; j < p->nr_buffer_varients; j++) { + if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && + p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { + break; + } + } + if (j == p->nr_buffer_varients) { + p->buffer_varient[j].buffer_index = key->element[i].input_buffer; + p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; + p->nr_buffer_varients++; + } + p->element_to_buffer_varient[i] = j; + } else { + assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); + + p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; + } + } if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f74e2aa8b8..9725890bd4a 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -262,6 +262,10 @@ regions_overlap(int srcX0, int srcY0, * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. * Flipping and stretching are supported. + * \param filter one of PIPE_TEX_MIPFILTER_NEAREST/LINEAR + * \param writemask controls which channels in the dest surface are sourced + * from the src surface. Disabled channels are sourced + * from (0,0,0,1). * XXX what about clipping??? * XXX need some control over blitting Z and/or stencil. */ diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 46b4706b768..249a0375fc5 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -379,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float t1 = y1 / (float)surf->height; float s2 = x2 / (float)surf->width; float t2 = y2 / (float)surf->height; - const float st[4][2] = { - {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} - }; + float st[4][2]; + + st[0][0] = s1; + st[0][1] = t1; + st[1][0] = s2; + st[1][1] = t1; + st[2][0] = s2; + st[2][1] = t2; + st[3][0] = s1; + st[3][1] = t2; util_map_texcoords2d_onto_cubemap(surf->face, /* pointer, stride in floats */ diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 4110485fb19..e2e23c3cdd8 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -61,6 +61,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex attributes */ for (i = 0; i < num_attribs; i++) { velements[i].src_offset = i * 4 * sizeof(float); + velements[i].instance_divisor = 0; velements[i].vertex_buffer_index = 0; velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velements[i].nr_components = 4; diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 9f16b42944e..01f7931aed1 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -62,10 +62,10 @@ PIPE_FORMAT_R16G16_SSCALED , array , 1, 1, s16 , s16 , , , xy01, PIPE_FORMAT_R16G16B16_SSCALED , array , 1, 1, s16 , s16 , s16 , , xyz1, rgb PIPE_FORMAT_R16G16B16A16_SSCALED , array , 1, 1, s16 , s16 , s16 , s16 , xyzw, rgb PIPE_FORMAT_R8_UNORM , array , 1, 1, un8 , , , , x001, rgb -PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , xy01, rgb -PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , xyz1, rgb -PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb -PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , xyz1, rgb +PIPE_FORMAT_R8G8_UNORM , array , 1, 1, un8 , un8 , , , yx01, rgb +PIPE_FORMAT_R8G8B8_UNORM , array , 1, 1, un8 , un8 , un8 , , zyx1, rgb +PIPE_FORMAT_R8G8B8A8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb +PIPE_FORMAT_R8G8B8X8_UNORM , array , 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb PIPE_FORMAT_R8_USCALED , array , 1, 1, u8 , , , , x001, rgb PIPE_FORMAT_R8G8_USCALED , array , 1, 1, u8 , u8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_USCALED , array , 1, 1, u8 , u8 , u8 , , xyz1, rgb diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 43eb0153ee7..0ab53c75dd6 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,8 @@ util_pack_z(enum pipe_format format, double z) if (z == 1.0) return 0xffffffff; return (uint) (z * 0xffffffff); + case PIPE_FORMAT_Z32_FLOAT: + return (uint)z; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c new file mode 100644 index 00000000000..3f43a19e018 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.c @@ -0,0 +1,145 @@ + +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "util/u_ringbuffer.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/* Generic ringbuffer: + */ +struct util_ringbuffer +{ + struct util_packet *buf; + unsigned mask; + + /* Can this be done with atomic variables?? + */ + unsigned head; + unsigned tail; + pipe_condvar change; + pipe_mutex mutex; +}; + + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ) +{ + struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer); + if (ring == NULL) + return NULL; + + assert(util_is_power_of_two(dwords)); + + ring->buf = MALLOC( dwords * sizeof(unsigned) ); + if (ring->buf == NULL) + goto fail; + + ring->mask = dwords - 1; + + pipe_condvar_init(ring->change); + pipe_mutex_init(ring->mutex); + return ring; + +fail: + FREE(ring->buf); + FREE(ring); + return NULL; +} + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ) +{ + pipe_condvar_destroy(ring->change); + pipe_mutex_destroy(ring->mutex); + FREE(ring->buf); + FREE(ring); +} + +static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring ) +{ + return (ring->tail - (ring->head + 1)) & ring->mask; +} + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ) +{ + unsigned i; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + while (util_ringbuffer_space(ring) < packet->dwords) + pipe_condvar_wait(ring->change, ring->mutex); + + /* Copy data to ring: + */ + for (i = 0; i < packet->dwords; i++) { + + /* Copy all dwords of the packet. Note we're abusing the + * typesystem a little - we're being passed a pointer to + * something, but probably not an array of packet structs: + */ + ring->buf[ring->head] = packet[i]; + ring->head++; + ring->head &= ring->mask; + } + + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); +} + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ) +{ + const struct util_packet *ring_packet; + unsigned i; + int ret = PIPE_OK; + + /* XXX: over-reliance on mutexes, etc: + */ + pipe_mutex_lock(ring->mutex); + + /* Wait for free space: + */ + if (wait) { + while (util_ringbuffer_space(ring) == 0) + pipe_condvar_wait(ring->change, ring->mutex); + } + else { + if (util_ringbuffer_space(ring) == 0) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + } + + ring_packet = &ring->buf[ring->tail]; + + /* Both of these are considered bugs. Raise an assert on debug builds. + */ + if (ring_packet->dwords > ring->mask + 1 - util_ringbuffer_space(ring) || + ring_packet->dwords > max_dwords) { + assert(0); + ret = PIPE_ERROR_BAD_INPUT; + goto out; + } + + /* Copy data from ring: + */ + for (i = 0; i < ring_packet->dwords; i++) { + packet[i] = ring->buf[ring->tail]; + ring->tail++; + ring->tail &= ring->mask; + } + +out: + /* Signal change: + */ + pipe_condvar_signal(ring->change); + pipe_mutex_unlock(ring->mutex); + return ret; +} diff --git a/src/gallium/auxiliary/util/u_ringbuffer.h b/src/gallium/auxiliary/util/u_ringbuffer.h new file mode 100644 index 00000000000..85f0ad6c1f6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_ringbuffer.h @@ -0,0 +1,29 @@ + +#ifndef UTIL_RINGBUFFER_H +#define UTIL_RINGBUFFER_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" /* only for pipe_error! */ + +/* Generic header + */ +struct util_packet { + unsigned dwords:8; + unsigned data24:24; +}; + +struct util_ringbuffer; + +struct util_ringbuffer *util_ringbuffer_create( unsigned dwords ); + +void util_ringbuffer_destroy( struct util_ringbuffer *ring ); + +void util_ringbuffer_enqueue( struct util_ringbuffer *ring, + const struct util_packet *packet ); + +enum pipe_error util_ringbuffer_dequeue( struct util_ringbuffer *ring, + struct util_packet *packet, + unsigned max_dwords, + boolean wait ); + +#endif diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1ba82bb21f0..f9936eb1cb2 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1357,7 +1357,10 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); + util_format_write_4f(format, + p, src_stride * sizeof(float), + packed, util_format_get_stride(format, w), + 0, 0, w, h); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index fc2a1c59a6b..a524e2fdfb3 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -316,6 +316,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].instance_divisor = 0; c->vertex_elems[0].vertex_buffer_index = 0; c->vertex_elems[0].nr_components = 2; c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -345,6 +346,7 @@ init_buffers(struct vl_compositor *c) pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].instance_divisor = 0; c->vertex_elems[1].vertex_buffer_index = 1; c->vertex_elems[1].nr_components = 2; c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; @@ -353,7 +355,7 @@ init_buffers(struct vl_compositor *c) * Create our vertex shader's constant buffer * Const buffer contains scaling and translation vectors */ - c->vs_const_buf.buffer = pipe_buffer_create + c->vs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -365,7 +367,7 @@ init_buffers(struct vl_compositor *c) * Create our fragment shader's constant buffer * Const buffer contains the color conversion matrix and bias vectors */ - c->fs_const_buf.buffer = pipe_buffer_create + c->fs_const_buf = pipe_buffer_create ( c->pipe->screen, 1, @@ -390,8 +392,8 @@ cleanup_buffers(struct vl_compositor *c) for (i = 0; i < 2; ++i) pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); - pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->vs_const_buf, NULL); + pipe_buffer_reference(&c->fs_const_buf, NULL); } bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) @@ -483,13 +485,13 @@ void vl_compositor_render(struct vl_compositor *compositor, compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf); vs_consts = pipe_buffer_map ( compositor->pipe->screen, - compositor->vs_const_buf.buffer, + compositor->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); @@ -511,7 +513,7 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; - pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf); compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); @@ -525,10 +527,10 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float memcpy ( - pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), mat, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer); + pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf); } diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index f441901a751..6a9a3fd7af1 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -47,7 +47,7 @@ struct vl_compositor struct pipe_scissor_state scissor; struct pipe_vertex_buffer vertex_bufs[2]; struct pipe_vertex_element vertex_elems[2]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; + struct pipe_buffer *vs_const_buf, *fs_const_buf; }; bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index caf581aca60..e43187545c5 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -891,53 +891,61 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* Position element */ r->vertex_elems[0].src_offset = 0; + r->vertex_elems[0].instance_divisor = 0; r->vertex_elems[0].vertex_buffer_index = 0; r->vertex_elems[0].nr_components = 2; r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Luma, texcoord element */ r->vertex_elems[1].src_offset = sizeof(struct vertex2f); + r->vertex_elems[1].instance_divisor = 0; r->vertex_elems[1].vertex_buffer_index = 0; r->vertex_elems[1].nr_components = 2; r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cr texcoord element */ r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + r->vertex_elems[2].instance_divisor = 0; r->vertex_elems[2].vertex_buffer_index = 0; r->vertex_elems[2].nr_components = 2; r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Chroma Cb texcoord element */ r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + r->vertex_elems[3].instance_divisor = 0; r->vertex_elems[3].vertex_buffer_index = 0; r->vertex_elems[3].nr_components = 2; r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface top field texcoord element */ r->vertex_elems[4].src_offset = 0; + r->vertex_elems[4].instance_divisor = 0; r->vertex_elems[4].vertex_buffer_index = 1; r->vertex_elems[4].nr_components = 2; r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; /* First ref surface bottom field texcoord element */ r->vertex_elems[5].src_offset = sizeof(struct vertex2f); + r->vertex_elems[5].instance_divisor = 0; r->vertex_elems[5].vertex_buffer_index = 1; r->vertex_elems[5].nr_components = 2; r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface top field texcoord element */ r->vertex_elems[6].src_offset = 0; + r->vertex_elems[6].instance_divisor = 0; r->vertex_elems[6].vertex_buffer_index = 2; r->vertex_elems[6].nr_components = 2; r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Second ref surface bottom field texcoord element */ r->vertex_elems[7].src_offset = sizeof(struct vertex2f); + r->vertex_elems[7].instance_divisor = 0; r->vertex_elems[7].vertex_buffer_index = 2; r->vertex_elems[7].nr_components = 2; r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; - r->vs_const_buf.buffer = pipe_buffer_create + r->vs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -945,7 +953,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) sizeof(struct vertex_shader_consts) ); - r->fs_const_buf.buffer = pipe_buffer_create + r->fs_const_buf = pipe_buffer_create ( r->pipe->screen, DEFAULT_BUF_ALIGNMENT, @@ -954,11 +962,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) memcpy ( - pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), &fs_consts, sizeof(struct fragment_shader_consts) ); - pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf); return true; } @@ -970,8 +978,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r) assert(r); - pipe_buffer_reference(&r->vs_const_buf.buffer, NULL); - pipe_buffer_reference(&r->fs_const_buf.buffer, NULL); + pipe_buffer_reference(&r->vs_const_buf, NULL); + pipe_buffer_reference(&r->fs_const_buf, NULL); for (i = 0; i < 3; ++i) pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); @@ -1284,19 +1292,19 @@ flush(struct vl_mpeg12_mc_renderer *r) vs_consts = pipe_buffer_map ( - r->pipe->screen, r->vs_const_buf.buffer, + r->pipe->screen, r->vs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); vs_consts->denorm.x = r->surface->width0; vs_consts->denorm.y = r->surface->height0; - pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); + pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, - &r->vs_const_buf); + r->vs_const_buf); r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, - &r->fs_const_buf); + r->fs_const_buf); if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 64184337a06..f00b8c7b8b1 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -63,8 +63,8 @@ struct vl_mpeg12_mc_renderer struct pipe_viewport_state viewport; struct pipe_scissor_state scissor; - struct pipe_constant_buffer vs_const_buf; - struct pipe_constant_buffer fs_const_buf; + struct pipe_buffer *vs_const_buf; + struct pipe_buffer *fs_const_buf; struct pipe_framebuffer_state fb_state; struct pipe_vertex_element vertex_elems[8]; diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 21f5f9111a0..d394f5b4f1b 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -33,7 +33,11 @@ This state describes how resources in various flavours (textures, buffers, surfaces) are bound to the driver. -* ``set_constant_buffer`` +* ``set_constant_buffer`` sets a constant buffer to be used for a given shader + type. index is used to indicate which buffer to set (some apis may allow + multiple ones to be set, and binding a specific one later, though drivers + are mostly restricted to the first one right now). + * ``set_framebuffer_state`` * ``set_fragment_sampler_textures`` * ``set_vertex_sampler_textures`` @@ -47,7 +51,6 @@ These pieces of state are too small, variable, and/or trivial to have CSO objects. They all follow simple, one-method binding calls, e.g. ``set_edgeflags``. -* ``set_edgeflags`` * ``set_blend_color`` * ``set_clip_state`` * ``set_polygon_stipple`` @@ -72,12 +75,67 @@ stencil-only clears of packed depth-stencil buffers. Drawing ^^^^^^^ -``draw_arrays`` +``draw_arrays`` draws a specified primitive. + +This command is equivalent to calling ``draw_arrays_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. + +``draw_elements`` draws a specified primitive using an optional +index buffer. -``draw_elements`` +This command is equivalent to calling ``draw_elements_instanced`` +with ``startInstance`` set to 0 and ``instanceCount`` set to 1. ``draw_range_elements`` +XXX: this is (probably) a temporary entrypoint, as the range +information should be available from the vertex_buffer state. +Using this to quickly evaluate a specialized path in the draw +module. + +``draw_arrays_instanced`` draws multiple instances of the same primitive. + +This command is equivalent to calling ``draw_elements_instanced`` +with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. + +``draw_elements_instanced`` draws multiple instances of the same primitive +using an optional index buffer. + +For instanceID in the range between ``startInstance`` +and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive +specified by ``mode`` and sequential numbers in the range between ``start`` +and ``start``+``count``-1, inclusive. + +If ``indexBuffer`` is not NULL, it specifies an index buffer with index +byte size of ``indexSize``. The sequential numbers are used to lookup +the index buffer and the resulting indices in turn are used to fetch +vertex attributes. + +If ``indexBuffer`` is NULL, the sequential numbers are used directly +as indices to fetch vertex attributes. + +If a given vertex element has ``instance_divisor`` set to 0, it is said +it contains per-vertex data and effective vertex attribute address needs +to be recalculated for every index. + + attribAddr = ``stride`` * index + ``src_offset`` + +If a given vertex element has ``instance_divisor`` set to non-zero, +it is said it contains per-instance data and effective vertex attribute +address needs to recalculated for every ``instance_divisor``-th instance. + + attribAddr = ``stride`` * instanceID / ``instance_divisor`` + ``src_offset`` + +In the above formulas, ``src_offset`` is taken from the given vertex element +and ``stride`` is taken from a vertex buffer associated with the given +vertex element. + +The calculated attribAddr is used as an offset into the vertex buffer to +fetch the attribute data. + +The value of ``instanceID`` can be read in a vertex shader through a system +value register declared with INSTANCEID semantic name. + Queries ^^^^^^^ diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index 4d8e1708e7c..bfa4a1170a2 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -7,32 +7,69 @@ The rasterizer state controls the rendering of points, lines and triangles. Attributes include polygon culling state, line width, line stipple, multisample state, scissoring and flat/smooth shading. - Members ------- +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Whether the entire TCL pipeline should be bypassed. This implies that +vertices are pre-transformed for the viewport, and will not be run +through the vertex shader. + +.. note:: + + Implementations may still clip away vertices that are not in the viewport + when this is set. + flatshade - If set, the provoking vertex of each polygon is used to determine the - color of the entire polygon. If not set, fragment colors will be - interpolated between the vertex colors. - Note that this is separate from the fragment shader input attributes - CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at +^^^^^^^^^ + +If set, the provoking vertex of each polygon is used to determine the color +of the entire polygon. If not set, fragment colors will be interpolated +between the vertex colors. + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gourard for most hardware. + +.. note:: + + This is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE. The flatshade state is needed at clipping time to determine how to set the color of new vertices. - Also note that the draw module can implement flat shading by copying - the provoking vertex color to all the other vertices in the primitive. + + :ref:`Draw` can implement flat shading by copying the provoking vertex + color to all the other vertices in the primitive. flatshade_first - Whether the first vertex should be the provoking vertex, for most - primitives. If not set, the last vertex is the provoking vertex. +^^^^^^^^^^^^^^^ + +Whether the first vertex should be the provoking vertex, for most primitives. +If not set, the last vertex is the provoking vertex. + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. + +Other Members +^^^^^^^^^^^^^ light_twoside - If set, there are per-vertex back-facing colors. The draw module + If set, there are per-vertex back-facing colors. :ref:`Draw` uses this state along with the front/back information to set the final vertex colors prior to rasterization. front_winding Indicates the window order of front-facing polygons, either PIPE_WINDING_CW or PIPE_WINDING_CCW + cull_mode Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no polygons), PIPE_WINDING_CW (cull clockwise-winding polygons), @@ -68,7 +105,7 @@ line_stipple_enable line_stipple_pattern 16-bit bitfield of on/off flags, used to pattern the line stipple. line_stipple_factor - When drawinga stippled line, each bit in the stipple pattern is + When drawing a stippled line, each bit in the stipple pattern is repeated N times, where N = line_stipple_factor + 1. line_last_pixel Controls whether the last pixel in a line is drawn or not. OpenGL @@ -96,7 +133,7 @@ sprite_coord_mode lower left vertex will have coordinate (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have coordinate (0,0,0,1). - This state is needed by the 'draw' module because that's where each + This state is needed by :ref:`Draw` because that's where each point vertex is converted into four quad vertices. There's no other place to emit the new vertex texture coordinates which are required for sprite rendering. @@ -108,45 +145,9 @@ scissor Whether the scissor test is enabled. multisample - Whether :ref:`MSAA` is enabled. - -bypass_vs_clip_and_viewport - Whether the entire TCL pipeline should be bypassed. This implies that - vertices are pre-transformed for the viewport, and will not be run - through the vertex shader. Note that implementations may still clip away - vertices that are not in the viewport. + Whether :term:`MSAA` is enabled. gl_rasterization_rules Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, the rasterizer will use (0, 0) for pixel centers. - -Notes ------ - -flatshade -^^^^^^^^^ - -The actual interpolated shading algorithm is obviously -implementation-dependent, but will usually be Gourard for most hardware. - -bypass_vs_clip_and_viewport -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When set, this implies that vertices are pre-transformed for the viewport, and -will not be run through the vertex shader. Note that implementations may still -clip away vertices that are not visible. - -flatshade_first -^^^^^^^^^^^^^^^ - -There are several important exceptions to the specification of this rule. - -* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first - vertex. If the caller wishes to change the provoking vertex, they merely - need to rotate the vertices themselves. -* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no - effect; the provoking vertex is always the last vertex. -* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the - second vertex, not the first. This permits each segment of the fan to have - a different color. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 33e846e33d2..0ef9fe2645c 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -61,10 +61,7 @@ VMWare SVGA ATI r300 ^^^^^^^^ -AMD/ATI r600 -^^^^^^^^^^^^ - -Highly experimental. +Testing-quality. Softpipe ^^^^^^^^ @@ -109,17 +106,31 @@ Auxiliary CSO Cache ^^^^^^^^^ +The CSO cache is used to accelerate preparation of state by saving +driver-specific state structures for later use. + +.. _draw: + Draw ^^^^ +Draw is a software :term:`TCL` pipeline for hardware that lacks vertex shaders +or other essential parts of pre-rasterization vertex preparation. + Gallivm ^^^^^^^ Indices ^^^^^^^ -Pipe Buffer Manager -^^^^^^^^^^^^^^^^^^^ +Indices provides tools for translating or generating element indices for +use with element-based rendering. + +Pipe Buffer Managers +^^^^^^^^^^^^^^^^^^^^ + +Each of these managers provides various services to drivers that are not +fully utilizing a memory manager. Remote Debugger ^^^^^^^^^^^^^^^ @@ -127,12 +138,12 @@ Remote Debugger Runtime Assembly Emission ^^^^^^^^^^^^^^^^^^^^^^^^^ -Surface Context Tracker -^^^^^^^^^^^^^^^^^^^^^^^ - TGSI ^^^^ +The TGSI auxiliary module provides basic utilities for manipulating TGSI +streams. + Translate ^^^^^^^^^ diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index 6a9110ce786..aec89f8b5c4 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -8,3 +8,8 @@ Glossary Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes multiple samples of the depth buffer, and uses this information to smooth the edges of polygons. + + TCL + Transform, Clipping, & Lighting. The three stages of preparation in a + rasterizing pipeline prior to the actual rasterization of vertices into + fragments. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 9631e6967ef..72bb75a55dc 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -3,6 +3,28 @@ Screen A screen is an object representing the context-independent part of a device. +Useful Flags +------------ + +.. _pipe_texture_usage: + +PIPE_TEXTURE_USAGE +^^^^^^^^^^^^^^^^^^ + +These flags determine the possible roles a texture may be used for during its +lifetime. Texture usage flags are cumulative and may be combined to create a +texture that can be used as multiple things. + +* ``RENDER_TARGET``: A colorbuffer or pixelbuffer. +* ``DISPLAY_TARGET``: A sharable buffer that can be given to another process. +* ``PRIMARY``: A frontbuffer or scanout buffer. +* ``DEPTH_STENCIL``: A depthbuffer, stencilbuffer, or Z buffer. Gallium does + not explicitly provide for stencil-only buffers, so any stencilbuffer + validated here is implicitly also a depthbuffer. +* ``SAMPLER``: A texture that may be sampled from in a fragment or vertex + shader. +* ``DYNAMIC``: A texture that will be mapped frequently. + Methods ------- @@ -33,6 +55,14 @@ is_format_supported See if a format can be used in a specific manner. +**usage** is a bitmask of :ref:`PIPE_TEXTURE_USAGE` flags. + +Returns TRUE if all usages can be satisfied. + +.. note:: + + ``PIPE_TEXTURE_USAGE_DYNAMIC`` is not a valid usage. + texture_create ^^^^^^^^^^^^^^ diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index ebee4902b05..65a669d8cfa 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -516,8 +516,11 @@ SEQ - Set On Equal .. math:: dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 @@ -526,8 +529,11 @@ SFL - Set On False .. math:: dst.x = 0 + dst.y = 0 + dst.z = 0 + dst.w = 0 Considered for removal. @@ -537,8 +543,11 @@ SGT - Set On Greater Than .. math:: dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 + dst.w = (src0.w > src1.w) ? 1 : 0 @@ -560,8 +569,11 @@ SLE - Set On Less Equal Than .. math:: dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 @@ -570,8 +582,11 @@ SNE - Set On Not Equal .. math:: dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 @@ -580,8 +595,11 @@ STR - Set On True .. math:: dst.x = 1 + dst.y = 1 + dst.z = 1 + dst.w = 1 @@ -629,8 +647,11 @@ X2D - 2D Coordinate Transformation .. math:: dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w Considered for removal. @@ -979,13 +1000,13 @@ XOR - Bitwise Xor .. math:: - dst.x = src0.x ^ src1.x + dst.x = src0.x \oplus src1.x - dst.y = src0.y ^ src1.y + dst.y = src0.y \oplus src1.y - dst.z = src0.z ^ src1.z + dst.z = src0.z \oplus src1.z - dst.w = src0.w ^ src1.w + dst.w = src0.w \oplus src1.w SAD - Sum Of Absolute Differences diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba8..aa29dcb3947 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9d..e402ed29220 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -115,7 +115,7 @@ struct cell_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 3fa8b975d39..0a4da8ecc85 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -51,17 +51,17 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], - sp->constants[i].buffer->size); + sp->constants[i]->size); } } draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].buffer->size); + sp->constants[PIPE_SHADER_VERTEX]->size); } static void @@ -70,8 +70,8 @@ cell_unmap_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5b87286d4c5..f1e1dcb9eb0 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -240,12 +240,12 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader].buffer->size / sizeof(float); + const uint num_const = cell->constants[shader]->size / sizeof(float); uint i, j; float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); uint32_t *ibuf = (uint32_t *) buf; const float *constants = pipe_buffer_map(cell->pipe.screen, - cell->constants[shader].buffer, + cell->constants[shader], PIPE_BUFFER_USAGE_CPU_READ); ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; ibuf[4] = num_const; @@ -253,7 +253,7 @@ cell_emit_state(struct cell_context *cell) for (i = 0; i < num_const; i++) { buf[j++] = constants[i]; } - pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]); } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 6568c784fec..1b09cf7f7d7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -183,7 +183,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct cell_context *cell = cell_context(pipe); @@ -193,7 +193,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, draw_flush(cell->draw); /* note: reference counting */ - pipe_buffer_reference(&cell->constants[shader].buffer, buf->buffer); + pipe_buffer_reference(&cell->constants[shader], buf); if (shader == PIPE_SHADER_VERTEX) cell->dirty |= CELL_NEW_VS_CONSTANTS; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db2..55bd85bde2b 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a51..d2166a49016 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 86056799405..0ca92af248d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d258..98919c43ffc 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51d..b18f4c22ef1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073abf..14987e3c3a2 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a57..087963960df 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d397..3e9804bf8ee 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 149393712a3..191a44c3dfc 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -125,7 +125,7 @@ failover_context( struct pipe_context *pipe ) void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 3f5f5560323..d6ec4d13136 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -495,7 +495,7 @@ failover_set_vertex_elements(struct pipe_context *pipe, void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct failover_context *failover = failover_context(pipe); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 234b441ce6e..37cbd56036b 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -233,7 +233,8 @@ struct i915_context struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + /* XXX unneded */ + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 5f5b6f8e185..0fab6e1bc36 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -518,7 +518,7 @@ static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) static void i915_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct i915_context *i915 = i915_context(pipe); struct pipe_screen *screen = pipe->screen; @@ -538,13 +538,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, */ if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(screen, buf->buffer, + if (buf->size && + (mapped = pipe_buffer_map(screen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - pipe_buffer_unmap(screen, buf->buffer); + memcpy(i915->current.constants[shader], mapped, buf->size); + pipe_buffer_unmap(screen, buf); i915->current.num_user_constants[shader] - = buf->buffer->size / (4 * sizeof(float)); + = buf->size / (4 * sizeof(float)); } else { i915->current.num_user_constants[shader] = 0; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index bb32d90e331..e389587f3e1 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -262,7 +262,7 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) static void brw_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct brw_context *brw = brw_context(pipe); @@ -270,13 +270,13 @@ static void brw_set_constant_buffer(struct pipe_context *pipe, if (shader == PIPE_SHADER_FRAGMENT) { pipe_buffer_reference( &brw->curr.fragment_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; } else { pipe_buffer_reference( &brw->curr.vertex_constants, - buf->buffer ); + buf ); brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 9f5b4e63236..f9063d90fb1 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -404,17 +404,17 @@ static void identity_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *_buffer) + struct pipe_buffer *_buffer) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - struct pipe_constant_buffer unwrapped_buffer; - struct pipe_constant_buffer *buffer = NULL; + struct pipe_buffer *unwrapped_buffer; + struct pipe_buffer *buffer = NULL; - /* unwrap the input state */ + /* XXX hmm? unwrap the input state */ if (_buffer) { - unwrapped_buffer.buffer = identity_buffer_unwrap(_buffer->buffer); - buffer = &unwrapped_buffer; + unwrapped_buffer = identity_buffer_unwrap(_buffer); + buffer = unwrapped_buffer; } pipe->set_constant_buffer(pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 1cc3c9227cc..aaa675aec77 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -124,8 +124,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(llvmpipe->constants); i++) { - if (llvmpipe->constants[i].buffer) { - pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); + if (llvmpipe->constants[i]) { + pipe_buffer_reference(&llvmpipe->constants[i], NULL); } } diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 6411797cf5d..426d6eb4a12 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -64,7 +64,7 @@ struct llvmpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color[4][16]; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a1..c3a48700a4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd3..0b2d3a28014 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7020da145f3..e16793186be 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -154,7 +154,7 @@ void llvmpipe_set_clip_state( struct pipe_context *, void llvmpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *llvmpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b73ca2d41ed..9f4bbef73fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -714,12 +714,11 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *constants) + struct pipe_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct pipe_buffer *buffer = constants ? constants->buffer : NULL; - unsigned size = buffer ? buffer->size : 0; - const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; + unsigned size = constants ? constants->size : 0; + const void *data = constants ? llvmpipe_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -727,7 +726,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + pipe_buffer_reference(&llvmpipe->constants[shader], constants); if(shader == PIPE_SHADER_FRAGMENT) { llvmpipe->jit_context.constants = data; diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981a..6c29e8d8ace 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb96779..c1abee424c9 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7d..2b258f1052e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index e3dc4c5bf44..b67f1e16b1c 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -332,7 +332,7 @@ nv04_set_clip_state(struct pipe_context *pipe, static void nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv04_context *nv04 = nv04_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -342,13 +342,13 @@ nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf && buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv04->constbuf[shader], mapped, buf->size); nv04->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index ffc6be3c401..ad7def53b12 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -458,7 +458,7 @@ nv10_set_clip_state(struct pipe_context *pipe, static void nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv10_context *nv10 = nv10_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -468,13 +468,13 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv10->constbuf[shader], mapped, buf->size); nv10->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index 3a82e63423d..45697a60efd 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -451,7 +451,7 @@ nv20_set_clip_state(struct pipe_context *pipe, static void nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv20_context *nv20 = nv20_context(pipe); struct pipe_screen *pscreen = pipe->screen; @@ -461,13 +461,13 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(pscreen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + if (buf->size && + (mapped = pipe_buffer_map(pscreen, buf, PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); + memcpy(nv20->constbuf[shader], mapped, buf->size); nv20->constbuf_nr[shader] = - buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pscreen, buf->buffer); + buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pscreen, buf); } } } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 98935678911..0cc3172dcd5 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -43,7 +43,6 @@ static struct nv30_texture_format * nv30_fragtex_format(uint pipe_format) { struct nv30_texture_format *tf = nv30_texture_formats; - char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -65,7 +64,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct nv30_texture_format *tf; struct nouveau_stateobj *so; - uint32_t txf, txs , txp; + uint32_t txf, txs; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); @@ -97,13 +96,6 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) return NULL; } - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - txp = 0; - } else { - txp = nv30mt->level[0].pitch; - txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; - } - txs = tf->swizzle; so = so_new(1, 8, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index a80dfb04880..065c927a10d 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -590,12 +590,12 @@ nv30_set_clip_state(struct pipe_context *pipe, static void nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->constbuf[shader] = buf->buffer; - nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv30->constbuf[shader] = buf; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv30->dirty |= NV30_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index ed0ca9e02c3..7d990f7d567 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -605,12 +605,12 @@ nv40_set_clip_state(struct pipe_context *pipe, static void nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->constbuf[shader] = buf->buffer; - nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + nv40->constbuf[shader] = buf; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { nv40->dirty |= NV40_NEW_VERTPROG; diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 5997456e4c9..1e697463221 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -67,8 +67,12 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vertprog); if (nv50->state.fragprog) so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.programs) - so_ref(NULL, &nv50->state.programs); + if (nv50->state.geomprog) + so_ref(NULL, &nv50->state.geomprog); + if (nv50->state.fp_linkage) + so_ref(NULL, &nv50->state.fp_linkage); + if (nv50->state.gp_linkage) + so_ref(NULL, &nv50->state.gp_linkage); if (nv50->state.vtxfmt) so_ref(NULL, &nv50->state.vtxfmt); if (nv50->state.vtxbuf) @@ -100,7 +104,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.destroy = nv50_destroy; nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbd4c3ff86d..bebcd95054f 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -29,9 +29,7 @@ #define NV50_CB_PVP 1 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 -#define NV50_CB_TIC 4 -#define NV50_CB_TSC 5 -#define NV50_CB_PUPLOAD 6 +#define NV50_CB_AUX 4 #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) @@ -45,9 +43,11 @@ #define NV50_NEW_VERTPROG_CB (1 << 9) #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_ARRAYS (1 << 12) -#define NV50_NEW_SAMPLER (1 << 13) -#define NV50_NEW_TEXTURE (1 << 14) +#define NV50_NEW_GEOMPROG (1 << 12) +#define NV50_NEW_GEOMPROG_CB (1 << 13) +#define NV50_NEW_ARRAYS (1 << 14) +#define NV50_NEW_SAMPLER (1 << 15) +#define NV50_NEW_TEXTURE (1 << 16) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -129,10 +129,13 @@ struct nv50_state { unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *programs; + struct nouveau_stateobj *geomprog; + struct nouveau_stateobj *fp_linkage; + struct nouveau_stateobj *gp_linkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; + struct nouveau_stateobj *instbuf; unsigned vtxelt_nr; }; @@ -157,6 +160,7 @@ struct nv50_context { struct pipe_framebuffer_state framebuffer; struct nv50_program *vertprog; struct nv50_program *fragprog; + struct nv50_program *geomprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; @@ -193,11 +197,22 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode, + unsigned start, unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +extern void nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); extern void nv50_vbo_validate(struct nv50_context *nv50); /* nv50_clear.c */ @@ -207,7 +222,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); -extern void nv50_linkage_validate(struct nv50_context *nv50); +extern void nv50_geomprog_validate(struct nv50_context *nv50); +extern void nv50_fp_linkage_validate(struct nv50_context *nv50); +extern void nv50_gp_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a139..dc8364ced7e 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x1800; break; + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; default: - tile_flags = 0x7000; + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; break; } @@ -145,7 +156,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +199,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index e16fa479e5f..20db51070ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -92,6 +92,11 @@ struct nv50_reg { int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ + + int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */ + int indirect[2]; /* index into pc->addr, or -1 */ + + ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */ }; #define NV50_MOD_NEG 1 @@ -135,7 +140,8 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; - uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ + struct nv50_reg *sysval; + int sysval_nr; struct nv50_reg *temp_temp[16]; struct nv50_program_exec *temp_temp_exec[16]; @@ -171,6 +177,8 @@ struct nv50_pc { uint8_t edgeflag_out; }; +static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *); + static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -179,7 +187,10 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->hw = hw; reg->mod = 0; reg->rhw = -1; + reg->vtx = -1; reg->acc = 0; + reg->indirect[0] = reg->indirect[1] = -1; + reg->buf_index = (type == P_CONST) ? 1 : 0; } static INLINE unsigned @@ -197,7 +208,8 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - pc->r_addr[i].rhw = -1; + if (pc->r_addr[i].index < 0) + pc->r_addr[i].acc = 0; } static void @@ -260,6 +272,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) if (reg) { alloc_reg(pc, reg); *ri = *reg; + reg->indirect[0] = reg->indirect[1] = -1; reg->mod = 0; } return ri; @@ -464,6 +477,12 @@ is_join(struct nv50_program_exec *e) return FALSE; } +static INLINE boolean +is_control_flow(struct nv50_program_exec *e) +{ + return (e->inst[0] & 2); +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) @@ -525,11 +544,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static INLINE void set_addr(struct nv50_program_exec *e, struct nv50_reg *a) { + assert(a->type == P_ADDR); + assert(!(e->inst[0] & 0x0c000000)); assert(!(e->inst[1] & 0x00000004)); e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; + e->inst[1] |= a->hw & 4; +} + +static void +emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t); + +static void +emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int); + +static void +emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[1] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + set_addr(e, src); + + emit(pc, e); } static void @@ -548,72 +589,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } -static struct nv50_reg * -alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) -{ - struct nv50_reg *a_tgsi = NULL, *a = NULL; - int i; - uint8_t avail = ~pc->addr_alloc; - - if (!ref) { - /* allocate for TGSI_FILE_ADDRESS */ - while (avail) { - i = ffs(avail) - 1; - - if (pc->r_addr[i].rhw < 0 || - pc->r_addr[i].acc != pc->insn_cur) { - pc->addr_alloc |= (1 << i); - - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; - } - avail &= ~(1 << i); - } - assert(0); - return NULL; - } - - /* Allocate and set an address reg so we can access 'ref'. - * - * If and r_addr->index will be -1 or the hw index the value - * value in rhw is relative to. If rhw < 0, the reg has not - * been initialized or is in use for TGSI_FILE_ADDRESS. - */ - while (avail) { /* only consider regs that are not TGSI */ - i = ffs(avail) - 1; - avail &= ~(1 << i); - - if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { - /* prefer an usused reg with low hw index */ - a = &pc->r_addr[i]; - continue; - } - if (!a && pc->r_addr[i].acc != pc->insn_cur) - a = &pc->r_addr[i]; - - if (ref->hw - pc->r_addr[i].rhw >= 128) - continue; - - if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || - (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { - pc->r_addr[i].acc = pc->insn_cur; - return &pc->r_addr[i]; - } - } - assert(a); - - if (ref->acc < 0) - a_tgsi = pc->addr[ref->index]; - - emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); - - a->rhw = ref->hw & ~0x7f; - a->acc = pc->insn_cur; - a->index = a_tgsi ? ref->index : -1; - return a; -} - #define INTERP_LINEAR 0 #define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 @@ -657,15 +632,15 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); - if (src->hw > 127) - set_addr(e, alloc_addr(pc, src)); + if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */ + set_addr(e, get_address_reg(pc, src)); else if (src->acc < 0) { assert(src->type == P_CONST); - set_addr(e, pc->addr[src->index]); + set_addr(e, pc->addr[src->indirect[0]]); } - e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); + e->inst[1] |= (src->buf_index << 22); } /* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ @@ -694,6 +669,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + /* indirect (vertex base + c) load from p[] */ + e->inst[0] |= 0x01800000; + set_addr(e, get_address_reg(pc, src)); + } } alloc_reg(pc, src); @@ -808,6 +789,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + e->inst[0] |= 0x01800000; /* src from p[] */ + set_addr(e, get_address_reg(pc, src)); + } } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc, e); @@ -832,13 +818,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x00800000)); - if (e->inst[0] & 0x01000000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x00800000)); set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } @@ -862,13 +848,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x01000000)); - if (e->inst[0] & 0x00800000) { + if (e->inst[0] & 0x01800000) { struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x01000000)); set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } @@ -997,11 +983,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= dst->hw << 2; e->inst[0] |= s << 16; /* shift left */ - set_src_0_restricted(pc, src, e); + set_src_0(pc, src, e); emit(pc, e); } +static boolean +address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r) +{ + if (!r) + return FALSE; + + if (r->vtx != a->vtx) + return FALSE; + if (r->vtx >= 0) + return (r->indirect[1] == a->indirect[1]); + + if (r->hw < a->rhw || (r->hw - a->rhw) >= 128) + return FALSE; + + if (a->index >= 0) + return (a->index == r->indirect[0]); + return (a->indirect[0] == r->indirect[0]); +} + +static void +load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *a, int shift) +{ + struct nv50_reg mem, *temp; + + ctor_reg(&mem, P_ATTR, -1, dst->vtx); + + assert(dst->type == P_ADDR); + if (!a) { + emit_arl(pc, dst, &mem, 0); + return; + } + temp = alloc_temp(pc, NULL); + + if (shift) { + emit_mov_from_addr(pc, temp, a); + if (shift < 0) + emit_shl_imm(pc, temp, temp, shift); + emit_arl(pc, dst, temp, MAX2(shift, 0)); + } + emit_mov(pc, temp, &mem); + set_addr(pc->p->exec_tail, dst); + + emit_arl(pc, dst, temp, 0); + free_temp(pc, temp); +} + +/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS + * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX + * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX + * case (vtx < 0, acc >= 0): memory address too high to encode + * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS + */ +static struct nv50_reg * +get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_ref, *a = NULL; + + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].acc == 0) + a = &pc->r_addr[i]; /* an unused address reg */ + else + if (address_reg_suitable(&pc->r_addr[i], ref)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } else + if (!a && pc->r_addr[i].index < 0 && + pc->r_addr[i].acc < pc->insn_cur) + a = &pc->r_addr[i]; + } + if (!a) { + /* We'll be able to spill address regs when this + * mess is replaced with a proper compiler ... + */ + NOUVEAU_ERR("out of address regs\n"); + abort(); + return NULL; + } + + /* initialize and reserve for this TGSI instruction */ + a->rhw = 0; + a->index = a->indirect[0] = a->indirect[1] = -1; + a->acc = pc->insn_cur; + + if (!ref) { + a->vtx = -1; + return a; + } + a->vtx = ref->vtx; + + /* now put in the correct value ... */ + + if (ref->vtx >= 0) { + a->indirect[1] = ref->indirect[1]; + + /* For an indirect vertex index, we need to shift address right + * by 2, the address register will contain vtx * 16, we need to + * load from a[vtx * 4]. + */ + load_vertex_base(pc, a, (ref->acc < 0) ? + pc->addr[ref->indirect[1]] : NULL, -2); + } else { + assert(ref->acc < 0 || ref->indirect[0] < 0); + + a->rhw = ref->hw & ~0x7f; + a->indirect[0] = ref->indirect[0]; + a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL; + + emit_add_addr_imm(pc, a, a_ref, a->rhw * 4); + } + return a; +} + #define NV50_MAX_F32 0x880 #define NV50_MAX_S32 0x08c #define NV50_MAX_U32 0x084 @@ -1629,6 +1729,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc) emit_control_flow(pc, 0x3, pred, cc); } +static void +emit_prim_cmd(struct nv50_pc *pc, unsigned cmd) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000 | (cmd << 9); + e->inst[1] = 0xc0000000; + set_long(pc, e); + + emit(pc, e); +} + #define QOP_ADD 0 #define QOP_SUBR 1 #define QOP_SUB 2 @@ -2171,14 +2283,19 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { - r = alloc_addr(pc, NULL); - pc->addr[dst->Register.Index * 4 + c] = r; + r = get_address_reg(pc, NULL); + r->index = dst->Register.Index * 4 + c; + pc->addr[r->index] = r; } assert(r); return r; } case TGSI_FILE_NULL: return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(pc->sysval[dst->Register.Index].type == P_RESULT); + assert(c == 0); + return &pc->sysval[dst->Register.Index]; default: break; } @@ -2208,6 +2325,18 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, switch (src->Register.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->Register.Index * 4 + c]; + + if (!src->Dimension.Dimension) + break; + r = reg_instance(pc, r); + r->vtx = src->Dimension.Index; + + if (!src->Dimension.Indirect) + break; + swz = tgsi_util_get_src_register_swizzle( + &src->DimIndirect, 0); + r->acc = -1; + r->indirect[1] = src->DimIndirect.Index * 4 + swz; break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->Register.Index * 4 + c]; @@ -2221,12 +2350,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, * use the index field to select the address reg. */ r = reg_instance(pc, NULL); + ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c); + swz = tgsi_util_get_src_register_swizzle( - &src->Indirect, 0); - ctor_reg(r, P_CONST, - src->Indirect.Index * 4 + swz, - src->Register.Index * 4 + c); + &src->Indirect, 0); r->acc = -1; + r->indirect[0] = src->Indirect.Index * 4 + swz; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->Register.Index * 4 + c]; @@ -2237,6 +2366,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(c == 0); + r = &pc->sysval[src->Register.Index]; + break; default: assert(0); break; @@ -2273,7 +2406,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r->mod |= mod & NV50_MOD_I32; assert(r); - if (r->acc >= 0 && r != temp) + if (r->acc >= 0 && r->vtx < 0 && r != temp) return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2495,10 +2628,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_ARL: - assert(src[0][0]); temp = temp_temp(pc, NULL); - emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); - emit_arl(pc, dst[0], temp, 4); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, temp, src[0][c], -1, + CVT_FLOOR | CVT_S32_F32); + emit_arl(pc, dst[c], temp, 4); + } break; case TGSI_OPCODE_BGNLOOP: pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); @@ -2605,6 +2742,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); break; + case TGSI_OPCODE_EMIT: + emit_prim_cmd(pc, 1); + break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -2628,8 +2768,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDPRIM: + emit_prim_cmd(pc, 2); + break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); + terminate_mbb(pc); pc->in_subroutine = FALSE; break; case TGSI_OPCODE_EX2: @@ -3028,10 +3172,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!is_long(pc->p->exec_tail)) convert_to_long(pc, pc->p->exec_tail); else - if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + if (is_immd(pc->p->exec_tail) || + is_join(pc->p->exec_tail) || + is_control_flow(pc->p->exec_tail)) emit_nop(pc); pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ + + terminate_mbb(pc); break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -3327,17 +3475,53 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) * value of 0 for back-facing, and 0xffffffff for front-facing. */ static void -load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + int r_pred = 0; + + temp->rhw = 255; + emit_interp(pc, temp, NULL, INTERP_FLAT); + + emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32); + + emit_not(pc, temp, temp); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + emit_cvt(pc, sv, temp, -1, CVT_F32_S32); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + + free_temp(pc, temp); +} + +static void +load_instance_id(struct nv50_pc *pc, unsigned index) { - struct nv50_reg *one = alloc_immd(pc, 1.0f); + struct nv50_reg reg, mem; - assert(a->rhw == -1); - alloc_reg(pc, a); /* do this before rhw is set */ - a->rhw = 255; - load_interpolant(pc, a); - emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + ctor_reg(®, P_TEMP, -1, -1); + ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */ + mem.buf_index = 2; - FREE(one); + emit_add_b32(pc, ®, &pc->sysval[index], &mem); + pc->sysval[index] = reg; +} + +static void +copy_semantic_info(struct nv50_program *p) +{ + unsigned i, id; + + for (i = 0; i < p->cfg.in_nr; ++i) { + id = p->cfg.in[i].id; + p->cfg.in[i].sn = p->info.input_semantic_name[id]; + p->cfg.in[i].si = p->info.input_semantic_index[id]; + } + + for (i = 0; i < p->cfg.out_nr; ++i) { + id = p->cfg.out[i].id; + p->cfg.out[i].sn = p->info.output_semantic_name[id]; + p->cfg.out[i].si = p->info.output_semantic_index[id]; + } } static boolean @@ -3346,7 +3530,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context tp; struct nv50_program *p = pc->p; boolean ret = FALSE; - unsigned i, c, flat_nr = 0; + unsigned i, c, instance_id, vertex_id, flat_nr = 0; tgsi_parse_init(&tp, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&tp)) { @@ -3386,13 +3570,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_PSIZE: p->cfg.psiz = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_EDGEFLAG: pc->edgeflag_out = first; @@ -3432,6 +3616,37 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(d->Declaration.Semantic); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_FACE: + assert(p->type == PIPE_SHADER_FRAGMENT); + load_frontfacing(pc, + &pc->sysval[first]); + break; + case TGSI_SEMANTIC_INSTANCEID: + assert(p->type == PIPE_SHADER_VERTEX); + instance_id = first; + p->cfg.regs[0] |= (1 << 4); + break; + case TGSI_SEMANTIC_PRIMID: + assert(p->type != PIPE_SHADER_VERTEX); + p->cfg.prim_id = first; + break; + /* + case TGSI_SEMANTIC_PRIMIDIN: + assert(p->type == PIPE_SHADER_GEOMETRY); + pc->sysval[first].hw = 6; + p->cfg.regs[0] |= (1 << 8); + break; + case TGSI_SEMANTIC_VERTEXID: + assert(p->type == PIPE_SHADER_VERTEX); + vertex_id = first; + p->cfg.regs[0] |= (1 << 12) | (1 << 0); + break; + */ + } + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: @@ -3452,68 +3667,98 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (p->type == PIPE_SHADER_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) { int rid = 0; - for (i = 0; i < pc->attr_nr * 4; ++i) { - if (pc->attr[i].acc) { - pc->attr[i].hw = rid++; - p->cfg.attr[i / 32] |= 1 << (i % 32); + if (p->type == PIPE_SHADER_GEOMETRY) { + for (i = 0; i < pc->attr_nr; ++i) { + p->cfg.in[i].hw = rid; + p->cfg.in[i].id = i; + + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->attr[n].acc) + continue; + pc->attr[n].hw = rid++; + p->cfg.in[i].mask |= 1 << c; + } + } + } else { + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); + } + } + if (p->cfg.regs[0] & (1 << 0)) + pc->sysval[vertex_id].hw = rid++; + if (p->cfg.regs[0] & (1 << 4)) { + pc->sysval[instance_id].hw = rid++; + load_instance_id(pc, instance_id); } } for (i = 0, rid = 0; i < pc->result_nr; ++i) { - p->cfg.io[i].hw = rid; - p->cfg.io[i].id = i; + p->cfg.out[i].hw = rid; + p->cfg.out[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; if (!pc->result[n].acc) continue; pc->result[n].hw = rid++; - p->cfg.io[i].mask |= 1 << c; + p->cfg.out[i].mask |= 1 << c; } } + if (p->cfg.prim_id < 0x40) { + /* GP has to write to PrimitiveID */ + ctor_reg(&pc->sysval[p->cfg.prim_id], + P_RESULT, p->cfg.prim_id, rid); + p->cfg.prim_id = rid++; + } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) - p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c] = p->cfg.out[ p->cfg.two_side[c].hw]; if (p->cfg.psiz < 0x40) - p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw; + + copy_semantic_info(p); } else if (p->type == PIPE_SHADER_FRAGMENT) { - int rid, aid; + int rid, aid, base; unsigned n = 0, m = pc->attr_nr - flat_nr; pc->allow32 = TRUE; - int base = (TGSI_SEMANTIC_POSITION == - p->info.input_semantic_name[0]) ? 0 : 1; + base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; /* non-flat interpolants have to be mapped to * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { if (pc->interp_mode[i] == INTERP_FLAT) - p->cfg.io[m++].id = i; + p->cfg.in[m++].id = i; else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) - p->cfg.io[n].linear = TRUE; - p->cfg.io[n++].id = i; + p->cfg.in[n].linear = TRUE; + p->cfg.in[n++].id = i; } } + copy_semantic_info(p); if (!base) /* set w-coordinate mask from perspective interp */ - p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; + p->cfg.in[0].mask |= p->cfg.regs[1] >> 24; aid = popcnt4( /* if fcrd isn't contained in cfg.io */ - base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); + base ? (p->cfg.regs[1] >> 24) : p->cfg.in[0].mask); for (n = 0; n < pc->attr_nr; ++n) { - p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id; + p->cfg.in[n].hw = rid = aid; + i = p->cfg.in[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -3525,15 +3770,15 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr[i * 4 + c].acc) continue; pc->attr[i * 4 + c].rhw = rid++; - p->cfg.io[n].mask |= 1 << c; + p->cfg.in[n].mask |= 1 << c; load_interpolant(pc, &pc->attr[i * 4 + c]); } - aid += popcnt4(p->cfg.io[n].mask); + aid += popcnt4(p->cfg.in[n].mask); } if (!base) - p->cfg.regs[1] |= p->cfg.io[0].mask << 24; + p->cfg.regs[1] |= p->cfg.in[0].mask << 24; m = popcnt4(p->cfg.regs[1] >> 24); @@ -3543,32 +3788,33 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.regs[1] |= aid - m; if (flat_nr) { - i = p->cfg.io[pc->attr_nr - flat_nr].hw; + i = p->cfg.in[pc->attr_nr - flat_nr].hw; p->cfg.regs[1] |= (i - m) << 16; } else p->cfg.regs[1] |= p->cfg.regs[1] << 16; /* mark color semantic for light-twoside */ - n = 0x40; - for (i = 0; i < pc->attr_nr; i++) { - ubyte si, sn; - - sn = p->info.input_semantic_name[p->cfg.io[i].id]; - si = p->info.input_semantic_index[p->cfg.io[i].id]; - - if (sn == TGSI_SEMANTIC_COLOR) { - p->cfg.two_side[si] = p->cfg.io[i]; - - /* increase colour count */ - p->cfg.regs[0] += popcnt4( - p->cfg.two_side[si].mask) << 16; - - n = MIN2(n, p->cfg.io[i].hw - m); + n = 0x80; + for (i = 0; i < p->cfg.in_nr; i++) { + if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) { + n = MIN2(n, p->cfg.in[i].hw - m); + p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i]; + + p->cfg.regs[0] += /* increase colour count */ + popcnt4(p->cfg.in[i].mask) << 16; } } - if (n < 0x40) + if (n < 0x80) p->cfg.regs[0] += n; + if (p->cfg.prim_id < 0x40) { + pc->sysval[p->cfg.prim_id].rhw = rid++; + emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL, + INTERP_FLAT); + /* increase FP_INTERPOLANT_CTRL_COUNT */ + p->cfg.regs[1] += 1; + } + /* Initialize FP results: * FragDepth is always first TGSI and last hw output */ @@ -3622,10 +3868,31 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->sysval) + FREE(pc->sysval); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } +static INLINE uint32_t +nv50_map_gs_output_prim(unsigned pprim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim); + abort(); + return 0; + } +} + static boolean ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) { @@ -3639,25 +3906,55 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; assert(pc->addr_nr <= 2); + pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; p->cfg.high_temp = 4; p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.prim_id = 0x40; p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + for (i = 0; i < p->info.num_properties; ++i) { + unsigned *data = &p->info.properties[i].data[0]; + + switch (p->info.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); + break; + case TGSI_PROPERTY_GS_MAX_VERTICES: + p->cfg.vert_count = data[0]; + break; + default: + break; + } + } + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; p->cfg.clpd = 0x40; - p->cfg.io_nr = pc->result_nr; + p->cfg.out_nr = pc->result_nr; + break; + case PIPE_SHADER_GEOMETRY: + assert(p->cfg.prim_type); + assert(p->cfg.vert_count); + + p->cfg.psiz = 0x80; + p->cfg.clpd = 0x80; + p->cfg.prim_id = 0x80; + p->cfg.out_nr = pc->result_nr; + p->cfg.in_nr = pc->attr_nr; + + p->cfg.two_side[0].hw = 0x80; + p->cfg.two_side[1].hw = 0x80; break; case PIPE_SHADER_FRAGMENT: rtype[0] = rtype[1] = P_TEMP; p->cfg.regs[0] = 0x01000004; - p->cfg.io_nr = pc->attr_nr; + p->cfg.in_nr = pc->attr_nr; if (p->info.writes_z) { p->cfg.regs[2] |= 0x00000100; @@ -3715,7 +4012,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + + if (pc->sysval_nr) { + pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *)); + if (!pc->sysval) + return FALSE; + /* will only ever use SYSTEM_VALUE[i].x (hopefully) */ + for (i = 0; i < pc->sysval_nr; ++i) + ctor_reg(&pc->sysval[i], rtype[0], i, -1); + } return TRUE; } @@ -3877,13 +4183,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + uint32_t *map = pipe_buffer_map(pscreen, + nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - - if (p->type == PIPE_SHADER_VERTEX) + switch (p->type) { + case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break; + case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break; + default: cb = NV50_CB_PVP; - else - cb = NV50_CB_PFP; + assert(p->type == PIPE_SHADER_VERTEX); + break; + } nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); @@ -3977,19 +4287,18 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(5, 8, 2); + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.attr[0]); so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); - so_data (so, p->cfg.high_result); //8); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ @@ -4033,42 +4342,74 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } +void +nv50_geomprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->geomprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(6, 7, 2); + so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); + so_data (so, p->cfg.prim_type); + so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); + so_data (so, p->cfg.vert_count); + so_method(so, tesla, NV50TCL_GP_START_ID, 1); + so_data (so, 0); + so_ref(so, &nv50->state.geomprog); + so_ref(NULL, &so); +} + static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { + struct nv50_program *vp; struct nv50_program *fp = nv50->fragprog; - struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; uint32_t origin = 0x00000010; + vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog; + /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); - for (i = 0; i < fp->cfg.io_nr; i++) { - uint8_t sn, si; - uint8_t j, k = fp->cfg.io[i].id; - unsigned n = popcnt4(fp->cfg.io[i].mask); + for (i = 0; i < fp->cfg.in_nr; i++) { + unsigned j, n = popcnt4(fp->cfg.in[i].mask); - if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - for (j = 0; j < vp->info.num_outputs; ++j) { - sn = vp->info.output_semantic_name[j]; - si = vp->info.output_semantic_index[j]; - - if (sn == fp->info.input_semantic_name[k] && - si == fp->info.input_semantic_index[k]) + for (j = 0; j < vp->cfg.out_nr; ++j) + if (vp->cfg.out[j].sn == fp->cfg.in[i].sn && + vp->cfg.out[j].si == fp->cfg.in[i].si) break; - } - if (j < vp->info.num_outputs) { - ubyte mode = - nv50->rasterizer->pipe.sprite_coord_mode[si]; + if (j < vp->cfg.out_nr) { + ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[ + vp->cfg.out[j].si]; if (mode == PIPE_SPRITE_COORD_NONE) { m += n; @@ -4080,7 +4421,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) /* this is either PointCoord or replaced by sprite coords */ for (c = 0; c < 4; c++) { - if (!(fp->cfg.io[i].mask & (1 << c))) + if (!(fp->cfg.in[i].mask & (1 << c))) continue; pntc[m / 8] |= (c + 1) << ((m % 8) * 4); ++m; @@ -4090,18 +4431,22 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) } static int -nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], - struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) { int c; uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; - uint8_t *map = (uint8_t *)p_map; + uint8_t *map = (uint8_t *)map32; for (c = 0; c < 4; ++c) { if (mf & 1) { if (fpi->linear == TRUE) lin[mid / 32] |= 1 << (mid % 32); - map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + if (mv & 1) + map[mid] = oid; + else + map[mid] = (c == 3) ? (zval + 1) : zval; + ++mid; } oid += mv & 1; @@ -4113,34 +4458,42 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], } void -nv50_linkage_validate(struct nv50_context *nv50) +nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; struct nouveau_stateobj *so; - struct nv50_sreg4 dummy, *vpo; + struct nv50_sreg4 dummy; int i, n, c, m = 0; - uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint32_t map[16], lin[4], reg[6], pcrd[8]; + uint8_t zval = 0x40; + if (nv50->geomprog) { + vp = nv50->geomprog; + zval = 0x80; + } memset(map, 0, sizeof(map)); memset(lin, 0, sizeof(lin)); reg[1] = 0x00000004; /* low and high clip distance map ids */ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ reg[3] = 0x00000000; /* point size map id & enable */ + reg[5] = 0x00000000; /* primitive ID map slot */ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ reg[4] = fp->cfg.regs[1]; /* interpolant info */ dummy.linear = FALSE; dummy.mask = 0xf; /* map all components of HPOS */ - m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]); dummy.mask = 0x0; if (vp->cfg.clpd < 0x40) { - for (c = 0; c < vp->cfg.clpd_nr; ++c) - map[m++] = vp->cfg.clpd + c; + for (c = 0; c < vp->cfg.clpd_nr; ++c) { + map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8); + ++m; + } reg[1] = (m << 8); } @@ -4148,35 +4501,37 @@ nv50_linkage_validate(struct nv50_context *nv50) /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ if (nv50->rasterizer->pipe.light_twoside) { - vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *fpi = &fp->cfg.two_side[0]; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]); } reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - for (i = 0; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; - - /* position must be mapped first */ - assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); - + for (i = 0; i < fp->cfg.in_nr; i++) { /* maybe even remove these from cfg.io */ - if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION || + fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE) continue; - /* VP outputs and vp->cfg.io are in the same order */ - for (n = 0; n < vp->info.num_outputs; ++n) { - if (vp->info.output_semantic_name[n] == sn && - vp->info.output_semantic_index[n] == si) + for (n = 0; n < vp->cfg.out_nr; ++n) + if (vp->cfg.out[n].sn == fp->cfg.in[i].sn && + vp->cfg.out[n].si == fp->cfg.in[i].si) break; - } - vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i], + (n < vp->cfg.out_nr) ? + &vp->cfg.out[n] : &dummy); + } + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->cfg.prim_id < 0x40) { + map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8); + reg[5] = m++; } if (nv50->rasterizer->pipe.point_size_per_vertex) { @@ -4184,14 +4539,28 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[3] = (m++ << 4) | 1; } - /* now fill the stateobj */ - so = so_new(7, 57, 0); + /* now fill the stateobj (at most 28 so_data) */ + so = so_new(10, 54, 0); n = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + assert(m <= 32); + if (vp->type == PIPE_SHADER_GEOMETRY) { + so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); + so_datap (so, map, n); + } else { + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0]); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); + so_data (so, reg[5]); + + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + } so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_datap (so, reg, 4); @@ -4211,8 +4580,77 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, pcrd, 8); } - so_ref(so, &nv50->state.programs); - so_ref(NULL, &so); + so_method(so, tesla, NV50TCL_GP_ENABLE, 1); + so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0); + + so_ref(so, &nv50->state.fp_linkage); + so_ref(NULL, &so); +} + +static int +construct_vp_gp_mapping(uint32_t *map32, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + uint8_t *map = (uint8_t *)map32; + int i, j, c; + + for (i = 0; i < gp->cfg.in_nr; ++i) { + uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask; + + for (j = 0; j < vp->cfg.out_nr; ++j) { + if (vp->cfg.out[j].sn == gp->cfg.in[i].sn && + vp->cfg.out[j].si == gp->cfg.in[i].si) { + mv = vp->cfg.out[j].mask; + oid = vp->cfg.out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->geomprog; + uint32_t map[16]; + int m = 0; + + if (!gp) { + so_ref(NULL, &nv50->state.gp_linkage); + return; + } + memset(map, 0, sizeof(map)); + + m = construct_vp_gp_mapping(map, m, vp, gp); + + so = so_new(3, 24 - 3, 0); + + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]); + + assert(m <= 32); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + + m = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); + so_datap (so, map, m); + + so_ref(so, &nv50->state.gp_linkage); + so_ref(NULL, &so); } void @@ -4229,6 +4667,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); + FREE(p->immd); nouveau_resource_free(&p->data[0]); p->translated = 0; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 461fec1d89c..1e3ad6bff05 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -16,11 +16,13 @@ struct nv50_program_exec { }; struct nv50_sreg4 { - uint8_t hw; - uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + uint8_t id; /* tgsi index */ uint8_t mask; boolean linear; + + ubyte sn, si; /* semantic name & index */ }; struct nv50_program { @@ -49,16 +51,24 @@ struct nv50_program { uint32_t regs[4]; /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ - unsigned io_nr; - struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + unsigned in_nr, out_nr; + struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS]; /* FP colour inputs, VP/GP back colour outputs */ struct nv50_sreg4 two_side[2]; - /* VP only */ + /* GP only */ + unsigned vert_count; + uint8_t prim_type; + + /* VP & GP only */ uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; + + /* FP & GP only */ + uint8_t prim_id; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 28e2b35deaa..9d58f3c9651 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -167,7 +167,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) struct nv50_screen *screen = nv50_screen(pscreen); unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (screen->constbuf_parm[i]) nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); } @@ -329,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(40, 84, 20); + so = so_new(47, 95, 24); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); @@ -352,10 +352,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); - so_data (so, 0x54); - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); - so_data (so, 0x54); + for (i = 0; i < 3; ++i) { + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); + so_data (so, 0x54); + } + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); @@ -370,8 +371,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - for (i = 0; i < 2; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, + for (i = 0; i < 3; i++) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -406,22 +407,45 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); + /* bind auxiliary constbuf to immediate data bo */ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_AUX << 16) | 0x00000200); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000201 | (NV50_CB_AUX << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000221 | (NV50_CB_AUX << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PGP << 16) | 0x00000800); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a038a4e3c2a..0d786b0f2e3 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -18,10 +18,12 @@ struct nv50_screen { struct nouveau_notifier *sync; struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[2]; + struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + + struct pipe_buffer *strm_vbuf[16]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1f67df814b1..6ab33be663d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -531,7 +531,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -563,7 +563,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_GEOMETRY; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_GEOMPROG; +} + +static void +nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -585,17 +617,21 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } else + if (shader == PIPE_SHADER_GEOMETRY) { + nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + nv50->dirty |= NV50_NEW_GEOMPROG_CB; } } @@ -696,6 +732,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_fs_state = nv50_fp_state_bind; nv50->pipe.delete_fs_state = nv50_fp_state_delete; + nv50->pipe.create_gs_state = nv50_gp_state_create; + nv50->pipe.bind_gs_state = nv50_gp_state_bind; + nv50->pipe.delete_gs_state = nv50_gp_state_delete; + nv50->pipe.set_blend_color = nv50_set_blend_color; nv50->pipe.set_clip_state = nv50_set_clip_state; nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f83232f43cf..956da9b304c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_VERTPROG; if (nv50->state.fragprog) nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.geomprog) + nv50->state.dirty |= NV50_NEW_GEOMPROG; if (nv50->state.rast) nv50->state.dirty |= NV50_NEW_RASTERIZER; if (nv50->state.blend_colour) @@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) + so_emit(chan, nv50->state.geomprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.programs); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + so_emit(chan, nv50->state.fp_linkage); + if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG)) + && nv50->state.gp_linkage) + so_emit(chan, nv50->state.gp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -267,6 +274,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan) so_emit_reloc_markers(chan, nv50->state.fragprog); so_emit_reloc_markers(chan, nv50->state.vtxbuf); so_emit_reloc_markers(chan, nv50->screen->static_init); + + if (nv50->state.instbuf) + so_emit_reloc_markers(chan, nv50->state.instbuf); } boolean @@ -291,9 +301,15 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) + nv50_geomprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - nv50_linkage_validate(nv50); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + nv50_fp_linkage_validate(nv50); + + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) + nv50_gp_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -400,8 +416,9 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES - + nr * 8, PIPE_SHADER_TYPES * 2); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index bef548b7286..871536dca9b 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -155,7 +155,7 @@ static boolean nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, unsigned p) { - static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 }; struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f2e510fba61..bfb1b34d27a 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -40,6 +40,8 @@ nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); static boolean nv50_push_arrays(struct nv50_context *, unsigned, unsigned); +#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) + static INLINE unsigned nv50_prim(unsigned mode) { @@ -55,6 +57,14 @@ nv50_prim(unsigned mode) case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; default: break; } @@ -152,6 +162,309 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } +/* For instanced drawing from user buffers, hitting the FIFO repeatedly + * with the same vertex data is probably worse than uploading all data. + */ +static boolean +nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) +{ + struct nv50_screen *nscreen = nv50->screen; + struct pipe_screen *pscreen = &nscreen->base.base; + struct pipe_buffer *buf = nscreen->strm_vbuf[i]; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + uint8_t *src; + unsigned size = align(vb->buffer->size, 4096); + + if (buf && buf->size < size) + pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); + + if (!nscreen->strm_vbuf[i]) { + nscreen->strm_vbuf[i] = pipe_buffer_create( + pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size); + buf = nscreen->strm_vbuf[i]; + } + + src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if (!src) + return FALSE; + src += vb->buffer_offset; + + size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */ + if (vb->buffer_offset + size > vb->buffer->size) + size = vb->buffer->size - vb->buffer_offset; + + pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src); + pipe_buffer_unmap(pscreen, vb->buffer); + + vb->buffer = buf; /* don't pipe_reference, this is a private copy */ + return TRUE; +} + +static void +nv50_upload_user_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + if (nv50->vbo_fifo) + nv50->dirty |= NV50_NEW_ARRAYS; + if (!(nv50->dirty & NV50_NEW_ARRAYS)) + return; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) { + if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX) + continue; + nv50_upload_vtxbuf(nv50, i); + } +} + +static void +nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + float v[4]; + + util_format_read_4f(nv50->vtxelt[i].src_format, + v, 0, data, 0, 0, 0, 1, 1); + + switch (nv50->vtxelt[i].nr_components) { + case 4: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + OUT_RINGf (chan, v[3]); + break; + case 3: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + OUT_RINGf (chan, v[2]); + break; + case 2: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2); + OUT_RINGf (chan, v[0]); + OUT_RINGf (chan, v[1]); + break; + case 1: + BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1); + OUT_RINGf (chan, v[0]); + break; + default: + assert(0); + break; + } +} + +static unsigned +init_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b, count = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + if (!bo->map) + nouveau_bo_map(bo, NOUVEAU_BO_RD); + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } + + return count; +} + +static unsigned +init_per_instance_arrays(struct nv50_context *nv50, + unsigned startInstance, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b, count = 0; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) + return init_per_instance_arrays_immd(nv50, startInstance, + pos, step); + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + ++count; + b = nv50->vtxelt[i].vertex_buffer_index; + + pos[i] = nv50->vtxelt[i].src_offset + + nv50->vtxbuf[b].buffer_offset + + startInstance * nv50->vtxbuf[b].stride; + + if (!startInstance) { + step[i] = 0; + continue; + } + step[i] = startInstance % nv50->vtxelt[i].instance_divisor; + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); + } + + if (count && startInstance) { + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_emit(chan, nv50->state.instbuf); + } + so_ref (NULL, &so); + + return count; +} + +static void +step_per_instance_arrays_immd(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_bo *bo; + unsigned i, b; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + if (++step[i] != nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + + nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + } +} + +static void +step_per_instance_arrays(struct nv50_context *nv50, + unsigned pos[16], unsigned step[16]) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct nouveau_bo *bo; + struct nouveau_stateobj *so; + unsigned i, b; + const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + if (nv50->vbo_fifo) { + step_per_instance_arrays_immd(nv50, pos, step); + return; + } + + so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + if (!nv50->vtxelt[i].instance_divisor) + continue; + b = nv50->vtxelt[i].vertex_buffer_index; + + if (++step[i] == nv50->vtxelt[i].instance_divisor) { + step[i] = 0; + pos[i] += nv50->vtxbuf[b].stride; + } + + bo = nouveau_bo(nv50->vtxbuf[b].buffer); + + so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); + } + + so_ref (so, &nv50->state.instbuf); /* for flush notify */ + so_ref (NULL, &so); + + so_emit(chan, nv50->state.instbuf); +} + +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + +void +nv50_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->tesla->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; i++) { + if (nz_divisors) /* any non-zero array divisors ? */ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + if (nv50->vbo_fifo) + nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -182,6 +495,8 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + /* XXX: not sure what to do if ret != TRUE: flush and retry? */ assert(ret); @@ -210,7 +525,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -243,7 +558,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -268,7 +583,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -277,6 +592,77 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } +static INLINE void +nv50_draw_elements_inline(struct nv50_context *nv50, + void *map, unsigned indexSize, + unsigned start, unsigned count) +{ + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + } +} + +void +nv50_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct pipe_screen *pscreen = pipe->screen; + void *map; + unsigned i, nz_divisors; + unsigned step[16], pos[16]; + + map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) + nv50_upload_user_vbufs(nv50); + + nv50_state_validate(nv50); + + nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + for (i = 1; i < instanceCount; ++i) { + if (nz_divisors) /* any non-zero array divisors ? */ + step_per_instance_arrays(nv50, pos, step); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode) | (1 << 28)); + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } + nv50_unmap_vbufs(nv50); + + so_ref(NULL, &nv50->state.instbuf); +} + void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -287,7 +673,6 @@ nv50_draw_elements(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_screen *pscreen = pipe->screen; void *map; - boolean ret; map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -300,29 +685,15 @@ nv50_draw_elements(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - switch (indexSize) { - case 1: - ret = nv50_draw_elements_inline_u08(nv50, map, start, count); - break; - case 2: - ret = nv50_draw_elements_inline_u16(nv50, map, start, count); - break; - case 4: - ret = nv50_draw_elements_inline_u32(nv50, map, start, count); - break; - default: - assert(0); - ret = FALSE; - break; - } + + nv50_draw_elements_inline(nv50, map, indexSize, start, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); + nv50_unmap_vbufs(nv50); + pipe_buffer_unmap(pscreen, indexBuffer); - - /* XXX: what to do if ret != TRUE? Flush and retry? - */ - assert(ret); } static INLINE boolean @@ -335,23 +706,16 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_bo *bo = nouveau_bo(vb->buffer); - float *v; + float v[4]; int ret; - enum pipe_format pf = ve->src_format; - const struct util_format_description *desc; - - desc = util_format_description(pf); - assert(desc); - - if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) || - util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32) - return FALSE; ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); if (ret) return FALSE; - v = (float *)(bo->map + (vb->buffer_offset + ve->src_offset)); + util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map + + (vb->buffer_offset + ve->src_offset), 0, + 0, 0, 1, 1); so = *pso; if (!so) *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); @@ -409,7 +773,7 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; - if (nv50->vertprog->cfg.edgeflag_in < 16) + if (NV50_USING_LOATHED_EDGEFLAG(nv50)) nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); @@ -437,17 +801,20 @@ nv50_vbo_validate(struct nv50_context *nv50) nv50->vbo_fifo &= ~(1 << i); continue; } - so_data(vtxfmt, hw | i); if (nv50->vbo_fifo) { + so_data (vtxfmt, hw | + (ve->instance_divisor ? (1 << 4) : i)); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); continue; } + so_data(vtxfmt, hw | i); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); - so_data (vtxbuf, 0x20000000 | vb->stride); + so_data (vtxbuf, 0x20000000 | + (ve->instance_divisor ? 0 : vb->stride)); so_reloc (vtxbuf, bo, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -485,7 +852,7 @@ typedef void (*pfn_push)(struct nouveau_channel *, void *); struct nv50_vbo_emitctx { pfn_push push[16]; - void *map[16]; + uint8_t *map[16]; unsigned stride[16]; unsigned nr_ve; unsigned vtx_dwords; @@ -523,19 +890,18 @@ nv50_map_vbufs(struct nv50_context *nv50) for (i = 0; i < nv50->vtxbuf_nr; ++i) { struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - unsigned size, delta; + unsigned size = vb->stride * (vb->max_index + 1) + 16; if (nouveau_bo(vb->buffer)->map) continue; - size = vb->stride * (vb->max_index + 1); - delta = vb->buffer_offset; - + size = vb->stride * (vb->max_index + 1) + 16; + size = MIN2(size, vb->buffer->size); if (!size) - size = vb->buffer->size - vb->buffer_offset; + size = vb->buffer->size; if (nouveau_bo_map_range(nouveau_bo(vb->buffer), - delta, size, NOUVEAU_BO_RD)) + 0, size, NOUVEAU_BO_RD)) break; } @@ -546,16 +912,6 @@ nv50_map_vbufs(struct nv50_context *nv50) return FALSE; } -static INLINE void -nv50_unmap_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) - if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); -} - static void emit_b32_1(struct nouveau_channel *chan, void *data) { @@ -650,12 +1006,13 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, ve = &nv50->vtxelt[i]; vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - if (!(nv50->vbo_fifo & (1 << i))) + if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) continue; n = emit->nr_ve++; emit->stride[n] = vb->stride; - emit->map[n] = nouveau_bo(vb->buffer)->map + + emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + + vb->buffer_offset + (start * vb->stride + ve->src_offset); desc = util_format_description(ve->src_format); @@ -745,13 +1102,12 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -772,13 +1128,12 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -799,13 +1154,12 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } @@ -826,13 +1180,12 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) set_edgeflag(chan, tesla, &emit, *map); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); count -= nr; } - nv50_unmap_vbufs(nv50); return TRUE; } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff34..92de297ef1d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a72..28084864929 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index af95bbe789c..94a9ab3ef3f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -87,7 +87,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, { struct pipe_buffer* buf = 0; - r300_get_texture_buffer(texture, &buf, NULL); + r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); return pipe->is_buffer_referenced(pipe, buf); } @@ -110,23 +110,33 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } -#define R300_INIT_ATOM(name) \ - r300->name##_state.state = NULL; \ - r300->name##_state.emit = r300_emit_##name##_state; \ - r300->name##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->name##_state); +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); static void r300_setup_atoms(struct r300_context* r300) { + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(ztop); - R300_INIT_ATOM(blend); - R300_INIT_ATOM(blend_color); - R300_INIT_ATOM(clip); - R300_INIT_ATOM(dsa); - R300_INIT_ATOM(rs); - R300_INIT_ATOM(scissor); - R300_INIT_ATOM(viewport); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -143,8 +153,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; - r300_init_debug(r300); - r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; @@ -182,7 +190,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 5937f0e2cc5..5e33dc042ac 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,16 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "r300_screen.h" + struct r300_context; struct r300_fragment_shader; struct r300_vertex_shader; struct r300_atom { + /* List pointers. */ struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ void* state; + /* Emit the state to the context. */ void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; }; struct r300_blend_state { @@ -75,10 +87,10 @@ struct r300_rs_state { uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ + float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ + /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ + float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ + /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ @@ -106,16 +118,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -188,6 +190,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -224,6 +232,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -315,9 +326,10 @@ struct r300_context { uint32_t dirty_hw; /* Whether the TCL engine should be in bypass mode. */ boolean tcl_bypass; - - /** Combination of DBG_xxx flags */ - unsigned debug; + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enabled; + /* Z buffer bit depth. */ + uint32_t zbuffer_bpp; }; /* Convenience cast wrapper. */ @@ -331,35 +343,15 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); -/* Debug functionality. */ - -/** - * Debug flags to disable/enable certain groups of debugging outputs. - * - * \note These may be rather coarse, and the grouping may be impractical. - * If you find, while debugging the driver, that a different grouping - * of these flags would be beneficial, just feel free to change them - * but make sure to update the documentation in r300_debug.c to reflect - * those changes. - */ -/*@{*/ -#define DBG_HELP 0x0000001 -#define DBG_FP 0x0000002 -#define DBG_VP 0x0000004 -#define DBG_CS 0x0000008 -#define DBG_DRAW 0x0000010 -#define DBG_TEX 0x0000020 -#define DBG_FALL 0x0000040 -/*@}*/ - -static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? TRUE : FALSE; + return SCREEN_DBG_ON(r300_screen(ctx->context.screen), flags); } -static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags, + const char * fmt, ...) { - if (DBG_ON(ctx, flags)) { + if (CTX_DBG_ON(ctx, flags)) { va_list va; va_start(va, fmt); debug_vprintf(fmt, va); @@ -367,6 +359,8 @@ static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * f } } -void r300_init_debug(struct r300_context * ctx); +#define DBG_ON CTX_DBG_ON +#define DBG CTX_DBG #endif /* R300_CONTEXT_H */ + diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee0502..151f72b0fe4 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 2a6ed54ac9b..00d4f31c2b6 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -46,7 +46,7 @@ static struct debug_option debug_options[] = { { 0, 0, 0 } }; -void r300_init_debug(struct r300_context * ctx) +void r300_init_debug(struct r300_screen * screen) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = FALSE; @@ -64,7 +64,7 @@ void r300_init_debug(struct r300_context * ctx) for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { - ctx->debug |= opt->flag; + screen->debug |= opt->flag; break; } } @@ -77,11 +77,11 @@ void r300_init_debug(struct r300_context * ctx) options += length; } - if (!ctx->debug) + if (!screen->debug) printhint = TRUE; } - if (printhint || ctx->debug & DBG_HELP) { + if (printhint || screen->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" "to a comma-separated list of debug options. Available options are:\n"); for(opt = debug_options; opt->name; ++opt) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0e5533c7902..2ea9fab015d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -41,6 +41,7 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); @@ -419,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -443,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -579,19 +584,37 @@ void r300_emit_query_end(struct r300_context* r300) void r300_emit_rs_state(struct r300_context* r300, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; + float scale, offset; CS_LOCALS(r300); - BEGIN_CS(22); + BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0)); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); OUT_CS(rs->point_minmax); OUT_CS(rs->line_control); - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); - OUT_CS(rs->depth_scale_front); - OUT_CS(rs->depth_offset_front); - OUT_CS(rs->depth_scale_back); - OUT_CS(rs->depth_offset_back); + + if (rs->polygon_offset_enable) { + scale = rs->depth_scale * 12; + offset = rs->depth_offset; + + switch (r300->zbuffer_bpp) { + case 16: + offset *= 4; + break; + case 24: + offset *= 2; + break; + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + OUT_CS_32F(scale); + OUT_CS_32F(offset); + } + + OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); OUT_CS(rs->polygon_offset_enable); OUT_CS(rs->cull_mode); OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); @@ -641,27 +664,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, void* state) -{ - struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; - /* XXX argfl! */ if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -680,12 +723,18 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + } BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | @@ -697,8 +746,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -764,32 +815,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -992,14 +1017,23 @@ void r300_emit_dirty_state(struct r300_context* r300) struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; struct r300_atom* atom; - int i, dirty_tex = 0; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } + } + + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -1039,10 +1073,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -1071,7 +1107,7 @@ validate: } foreach(atom, &r300->atom_list) { - if (atom->dirty) { + if (atom->dirty || atom->always_dirty) { atom->emit(r300, atom->state); atom->dirty = FALSE; } diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc4..59819cb1061 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 034bfc15cf9..361813891fb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2283,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2544,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ee43421cdb7..90de062bcda 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -114,6 +114,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -207,17 +245,49 @@ validate: return TRUE; } +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_buffer** elts, + unsigned count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + unsigned char *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + 2 * count); + + in_map = pipe_buffer_map(screen, *elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)*in_map; + in_map++; + out_map++; + } + + pipe_buffer_unmap(screen, *elts); + pipe_buffer_unmap(screen, new_elts); + + *elts = new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { return; @@ -236,13 +306,18 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } + if (indexSize == 1) { + r300_shorten_ubyte_elts(r300, &indexBuffer, count); + indexSize = 2; + } + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return; + goto cleanup; } r300_emit_dirty_state(r300); @@ -251,6 +326,11 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); + +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ @@ -264,7 +344,7 @@ void r300_draw_elements(struct pipe_context* pipe, } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); @@ -287,9 +367,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 287664b1d20..67325c6b80a 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -183,10 +183,20 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean r300_is_format_supported(struct pipe_screen* screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned usage, + unsigned geom_flags) { uint32_t retval = 0; + boolean is_r500 = r300_screen(screen)->caps->is_r500; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + debug_printf("r300: Implementation error: Received bogus texture " + "target %d in %s\n", target, __FUNCTION__); + return FALSE; + } switch (format) { /* Supported formats. */ @@ -247,28 +257,13 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - debug_printf("r300: Note: Got unsupported format: %s in %s\n", - pf_name(format), __FUNCTION__); + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "r300: Note: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); return FALSE; - /* XXX These don't even exist - case PIPE_FORMAT_A32R32G32B32: - case PIPE_FORMAT_A16R16G16B16: */ - /* XXX What the deuce is UV88? (r3xx accel page 14) - debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ - - /* XXX Supported yet unimplemented r5xx formats: */ - /* XXX Again, what is UV1010 this time? (r5xx accel page 148) */ - /* XXX Even more that don't exist - case PIPE_FORMAT_A10R10G10B10_UNORM: - case PIPE_FORMAT_A2R10G10B10_UNORM: - case PIPE_FORMAT_I10_UNORM: - debug_printf( - "r300: Warning: Got unimplemented r500 format: %s in %s\n", - pf_name(format), __FUNCTION__); - return FALSE; */ + /* XXX Add all remaining gallium-supported formats, + * see util/u_format.csv. */ default: /* Unknown format... */ @@ -286,30 +281,6 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -static boolean r300_is_format_supported(struct pipe_screen* pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags) -{ - switch (target) { - case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_3D: - case PIPE_TEXTURE_CUBE: - return check_tex_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - - default: - debug_printf("r300: Fatal: This is not a format target: %d\n", - target); - assert(0); - break; - } - - return FALSE; -} - static struct pipe_transfer* r300_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -319,6 +290,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; + struct r300_screen *rscreen = r300_screen(screen); unsigned offset; offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ @@ -330,11 +302,8 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(tex, level); + trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); trans->transfer.usage = usage; - - /* XXX not sure whether it's required to set these two, - the driver doesn't use them */ trans->transfer.zslice = zslice; trans->transfer.face = face; @@ -396,6 +365,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) caps->num_frag_pipes = radeon_winsys->gb_pipes; caps->num_z_pipes = radeon_winsys->z_pipes; + r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2217988addd..580fda3984e 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -35,6 +35,9 @@ struct r300_screen { /* Chipset capabilities */ struct r300_capabilities* caps; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; struct r300_transfer { @@ -60,4 +63,44 @@ r300_transfer(struct pipe_transfer* transfer) /* Creates a new r300 screen. */ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 +/*@}*/ + +static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) +{ + return (screen->debug & flags) ? TRUE : FALSE; +} + +static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags, + const char * fmt, ...) +{ + if (SCREEN_DBG_ON(screen, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_screen* ctx); + #endif /* R300_SCREEN_H */ + diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 78764ddc98e..e2ec0bc5bd2 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -340,6 +340,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; @@ -355,6 +356,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -365,11 +367,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.dirty = TRUE; + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -462,8 +467,10 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; r300->dsa_state.dirty = TRUE; } @@ -474,36 +481,12 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - struct pipe_scissor_state pscissor; + uint32_t zbuffer_bpp = 0; if (r300->draw) { draw_flush(r300->draw); @@ -511,19 +494,29 @@ static void r300->framebuffer_state = *state; - /* XXX Arg. This is silly. */ - pscissor.minx = pscissor.miny = 0; - pscissor.maxx = state->width; - pscissor.maxy = state->height; - r300_set_scissor_regs(&pscissor, &scissor->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ r300->dirty_state |= R300_NEW_FRAMEBUFFERS; r300->blend_state.dirty = TRUE; r300->dsa_state.dirty = TRUE; r300->scissor_state.dirty = TRUE; + + /* Polyfon offset depends on the zbuffer bit depth. */ + if (state->zsbuf && r300->polygon_offset_enabled) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; + } + + if (r300->zbuffer_bpp != zbuffer_bpp) { + r300->zbuffer_bpp = zbuffer_bpp; + r300->rs_state.dirty = TRUE; + } + } } /* Create fragment shader state. */ @@ -627,9 +620,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; - /* XXX I think there is something wrong with the polygon mode, - * XXX re-test when r300g is in a better shape */ - /* Enable polygon mode */ if (state->fill_cw != PIPE_POLYGON_MODE_FILL || state->fill_ccw != PIPE_POLYGON_MODE_FILL) { @@ -682,10 +672,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } if (rs->polygon_offset_enable) { - rs->depth_offset_front = rs->depth_offset_back = - fui(state->offset_units); - rs->depth_scale_front = rs->depth_scale_back = - fui(state->offset_scale); + rs->depth_offset = state->offset_units; + rs->depth_scale = state->offset_scale; } if (state->line_stipple_enable) { @@ -717,7 +705,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + } else { + r300->tcl_bypass = FALSE; + r300->polygon_offset_enabled = FALSE; + } r300->rs_state.state = rs; r300->rs_state.dirty = TRUE; @@ -864,11 +858,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_scissor_state* scissor = - (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &scissor->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); r300->scissor_state.dirty = TRUE; } @@ -1015,22 +1007,22 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) static void r300_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct r300_context* r300 = r300_context(pipe); void *mapped; - if (buf == NULL || buf->buffer->size == 0 || - (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + if (buf == NULL || buf->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) { r300->shader_constants[shader].count = 0; return; } - assert((buf->buffer->size % 4 * sizeof(float)) == 0); - memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); - r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); - pipe_buffer_unmap(pipe->screen, buf->buffer); + assert((buf->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->size); + r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf); if (shader == PIPE_SHADER_VERTEX) r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 192846411ba..99c2720897f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -350,7 +350,8 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 35be00e1b01..e2180b33b77 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -537,6 +537,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); + unsigned swizzle[4], i; assert(format); @@ -547,11 +548,26 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { return 0; } - return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT)); + /* Swizzles for 8bits formats are in the reversed order, not sure why. */ + if (desc->channel[0].size == 8) { + for (i = 0; i < 4; i++) { + if (desc->swizzle[i] <= 3) { + swizzle[i] = 3 - desc->swizzle[i]; + } else { + swizzle[i] = desc->swizzle[i]; + } + } + } else { + for (i = 0; i < 4; i++) { + swizzle[i] = desc->swizzle[i]; + } + } + + return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index f25f3ca217d..47d7e60a40d 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -79,7 +79,8 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + + (caps->family >= CHIP_FAMILY_RV350 ? 4 : 0)); if (caps->has_tcl) { /*Flushing PVS is required before the VAP_GB registers can be changed*/ @@ -115,10 +116,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { + + if (caps->family >= CHIP_FAMILY_RV350) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4dc..1f73f74c268 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,10 +30,23 @@ #include "r300_texture.h" #include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + +static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + boolean is_r500 = screen->caps->is_r500; state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); @@ -67,8 +80,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) } assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); - debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width0, pt->height0, pt->last_level); + SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n", + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -92,33 +105,78 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, - level, tex->tex.last_level); + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + width = u_minify(tex->tex.width0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(width, tile_width); + return util_format_get_stride(tex->tex.format, width); + } else { + return align(util_format_get_stride(tex->tex.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + height = u_minify(tex->tex.height0, level); + + if (!util_format_is_compressed(tex->tex.format)) { + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(height, tile_height); + } + + return util_format_get_nblocksy(tex->tex.format, height); } -static void r300_setup_miptree(struct r300_texture* tex) +static void r300_setup_miptree(struct r300_screen* screen, + struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n", + pf_name(base->format)); - stride = r300_texture_get_stride(tex, i); + for (i = 0; i <= base->last_level; i++) { + stride = r300_texture_get_stride(screen, tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -131,10 +189,10 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->layer_size[i] = layer_size; tex->pitch[i] = stride / util_format_get_blocksize(base->format); - debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -150,6 +208,7 @@ static struct pipe_texture* const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); + struct r300_screen* rscreen = r300_screen(screen); if (!tex) { return NULL; @@ -160,10 +219,10 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); - r300_setup_miptree(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_miptree(rscreen, tex); + r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); @@ -227,6 +286,7 @@ static struct pipe_texture* struct pipe_buffer* buffer) { struct r300_texture* tex; + struct r300_screen* rscreen = r300_screen(screen); /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -248,7 +308,7 @@ static struct pipe_texture* tex->pitch[0] = *stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); + r300_setup_texture_state(rscreen, tex); pipe_buffer_reference(&tex->buffer, buffer); @@ -315,7 +375,8 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->video_surface_destroy= r300_video_surface_destroy; } -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { @@ -327,7 +388,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = r300_texture_get_stride(tex, 0); + *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55ceb1a5136..1be1e6843c2 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -31,7 +31,8 @@ struct r300_texture; void r300_init_screen_texture_functions(struct pipe_screen* screen); -unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_stride(struct r300_screen* screen, + struct r300_texture* tex, unsigned level); unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); @@ -115,7 +116,8 @@ r300_video_surface(struct pipe_video_surface *pvs) #ifndef R300_WINSYS_H -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 68aef70872e..9fbb830047f 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -124,7 +124,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } @@ -182,7 +183,8 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || + vs_outputs->color[1] != ATTR_UNUSED) { stream_loc[tabi++] = 2 + i; } } @@ -259,7 +261,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; - } else if (any_bcolor_used) { + } else if (any_bcolor_used || + outputs->color[1] != ATTR_UNUSED) { reg++; } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 1ae6de70fee..bdb8b54bab6 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -40,7 +40,8 @@ extern "C" { struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys); -boolean r300_get_texture_buffer(struct pipe_texture* texture, +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index bcb887a0b26..e4ac49fa85f 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c \ - sp_video_context.c + sp_video_context.c \ + sp_winsys.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index aac9edf44e6..3042e556c64 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -34,6 +34,7 @@ softpipe = env.ConvenienceLibrary( 'sp_texture.c', 'sp_tile_cache.c', 'sp_video_context.c', + 'sp_winsys.c' ]) Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index f3ac6760db5..8e017939402 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -113,8 +113,8 @@ softpipe_destroy( struct pipe_context *pipe ) } for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); + if (softpipe->constants[i]) { + pipe_buffer_reference(&softpipe->constants[i], NULL); } } @@ -256,6 +256,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 73fa744f9d4..da673c57ada 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -63,7 +63,7 @@ struct softpipe_context { /** Other rendering state */ struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_buffer *constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 03d35fb3cb5..03b58d2fb72 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -52,18 +52,18 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + if (sp->constants[i] && sp->constants[i]->size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], PIPE_BUFFER_USAGE_CPU_READ); } - if (sp->constants[PIPE_SHADER_VERTEX].buffer) - vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + if (sp->constants[PIPE_SHADER_VERTEX]) + vssize = sp->constants[PIPE_SHADER_VERTEX]->size; else vssize = 0; - if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) - gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; + if (sp->constants[PIPE_SHADER_GEOMETRY]) + gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size; else gssize = 0; @@ -91,26 +91,48 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + if (sp->constants[i] && sp->constants[i]->size) + ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; } } +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -119,6 +141,91 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); +} + + +void +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); +} + +void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; unsigned i; @@ -128,45 +235,48 @@ softpipe_draw_range_elements(struct pipe_context *pipe, sp->reduced_api_prim = u_reduced_prim(mode); - if (sp->dirty) - softpipe_update_derived( sp ); + if (sp->dirty) { + softpipe_update_derived(sp); + } softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); - /* - * Map vertex buffers - */ + /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + minIndex, + maxIndex, mapped_indexes); - } - else { + } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); } /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - /* - * unmap vertex/index buffers - will cause draw module to flush - */ + /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); @@ -176,22 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; } - - -void -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); -} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f573aef3c3..5812d1eefeb 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -526,6 +526,7 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(cvbr->vertex_buffer); sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -541,7 +542,6 @@ sp_create_vbuf_backend(struct softpipe_context *sp) assert(sp->draw); - cvbr->base.max_indices = SP_MAX_VBUF_INDEXES; cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 9b18dac67bd..7f244c4fd49 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -139,7 +139,7 @@ void softpipe_set_clip_state( struct pipe_context *, void softpipe_set_constant_buffer(struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf); + struct pipe_buffer *buf); void *softpipe_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -200,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void softpipe_map_transfers(struct softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index aa12bb215a8..b7ed4441b43 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -159,7 +159,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -169,8 +169,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, draw_flush(softpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&softpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&softpipe->constants[shader], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a9436a33942..fae72c81aa1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -57,13 +57,8 @@ softpipe_texture_layout(struct pipe_screen *screen, unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned buffer_size = 0; - pt->width0 = width; - pt->height0 = height; - pt->depth0 = depth; - for (level = 0; level <= pt->last_level; level++) { spt->stride[level] = util_format_get_stride(pt->format, width); diff --git a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c new file mode 100644 index 00000000000..8169071dc9f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Malloc softpipe winsys. Uses malloc for all memory allocations. + * + * @author Keith Whitwell + * @author Brian Paul + * @author Jose Fonseca + */ + + +#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" + + +struct st_softpipe_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? */ + void *data; + void *mapped; +}; + + +/** Cast wrapper */ +static INLINE struct st_softpipe_buffer * +st_softpipe_buffer( struct pipe_buffer *buf ) +{ + return (struct st_softpipe_buffer *)buf; +} + + +static void * +st_softpipe_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = st_softpipe_buf->data; + return st_softpipe_buf->mapped; +} + + +static void +st_softpipe_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = NULL; +} + + +static void +st_softpipe_buffer_destroy(struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); + + if (oldBuf->data) { + if (!oldBuf->userBuffer) + align_free(oldBuf->data); + + oldBuf->data = NULL; + } + + FREE(oldBuf); +} + + +static void +st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ +} + + + +static const char * +st_softpipe_get_name(struct pipe_winsys *winsys) +{ + return "softpipe"; +} + + +static struct pipe_buffer * +st_softpipe_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +st_softpipe_user_buffer_create(struct pipe_winsys *winsys, + void *ptr, + unsigned bytes) +{ + struct st_softpipe_buffer *buffer; + + buffer = CALLOC_STRUCT(st_softpipe_buffer); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +static struct pipe_buffer * +st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned tex_usage, + unsigned *stride) +{ + const unsigned alignment = 64; + unsigned nblocksy; + + nblocksy = util_format_get_nblocksy(format, height); + *stride = align(util_format_get_stride(format, width), alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); +} + + +static void +st_softpipe_fence_reference(struct pipe_winsys *winsys, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +st_softpipe_fence_signalled(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +st_softpipe_fence_finish(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static void +st_softpipe_destroy(struct pipe_winsys *winsys) +{ + FREE(winsys); +} + + +struct pipe_screen * +softpipe_create_screen_malloc(void) +{ + static struct pipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(pipe_winsys); + if(!winsys) + return NULL; + + winsys->destroy = st_softpipe_destroy; + + winsys->buffer_create = st_softpipe_buffer_create; + winsys->user_buffer_create = st_softpipe_user_buffer_create; + winsys->buffer_map = st_softpipe_buffer_map; + winsys->buffer_unmap = st_softpipe_buffer_unmap; + winsys->buffer_destroy = st_softpipe_buffer_destroy; + + winsys->surface_buffer_create = st_softpipe_surface_buffer_create; + + winsys->fence_reference = st_softpipe_fence_reference; + winsys->fence_signalled = st_softpipe_fence_signalled; + winsys->fence_finish = st_softpipe_fence_finish; + + winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; + winsys->get_name = st_softpipe_get_name; + + screen = softpipe_create_screen(winsys); + if(!screen) + st_softpipe_destroy(winsys); + + return screen; +} diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index f203ded29ee..3042e01a05c 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -49,10 +49,17 @@ struct pipe_buffer; struct pipe_context *softpipe_create( struct pipe_screen * ); +/** + * Create a softpipe screen that uses the + * given winsys for allocating buffers. + */ +struct pipe_screen *softpipe_create_screen( struct pipe_winsys * ); -struct pipe_screen * -softpipe_create_screen(struct pipe_winsys *); - +/** + * Create a softpipe screen that uses + * regular malloc to create all its buffers. + */ +struct pipe_screen *softpipe_create_screen_malloc(void); boolean softpipe_get_texture_buffer( struct pipe_texture *texture, diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index 10e7a121892..ca2c7c49d72 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -49,7 +49,7 @@ struct svga_constbuf static void svga_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + struct pipe_buffer *buf) { struct svga_context *svga = svga_context(pipe); @@ -57,7 +57,7 @@ static void svga_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); pipe_buffer_reference( &svga->curr.cb[shader], - buf->buffer ); + buf ); if (shader == PIPE_SHADER_FRAGMENT) svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index d29f3762d2b..ec2886348b9 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -81,8 +81,10 @@ static enum pipe_error compile_fs( struct svga_context *svga, } result->id = util_bitmask_add(svga->fs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index ae1e77e7d44..e7e6c084321 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -71,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -80,8 +80,10 @@ static enum pipe_error compile_vs( struct svga_context *svga, } result->id = util_bitmask_add(svga->vs_bm); - if(result->id == UTIL_BITMASK_INVALID_INDEX) + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto fail; + } ret = SVGA3D_DefineShader(svga->swc, result->id, @@ -200,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); @@ -222,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 075e4f9a0b2..5a9f0fc6901 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -812,13 +812,13 @@ trace_context_set_clip_state(struct pipe_context *_pipe, static INLINE void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, - const struct pipe_constant_buffer *buffer) + struct pipe_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; if (buffer) - trace_screen_user_buffer_update(_pipe->screen, buffer->buffer); + trace_screen_user_buffer_update(_pipe->screen, buffer); trace_dump_call_begin("pipe_context", "set_constant_buffer"); @@ -827,10 +827,11 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, trace_dump_arg(uint, index); trace_dump_arg(constant_buffer, buffer); + /* XXX hmm? */ if (buffer) { - struct pipe_constant_buffer _buffer; - _buffer.buffer = trace_buffer_unwrap(tr_ctx, buffer->buffer); - pipe->set_constant_buffer(pipe, shader, index, &_buffer); + struct pipe_buffer *_buffer; + _buffer = trace_buffer_unwrap(tr_ctx, buffer); + pipe->set_constant_buffer(pipe, shader, index, _buffer); } else { pipe->set_constant_buffer(pipe, shader, index, buffer); } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 86237e03bcc..32f61f8c944 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -229,7 +229,7 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) } -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +void trace_dump_constant_buffer(const struct pipe_buffer *state) { if (!trace_dumping_enabled_locked()) return; @@ -241,7 +241,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) trace_dump_struct_begin("pipe_constant_buffer"); - trace_dump_member(buffer_ptr, state, buffer); + trace_dump_reference(&state->reference); trace_dump_struct_end(); } diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 07ad6fbb205..c7860fd6e18 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -47,7 +47,7 @@ void trace_dump_scissor_state(const struct pipe_scissor_state *state); void trace_dump_clip_state(const struct pipe_clip_state *state); -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +void trace_dump_constant_buffer(const struct pipe_buffer *state); void trace_dump_token(const struct tgsi_token *token); diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 26a940593f0..272d0308cc1 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -104,7 +104,8 @@ typedef unsigned char boolean; /* Function visibility */ #ifndef PUBLIC -# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) # else # define PUBLIC @@ -139,22 +140,40 @@ typedef unsigned char boolean; -#if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) +/* Macros for data alignment. */ +#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment) __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) -#define ALIGN_STACK __attribute__((force_align_arg_pointer)) +#define PIPE_ALIGN_STACK __attribute__((force_align_arg_pointer)) #else -#define ALIGN_STACK +#define PIPE_ALIGN_STACK #endif + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment) __declspec(align(_alignment)) + +#define PIPE_ALIGN_STACK + +#elif defined(SWIG) + +#define PIPE_ALIGN_TYPE(_alignment, _type) _type +#define PIPE_ALIGN_VAR(_alignment) + +#define PIPE_ALIGN_STACK + #else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB -#define ALIGN_STACK + +#error "Unsupported compiler" + #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index d2f8085b421..0b8f6da2f4a 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -69,6 +69,22 @@ struct pipe_context { unsigned indexSize, unsigned mode, unsigned start, unsigned count); + void (*draw_arrays_instanced)(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + void (*draw_elements_instanced)(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. * Using this to quickly evaluate a specialized path in the draw @@ -170,7 +186,7 @@ struct pipe_context { void (*set_constant_buffer)( struct pipe_context *, uint shader, uint index, - const struct pipe_constant_buffer *buf ); + struct pipe_buffer *buf ); void (*set_framebuffer_state)( struct pipe_context *, const struct pipe_framebuffer_state * ); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 550e2abc32a..b489b044667 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -121,17 +121,18 @@ struct tgsi_declaration_range unsigned Last : 16; /**< UINT */ }; -#define TGSI_SEMANTIC_POSITION 0 -#define TGSI_SEMANTIC_COLOR 1 -#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define TGSI_SEMANTIC_FOG 3 -#define TGSI_SEMANTIC_PSIZE 4 -#define TGSI_SEMANTIC_GENERIC 5 -#define TGSI_SEMANTIC_NORMAL 6 -#define TGSI_SEMANTIC_FACE 7 -#define TGSI_SEMANTIC_EDGEFLAG 8 -#define TGSI_SEMANTIC_PRIMID 9 -#define TGSI_SEMANTIC_COUNT 10 /**< number of semantic values */ +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_FACE 7 +#define TGSI_SEMANTIC_EDGEFLAG 8 +#define TGSI_SEMANTIC_PRIMID 9 +#define TGSI_SEMANTIC_INSTANCEID 10 +#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ struct tgsi_declaration_semantic { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 60e96b98deb..fdd29ed4492 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -66,10 +66,6 @@ extern "C" { #define PIPE_MAX_TEXTURE_LEVELS 16 -/* fwd decls */ -struct pipe_surface; - - /** * The driver will certainly subclass this to include actual memory * management information. @@ -178,15 +174,6 @@ struct pipe_clip_state }; -/** - * Constants for vertex/fragment shaders - */ -struct pipe_constant_buffer -{ - struct pipe_buffer *buffer; -}; - - struct pipe_shader_state { const struct tgsi_token *tokens; @@ -376,6 +363,11 @@ struct pipe_vertex_element /** Offset of this attribute, in bytes, from the start of the vertex */ unsigned src_offset; + /** Instance data rate divisor. 0 means this is per-vertex data, + * n means per-instance data used for n consecutive instances (n > 0). + */ + unsigned instance_divisor; + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does * this attribute live in? */ diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index f131e77ac5e..0fdfa96b35a 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -180,6 +180,7 @@ dri_get_buffers(__DRIdrawable * dPriv) switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: + continue; case __DRI_BUFFER_FAKE_FRONT_LEFT: index = ST_SURFACE_FRONT_LEFT; format = drawable->color_format; @@ -372,6 +373,7 @@ dri_create_buffer(__DRIscreen * sPriv, /* TODO incase of double buffer visual, delay fake creation */ i = 0; drawable->attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + drawable->attachments[i++] = __DRI_BUFFER_FAKE_FRONT_LEFT; if (visual->doubleBufferMode) drawable->attachments[i++] = __DRI_BUFFER_BACK_LEFT; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 793db087ee1..d8c054313b3 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -83,7 +83,7 @@ dri_fill_in_modes(struct dri_screen *screen, unsigned num_modes; uint8_t depth_bits_array[5]; uint8_t stencil_bits_array[5]; - uint8_t msaa_samples_array[1]; + uint8_t msaa_samples_array[2]; unsigned depth_buffer_factor; unsigned back_buffer_factor; unsigned msaa_samples_factor; @@ -147,8 +147,9 @@ dri_fill_in_modes(struct dri_screen *screen, } msaa_samples_array[0] = 0; + msaa_samples_array[1] = 4; back_buffer_factor = 3; - msaa_samples_factor = 1; + msaa_samples_factor = 2; num_modes = depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4; @@ -158,7 +159,7 @@ dri_fill_in_modes(struct dri_screen *screen, depth_bits_array, stencil_bits_array, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, msaa_samples_factor); } else { __DRIconfig **configs_a8r8g8b8 = NULL; __DRIconfig **configs_x8r8g8b8 = NULL; @@ -170,7 +171,8 @@ dri_fill_in_modes(struct dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (pf_x8r8g8b8) configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, depth_bits_array, @@ -178,7 +180,8 @@ dri_fill_in_modes(struct dri_screen *screen, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, + msaa_samples_factor); if (configs_a8r8g8b8 && configs_x8r8g8b8) configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8); diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c index 8b2e3b5f85b..042e9518c2d 100644 --- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.c @@ -24,7 +24,9 @@ #include <assert.h> #include <string.h> +#include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "egldriver.h" #include "eglcurrent.h" #include "eglconfigutil.h" @@ -43,8 +45,13 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) struct egl_g3d_display *gdpy = egl_g3d_display(dpy); struct pipe_screen *screen = gdpy->native->screen; struct egl_g3d_context *gctx = egl_g3d_context(ctx); - EGLint num_surfaces; - EGLint s, i; + const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = { + ST_SURFACE_FRONT_LEFT, + ST_SURFACE_BACK_LEFT, + ST_SURFACE_FRONT_RIGHT, + ST_SURFACE_BACK_RIGHT, + }; + EGLint num_surfaces, s; /* validate draw and/or read buffers */ num_surfaces = (gctx->base.ReadSurface == gctx->base.DrawSurface) ? 1 : 2; @@ -52,6 +59,7 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; struct egl_g3d_surface *gsurf; struct egl_g3d_buffer *gbuf; + EGLint att; if (s == 0) { gsurf = egl_g3d_surface(gctx->base.DrawSurface); @@ -63,35 +71,33 @@ egl_g3d_validate_context(_EGLDisplay *dpy, _EGLContext *ctx) } if (!gctx->force_validate) { - EGLint cur_w, cur_h; - - cur_w = gsurf->base.Width; - cur_h = gsurf->base.Height; - gsurf->native->validate(gsurf->native, - gbuf->native_atts, gbuf->num_atts, - NULL, - &gsurf->base.Width, &gsurf->base.Height); - /* validate only when the geometry changed */ - if (gsurf->base.Width == cur_w && gsurf->base.Height == cur_h) + unsigned int seq_num; + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &seq_num, NULL, NULL, NULL); + /* skip validation */ + if (gsurf->sequence_number == seq_num) continue; } - gsurf->native->validate(gsurf->native, - gbuf->native_atts, gbuf->num_atts, - (struct pipe_texture **) textures, + pipe_surface_reference(&gsurf->render_surface, NULL); + memset(textures, 0, sizeof(textures)); + + gsurf->native->validate(gsurf->native, gbuf->attachment_mask, + &gsurf->sequence_number, textures, &gsurf->base.Width, &gsurf->base.Height); - for (i = 0; i < gbuf->num_atts; i++) { - struct pipe_texture *pt = textures[i]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct pipe_texture *pt = textures[att]; struct pipe_surface *ps; - if (pt) { + if (native_attachment_mask_test(gbuf->attachment_mask, att) && pt) { ps = screen->get_tex_surface(screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE); gctx->stapi->st_set_framebuffer_surface(gbuf->st_fb, - gbuf->st_atts[i], ps); + st_att_map[att], ps); - if (gbuf->native_atts[i] == gsurf->render_att) + if (gsurf->render_att == att) pipe_surface_reference(&gsurf->render_surface, ps); pipe_surface_reference(&ps, NULL); @@ -130,13 +136,7 @@ static void egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) { struct egl_g3d_context *gctx = egl_g3d_context(ctx); - const uint st_att_map[NUM_NATIVE_ATTACHMENTS] = { - ST_SURFACE_FRONT_LEFT, - ST_SURFACE_BACK_LEFT, - ST_SURFACE_FRONT_RIGHT, - ST_SURFACE_BACK_RIGHT, - }; - EGLint s, i; + EGLint s; /* route draw and read buffers' attachments */ for (s = 0; s < 2; s++) { @@ -152,11 +152,7 @@ egl_g3d_route_context(_EGLDisplay *dpy, _EGLContext *ctx) gbuf = &gctx->read; } - gbuf->native_atts[0] = gsurf->render_att; - gbuf->num_atts = 1; - - for (i = 0; i < gbuf->num_atts; i++) - gbuf->st_atts[i] = st_att_map[gbuf->native_atts[i]]; + gbuf->attachment_mask = (1 << gsurf->render_att); /* FIXME OpenGL defaults to draw the front or back buffer when the * context is single-buffered or double-buffered respectively. In EGL, @@ -198,19 +194,19 @@ egl_g3d_realloc_context(_EGLDisplay *dpy, _EGLContext *ctx) if (!gdraw || priv != (void *) &gdraw->base) { gctx->stapi->st_unreference_framebuffer(gctx->draw.st_fb); gctx->draw.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } if (is_equal) { gctx->read.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } else { priv = gctx->stapi->st_framebuffer_private(gctx->read.st_fb); if (!gread || priv != (void *) &gread->base) { gctx->stapi->st_unreference_framebuffer(gctx->read.st_fb); gctx->read.st_fb = NULL; - gctx->draw.num_atts = 0; + gctx->draw.attachment_mask = 0x0; } } } @@ -459,16 +455,30 @@ egl_g3d_add_configs(_EGLDriver *drv, _EGLDisplay *dpy, EGLint id) * Flush the front buffer of the context's draw surface. */ static void -egl_g3d_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) +egl_g3d_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, void *context_private) { struct egl_g3d_context *gctx = egl_g3d_context(context_private); struct egl_g3d_surface *gsurf = egl_g3d_surface(gctx->base.DrawSurface); - if (gsurf) { + if (gsurf) gsurf->native->flush_frontbuffer(gsurf->native); - egl_g3d_validate_context(gctx->base.Display, &gctx->base); - } +} + +/** + * Re-validate the context. + */ +static void +egl_g3d_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct egl_g3d_context *gctx = egl_g3d_context(context_private); + + /** + * It is likely that the surface has changed when this function is called. + * Set force_validate to skip an unnecessary check. + */ + gctx->force_validate = EGL_TRUE; + egl_g3d_validate_context(gctx->base.Display, &gctx->base); } static EGLBoolean @@ -512,13 +522,15 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, } dpy->DriverData = gdpy; - gdpy->native = - native_create_display(dpy->NativeDisplay, egl_g3d_flush_frontbuffer); + gdpy->native = native_create_display(dpy->NativeDisplay); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); goto fail; } + gdpy->native->screen->flush_frontbuffer = egl_g3d_flush_frontbuffer; + gdpy->native->screen->update_buffer = egl_g3d_update_buffer; + dpy->ClientAPIsMask = gdrv->api_mask; if (egl_g3d_add_configs(drv, dpy, 1) == 1) { @@ -550,7 +562,7 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, _EGLContext *share, const EGLint *attribs) { struct egl_g3d_display *gdpy = egl_g3d_display(dpy); - struct egl_g3d_context *xshare = egl_g3d_context(share); + struct egl_g3d_context *gshare = egl_g3d_context(share); struct egl_g3d_config *gconf = egl_g3d_config(conf); struct egl_g3d_context *gctx; const __GLcontextModes *mode; @@ -575,8 +587,18 @@ egl_g3d_create_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, mode = &gconf->native->mode; gctx->pipe = gdpy->native->create_context(gdpy->native, (void *) &gctx->base); + if (!gctx->pipe) { + free(gctx); + return NULL; + } + gctx->st_ctx = gctx->stapi->st_create_context(gctx->pipe, mode, - (xshare) ? xshare->st_ctx : NULL); + (gshare) ? gshare->st_ctx : NULL); + if (!gctx->st_ctx) { + gctx->pipe->destroy(gctx->pipe); + free(gctx); + return NULL; + } return &gctx->base; } @@ -599,6 +621,16 @@ egl_g3d_destroy_context(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) return EGL_TRUE; } +static EGLBoolean +init_surface_geometry(_EGLSurface *surf) +{ + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + + return gsurf->native->validate(gsurf->native, 0x0, + &gsurf->sequence_number, NULL, + &gsurf->base.Width, &gsurf->base.Height); +} + static _EGLSurface * egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLConfig *conf, EGLNativeWindowType win, @@ -626,8 +658,7 @@ egl_g3d_create_window_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL, - &gsurf->base.Width, &gsurf->base.Height)) { + if (!init_surface_geometry(&gsurf->base)) { gsurf->native->destroy(gsurf->native); free(gsurf); return NULL; @@ -667,8 +698,7 @@ egl_g3d_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!gsurf->native->validate(gsurf->native, NULL, 0, NULL, - &gsurf->base.Width, &gsurf->base.Height)) { + if (!init_surface_geometry(&gsurf->base)) { gsurf->native->destroy(gsurf->native); free(gsurf); return NULL; @@ -706,6 +736,12 @@ egl_g3d_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } + if (!init_surface_geometry(&gsurf->base)) { + gsurf->native->destroy(gsurf->native); + free(gsurf); + return NULL; + } + gsurf->render_att = (!gconf->native->mode.doubleBufferMode) ? NATIVE_ATTACHMENT_FRONT_LEFT : NATIVE_ATTACHMENT_BACK_LEFT; @@ -817,14 +853,110 @@ egl_g3d_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf) struct egl_g3d_config *gconf = egl_g3d_config(gsurf->base.Config); /* force validation if the swap method is not copy */ - if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) + if (gconf->native->mode.swapMethod != GLX_SWAP_COPY_OML) { gctx->force_validate = EGL_TRUE; - egl_g3d_validate_context(dpy, &gctx->base); + egl_g3d_validate_context(dpy, &gctx->base); + } } return EGL_TRUE; } +/** + * Find a config that supports the pixmap. + */ +static _EGLConfig * +find_pixmap_config(_EGLDisplay *dpy, EGLNativePixmapType pix) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_config *gconf; + EGLint i; + + for (i = 0; i < dpy->NumConfigs; i++) { + gconf = egl_g3d_config(dpy->Configs[i]); + if (gdpy->native->is_pixmap_supported(gdpy->native, pix, gconf->native)) + break; + } + + return (i < dpy->NumConfigs) ? &gconf->base : NULL; +} + +/** + * Get the pipe surface of the given attachment of the native surface. + */ +static struct pipe_surface * +get_pipe_surface(struct native_display *ndpy, struct native_surface *nsurf, + enum native_attachment natt) +{ + struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + struct pipe_surface *psurf; + + textures[natt] = NULL; + nsurf->validate(nsurf, 1 << natt, NULL, textures, NULL, NULL); + if (!textures[natt]) + return NULL; + + psurf = ndpy->screen->get_tex_surface(ndpy->screen, textures[natt], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_texture_reference(&textures[natt], NULL); + + return psurf; +} + +static EGLBoolean +egl_g3d_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, + NativePixmapType target) +{ + struct egl_g3d_display *gdpy = egl_g3d_display(dpy); + struct egl_g3d_surface *gsurf = egl_g3d_surface(surf); + _EGLContext *ctx = _eglGetCurrentContext(); + struct egl_g3d_config *gconf; + struct native_surface *nsurf; + struct pipe_screen *screen = gdpy->native->screen; + struct pipe_surface *psurf; + + if (!gsurf->render_surface) + return EGL_TRUE; + + gconf = egl_g3d_config(find_pixmap_config(dpy, target)); + if (!gconf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + nsurf = gdpy->native->create_pixmap_surface(gdpy->native, + target, gconf->native); + if (!nsurf) + return _eglError(EGL_BAD_NATIVE_PIXMAP, "eglCopyBuffers"); + + /* flush if the surface is current */ + if (ctx && ctx->DrawSurface == &gsurf->base) { + struct egl_g3d_context *gctx = egl_g3d_context(ctx); + gctx->stapi->st_flush(gctx->st_ctx, + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, NULL); + } + + psurf = get_pipe_surface(gdpy->native, nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); + if (psurf) { + struct pipe_context pipe; + + /** + * XXX This is hacky. If we might allow the EGLDisplay to create a pipe + * context of its own and use the blitter context for this. + */ + memset(&pipe, 0, sizeof(pipe)); + pipe.screen = screen; + + util_surface_copy(&pipe, FALSE, psurf, 0, 0, + gsurf->render_surface, 0, 0, psurf->width, psurf->height); + + pipe_surface_reference(&psurf, NULL); + nsurf->flush_frontbuffer(nsurf); + } + + nsurf->destroy(nsurf); + + return EGL_TRUE; +} + static EGLBoolean egl_g3d_wait_client(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx) { @@ -849,15 +981,14 @@ egl_g3d_wait_native(_EGLDriver *drv, _EGLDisplay *dpy, EGLint engine) } static _EGLProc -egl_g3d_get_proc_address(const char *procname) +egl_g3d_get_proc_address(_EGLDriver *drv, const char *procname) { - /* FIXME how come _EGLDriver is not passed? */ - const struct egl_g3d_st *stapi; + struct egl_g3d_driver *gdrv = egl_g3d_driver(drv); _EGLProc proc; EGLint i; for (i = 0; i < NUM_EGL_G3D_STS; i++) { - stapi = egl_g3d_get_st(i); + const struct egl_g3d_st *stapi = gdrv->stapis[i]; if (stapi) { proc = (_EGLProc) stapi->st_get_proc_address(procname); if (proc) @@ -1079,6 +1210,7 @@ _eglMain(const char *args) gdrv->base.API.DestroySurface = egl_g3d_destroy_surface; gdrv->base.API.MakeCurrent = egl_g3d_make_current; gdrv->base.API.SwapBuffers = egl_g3d_swap_buffers; + gdrv->base.API.CopyBuffers = egl_g3d_copy_buffers; gdrv->base.API.WaitClient = egl_g3d_wait_client; gdrv->base.API.WaitNative = egl_g3d_wait_native; gdrv->base.API.GetProcAddress = egl_g3d_get_proc_address; diff --git a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h index 33894b614f9..1da8af495b1 100644 --- a/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl_g3d/common/egl_g3d.h @@ -52,9 +52,7 @@ struct egl_g3d_display { struct egl_g3d_buffer { struct st_framebuffer *st_fb; - EGLint num_atts; - enum native_attachment native_atts[NUM_NATIVE_ATTACHMENTS]; - uint st_atts[NUM_NATIVE_ATTACHMENTS]; + uint attachment_mask; }; struct egl_g3d_context { @@ -73,6 +71,7 @@ struct egl_g3d_surface { struct native_surface *native; enum native_attachment render_att; struct pipe_surface *render_surface; + unsigned int sequence_number; }; struct egl_g3d_config { diff --git a/src/gallium/state_trackers/egl_g3d/common/native.h b/src/gallium/state_trackers/egl_g3d/common/native.h index 88d87c6f3ae..f374f2e4a68 100644 --- a/src/gallium/state_trackers/egl_g3d/common/native.h +++ b/src/gallium/state_trackers/egl_g3d/common/native.h @@ -64,13 +64,19 @@ struct native_surface { boolean (*flush_frontbuffer)(struct native_surface *nsurf); /** - * Validate the buffers of the surface. Those not listed in the attachments - * will be destroyed. The returned textures are owned by the caller. + * Validate the buffers of the surface. textures, if not NULL, points to an + * array of size NUM_NATIVE_ATTACHMENTS and the returned textures are owned + * by the caller. A sequence number is also returned. The caller can use + * it to check if anything has changed since the last call. Any of the + * pointers may be NULL and it indicates the caller has no interest in those + * values. + * + * If this function is called multiple times with different attachment + * masks, those not listed in the latest call might be destroyed. This + * behavior might change in the future. */ - boolean (*validate)(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, + boolean (*validate)(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, int *width, int *height); /** @@ -80,6 +86,7 @@ struct native_surface { }; struct native_config { + /* __GLcontextModes should go away some day */ __GLcontextModes mode; enum pipe_format color_format; enum pipe_format depth_format; @@ -107,7 +114,14 @@ struct native_display_modeset; * the native display server. */ struct native_display { + /** + * The pipe screen of the native display. + * + * Note that the "flush_frontbuffer" and "update_buffer" callbacks will be + * overridden. + */ struct pipe_screen *screen; + void (*destroy)(struct native_display *ndpy); /** @@ -122,6 +136,17 @@ struct native_display { int *num_configs); /** + * Test if a pixmap is supported by the given config. Required unless no + * config has GLX_PIXMAP_BIT set. + * + * This function is usually called to find a config that supports a given + * pixmap. Thus, it is usually called with the same pixmap in a row. + */ + boolean (*is_pixmap_supported)(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf); + + /** * Create a pipe context. */ struct pipe_context *(*create_context)(struct native_display *ndpy, @@ -197,15 +222,19 @@ struct native_display_modeset { const struct native_mode *nmode); }; -typedef void (*native_flush_frontbuffer)(void *dummy, - struct pipe_surface *surf, - void *context_private); +/** + * Test whether an attachment is set in the mask. + */ +static INLINE boolean +native_attachment_mask_test(uint mask, enum native_attachment att) +{ + return !!(mask & (1 << att)); +} const char * native_get_name(void); struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer); +native_create_display(EGLNativeDisplayType dpy); #endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c index 0e0babdb14f..dc664366305 100644 --- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.c +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.c @@ -33,22 +33,17 @@ #include "native_kms.h" static boolean -kms_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, +kms_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, int *width, int *height) { struct kms_surface *ksurf = kms_surface(nsurf); struct kms_display *kdpy = ksurf->kdpy; struct pipe_screen *screen = kdpy->base.screen; struct pipe_texture templ, *ptex; - int i; - - if (num_natts) { - if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); + int att; + if (attachment_mask) { memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -62,19 +57,25 @@ kms_surface_validate(struct native_surface *nsurf, } /* create textures */ - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; - ptex = ksurf->textures[natt]; + ptex = ksurf->textures[att]; if (!ptex) { ptex = screen->texture_create(screen, &templ); - ksurf->textures[natt] = ptex; + ksurf->textures[att] = ptex; } - if (textures) - pipe_texture_reference(&textures[i], ptex); + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], ptex); + } } + if (seq_num) + *seq_num = ksurf->sequence_number; if (width) *width = ksurf->width; if (height) @@ -111,7 +112,7 @@ kms_surface_init_framebuffers(struct native_surface *nsurf, boolean need_back) if (!fb->texture) { /* make sure the texture has been allocated */ - kms_surface_validate(&ksurf->base, &natt, 1, NULL, NULL, NULL); + kms_surface_validate(&ksurf->base, 1 << natt, NULL, NULL, NULL, NULL); if (!ksurf->textures[natt]) return FALSE; @@ -196,6 +197,9 @@ kms_surface_swap_buffers(struct native_surface *nsurf) ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT]; ksurf->textures[NATIVE_ATTACHMENT_BACK_LEFT] = tmp_texture; + /* the front/back textures are swapped */ + ksurf->sequence_number++; + return TRUE; } @@ -764,8 +768,7 @@ static struct native_display_modeset kms_display_modeset = { }; static struct native_display * -kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, - native_flush_frontbuffer flush_frontbuffer) +kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api) { struct kms_display *kdpy; @@ -807,10 +810,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, return NULL; } - kdpy->base.screen->flush_frontbuffer = - (void (*)(struct pipe_screen *, struct pipe_surface *, void *)) - flush_frontbuffer; - kdpy->base.destroy = kms_display_destroy; kdpy->base.get_configs = kms_display_get_configs; kdpy->base.create_context = kms_display_create_context; @@ -821,13 +820,6 @@ kms_create_display(EGLNativeDisplayType dpy, struct drm_api *api, return &kdpy->base; } -static void -dummy_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - /* the api is destroyed with the native display */ static struct drm_api *drm_api; @@ -848,19 +840,15 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer) +native_create_display(EGLNativeDisplayType dpy) { struct native_display *ndpy = NULL; if (!drm_api) drm_api = drm_api_create(); - if (!flush_frontbuffer) - flush_frontbuffer = dummy_flush_frontbuffer; - if (drm_api) - ndpy = kms_create_display(dpy, drm_api, flush_frontbuffer); + ndpy = kms_create_display(dpy, drm_api); return ndpy; } diff --git a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h index 3f869b25acf..095186e3cf3 100644 --- a/src/gallium/state_trackers/egl_g3d/kms/native_kms.h +++ b/src/gallium/state_trackers/egl_g3d/kms/native_kms.h @@ -81,6 +81,7 @@ struct kms_surface { int width, height; struct pipe_texture *textures[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; struct kms_framebuffer front_fb, back_fb; boolean is_shown; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c index 0dda786bbdb..07f82d878c5 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_dri2.c @@ -64,6 +64,7 @@ struct dri2_surface { struct pipe_texture *pbuffer_textures[NUM_NATIVE_ATTACHMENTS]; boolean have_back, have_fake; int width, height; + unsigned int sequence_number; }; struct dri2_config { @@ -133,21 +134,18 @@ dri2_surface_swap_buffers(struct native_surface *nsurf) } static boolean -dri2_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, - int *width, int *height) +dri2_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) { struct dri2_surface *dri2surf = dri2_surface(nsurf); struct dri2_display *dri2dpy = dri2surf->dri2dpy; unsigned int dri2atts[NUM_NATIVE_ATTACHMENTS]; - EGLint texture_indices[NUM_NATIVE_ATTACHMENTS]; struct pipe_texture templ; struct x11_drawable_buffer *xbufs; - int num_ins, num_outs, i; + int num_ins, num_outs, att, i; - if (num_natts) { + if (attachment_mask) { memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; @@ -158,26 +156,31 @@ dri2_surface_validate(struct native_surface *nsurf, templ.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); + memset(textures, 0, sizeof(*textures) * NUM_NATIVE_ATTACHMENTS); } /* create textures for pbuffer */ if (dri2surf->type == DRI2_SURFACE_TYPE_PBUFFER) { struct pipe_screen *screen = dri2dpy->base.screen; - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; - struct pipe_texture *ptex = dri2surf->pbuffer_textures[natt]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct pipe_texture *ptex = dri2surf->pbuffer_textures[att]; + + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) + continue; if (!ptex) { ptex = screen->texture_create(screen, &templ); - dri2surf->pbuffer_textures[natt] = ptex; + dri2surf->pbuffer_textures[att] = ptex; } if (textures) - pipe_texture_reference(&textures[i], ptex); + pipe_texture_reference(&textures[att], ptex); } + if (seq_num) + *seq_num = dri2surf->sequence_number; if (width) *width = dri2surf->width; if (height) @@ -186,48 +189,56 @@ dri2_surface_validate(struct native_surface *nsurf, return TRUE; } - for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) - texture_indices[i] = -1; - /* prepare the attachments */ - num_ins = num_natts; - for (i = 0; i < num_natts; i++) { - unsigned int dri2att; + num_ins = 0; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + if (native_attachment_mask_test(attachment_mask, att)) { + unsigned int dri2att; + + switch (att) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + dri2att = DRI2BufferFrontLeft; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + dri2att = DRI2BufferBackLeft; + break; + case NATIVE_ATTACHMENT_FRONT_RIGHT: + dri2att = DRI2BufferFrontRight; + break; + case NATIVE_ATTACHMENT_BACK_RIGHT: + dri2att = DRI2BufferBackRight; + break; + default: + assert(0); + dri2att = 0; + break; + } - switch (natts[i]) { - case NATIVE_ATTACHMENT_FRONT_LEFT: - dri2att = DRI2BufferFrontLeft; - break; - case NATIVE_ATTACHMENT_BACK_LEFT: - dri2att = DRI2BufferBackLeft; - break; - case NATIVE_ATTACHMENT_FRONT_RIGHT: - dri2att = DRI2BufferFrontRight; - break; - case NATIVE_ATTACHMENT_BACK_RIGHT: - dri2att = DRI2BufferBackRight; - break; - default: - assert(0); - dri2att = 0; - break; + dri2atts[num_ins] = dri2att; + num_ins++; } - dri2atts[i] = dri2att; - texture_indices[natts[i]] = i; } dri2surf->have_back = FALSE; dri2surf->have_fake = FALSE; + /* remember old geometry */ + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + xbufs = x11_drawable_get_buffers(dri2dpy->xscr, dri2surf->drawable, &dri2surf->width, &dri2surf->height, dri2atts, FALSE, num_ins, &num_outs); if (!xbufs) return FALSE; - /* update width and height */ - templ.width0 = dri2surf->width; - templ.height0 = dri2surf->height; + if (templ.width0 != dri2surf->width || templ.height0 != dri2surf->height) { + /* are there cases where the buffers change and the geometry doesn't? */ + dri2surf->sequence_number++; + + templ.width0 = dri2surf->width; + templ.height0 = dri2surf->height; + } for (i = 0; i < num_outs; i++) { struct x11_drawable_buffer *xbuf = &xbufs[i]; @@ -254,13 +265,13 @@ dri2_surface_validate(struct native_surface *nsurf, break; } - if (!desc || texture_indices[natt] < 0 || - (textures && textures[texture_indices[natt]])) { + if (!desc || !native_attachment_mask_test(attachment_mask, natt) || + (textures && textures[natt])) { if (!desc) _eglLog(_EGL_WARNING, "unknown buffer %d", xbuf->attachment); - else if (texture_indices[natt] < 0) + else if (!native_attachment_mask_test(attachment_mask, natt)) _eglLog(_EGL_WARNING, "unexpected buffer %d", xbuf->attachment); - else if (textures && textures[texture_indices[natt]]) + else _eglLog(_EGL_WARNING, "both real and fake front buffers are listed"); continue; } @@ -272,13 +283,15 @@ dri2_surface_validate(struct native_surface *nsurf, desc, xbuf->pitch, xbuf->name); if (ptex) { /* the caller owns the textures */ - textures[texture_indices[natt]] = ptex; + textures[natt] = ptex; } } } free(xbufs); + if (seq_num) + *seq_num = dri2surf->sequence_number; if (width) *width = dri2surf->width; if (height) @@ -575,6 +588,21 @@ dri2_display_get_configs(struct native_display *ndpy, int *num_configs) return configs; } +static boolean +dri2_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct dri2_display *dri2dpy = dri2_display(ndpy); + uint depth, nconf_depth; + + depth = x11_drawable_get_depth(dri2dpy->xscr, (Drawable) pix); + nconf_depth = util_format_get_blocksizebits(nconf->color_format); + + /* simple depth match for now */ + return (depth == nconf_depth || (depth == 24 && depth + 8 == nconf_depth)); +} + static void dri2_display_destroy(struct native_display *ndpy) { @@ -627,18 +655,8 @@ dri2_display_init_screen(struct native_display *ndpy) return TRUE; } -static void -dri2_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - /* TODO get native surface from context private, and remove the callback */ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, - struct drm_api *api, - native_flush_frontbuffer flush_frontbuffer) +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api) { struct dri2_display *dri2dpy; @@ -675,15 +693,9 @@ x11_create_dri2_display(EGLNativeDisplayType dpy, return NULL; } - if (!flush_frontbuffer) - flush_frontbuffer = dri2_display_flush_frontbuffer; - - dri2dpy->base.screen->flush_frontbuffer = - (void (*)(struct pipe_screen *, struct pipe_surface *, void *)) - flush_frontbuffer; - dri2dpy->base.destroy = dri2_display_destroy; dri2dpy->base.get_configs = dri2_display_get_configs; + dri2dpy->base.is_pixmap_supported = dri2_display_is_pixmap_supported; dri2dpy->base.create_context = dri2_display_create_context; dri2dpy->base.create_window_surface = dri2_display_create_window_surface; dri2dpy->base.create_pixmap_surface = dri2_display_create_pixmap_surface; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c index a4f36e9deca..583ce3d3293 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.c @@ -48,8 +48,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - native_flush_frontbuffer flush_frontbuffer) +native_create_display(EGLNativeDisplayType dpy) { struct native_display *ndpy = NULL; boolean force_sw; @@ -59,14 +58,14 @@ native_create_display(EGLNativeDisplayType dpy, force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE); if (api && !force_sw) { - ndpy = x11_create_dri2_display(dpy, api, flush_frontbuffer); + ndpy = x11_create_dri2_display(dpy, api); } if (!ndpy) { EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; _eglLog(level, "use software fallback"); - ndpy = x11_create_ximage_display(dpy, TRUE, flush_frontbuffer); + ndpy = x11_create_ximage_display(dpy, TRUE); } return ndpy; diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h index 9217eb62529..622ddac5df6 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_x11.h +++ b/src/gallium/state_trackers/egl_g3d/x11/native_x11.h @@ -29,13 +29,9 @@ #include "common/native.h" struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - boolean use_xshm, - native_flush_frontbuffer flush_frontbuffer); +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm); struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, - struct drm_api *api, - native_flush_frontbuffer flush_frontbuffer); +x11_create_dri2_display(EGLNativeDisplayType dpy, struct drm_api *api); #endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c index e02faa9b7ba..d76107c47f1 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl_g3d/x11/native_ximage.c @@ -83,6 +83,7 @@ struct ximage_surface { GC gc; struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; + unsigned int sequence_number; }; struct ximage_config { @@ -260,6 +261,9 @@ ximage_surface_swap_buffers(struct native_surface *nsurf) *xfront = *xback; *xback = xtmp; + /* the front/back textures are swapped */ + xsurf->sequence_number++; + return ximage_surface_draw_buffer(nsurf, NATIVE_ATTACHMENT_FRONT_LEFT); } @@ -285,33 +289,29 @@ ximage_surface_update_geometry(struct native_surface *nsurf) } static boolean -ximage_surface_validate(struct native_surface *nsurf, - const enum native_attachment *natts, - unsigned num_natts, - struct pipe_texture **textures, - int *width, int *height) +ximage_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_texture **textures, + int *width, int *height) { struct ximage_surface *xsurf = ximage_surface(nsurf); - boolean error = FALSE; - unsigned i; + boolean new_buffers = FALSE; + int att; ximage_surface_update_geometry(&xsurf->base); - if (textures) - memset(textures, 0, sizeof(*textures) * num_natts); - - for (i = 0; i < num_natts; i++) { - enum native_attachment natt = natts[i]; - struct ximage_buffer *xbuf = &xsurf->buffers[natt]; + for (att = 0; att < NUM_NATIVE_ATTACHMENTS; att++) { + struct ximage_buffer *xbuf = &xsurf->buffers[att]; - if (!xbuf) + /* delay the allocation */ + if (!native_attachment_mask_test(attachment_mask, att)) continue; /* reallocate the texture */ if (!xbuf->texture || xsurf->width != xbuf->texture->width0 || xsurf->height != xbuf->texture->height0) { - if (ximage_surface_alloc_buffer(&xsurf->base, natt)) { + new_buffers = TRUE; + if (ximage_surface_alloc_buffer(&xsurf->base, att)) { /* update ximage */ if (xbuf->ximage) { xbuf->ximage->width = xbuf->transfer->width; @@ -321,27 +321,24 @@ ximage_surface_validate(struct native_surface *nsurf, } } - /* allocation failed */ - if (!xbuf->texture) { - unsigned j; - for (j = 0; j < i; j++) - pipe_texture_reference(&textures[j], NULL); - for (j = i; j < num_natts; j++) - textures[j] = NULL; - error = TRUE; - break; + if (textures) { + textures[att] = NULL; + pipe_texture_reference(&textures[att], xbuf->texture); } - - if (textures) - pipe_texture_reference(&textures[i], xbuf->texture); } + /* increase the sequence number so that caller knows */ + if (new_buffers) + xsurf->sequence_number++; + + if (seq_num) + *seq_num = xsurf->sequence_number; if (width) *width = xsurf->width; if (height) *height = xsurf->height; - return !error; + return TRUE; } static void @@ -603,6 +600,34 @@ ximage_display_get_configs(struct native_display *ndpy, int *num_configs) return configs; } +static boolean +ximage_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct ximage_display *xdpy = ximage_display(ndpy); + enum pipe_format fmt; + uint depth; + + depth = x11_drawable_get_depth(xdpy->xscr, (Drawable) pix); + switch (depth) { + case 32: + fmt = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case 24: + fmt = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + case 16: + fmt = PIPE_FORMAT_R5G6B5_UNORM; + break; + default: + fmt = PIPE_FORMAT_NONE; + break; + } + + return (fmt == nconf->color_format); +} + static void ximage_display_destroy(struct native_display *ndpy) { @@ -620,18 +645,8 @@ ximage_display_destroy(struct native_display *ndpy) free(xdpy); } -static void -ximage_display_flush_frontbuffer(void *dummy, struct pipe_surface *surf, - void *context_private) -{ - /* TODO get native surface from context private, and remove the callback */ - _eglLog(_EGL_WARNING, "flush_frontbuffer is not supplied"); -} - struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, - boolean use_xshm, - native_flush_frontbuffer flush_frontbuffer) +x11_create_ximage_display(EGLNativeDisplayType dpy, boolean use_xshm) { struct ximage_display *xdpy; @@ -660,17 +675,12 @@ x11_create_ximage_display(EGLNativeDisplayType dpy, (use_xshm && x11_screen_support(xdpy->xscr, X11_SCREEN_EXTENSION_XSHM)); xdpy->winsys = create_sw_winsys(); - if (!flush_frontbuffer) - flush_frontbuffer = ximage_display_flush_frontbuffer; - xdpy->winsys->flush_frontbuffer = - (void (*)(struct pipe_winsys *, struct pipe_surface *, void *)) - flush_frontbuffer; - xdpy->base.screen = softpipe_create_screen(xdpy->winsys); xdpy->base.destroy = ximage_display_destroy; xdpy->base.get_configs = ximage_display_get_configs; + xdpy->base.is_pixmap_supported = ximage_display_is_pixmap_supported; xdpy->base.create_context = ximage_display_create_context; xdpy->base.create_window_surface = ximage_display_create_window_surface; xdpy->base.create_pixmap_surface = ximage_display_create_pixmap_surface; diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c index 1e98943242a..4d68a88d2e6 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.c @@ -30,6 +30,7 @@ #include <X11/extensions/XShm.h> #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_format.h" #include "xf86drm.h" #include "egllog.h" @@ -50,6 +51,10 @@ struct x11_screen { XVisualInfo *visuals; int num_visuals; + + /* cached values for x11_drawable_get_depth */ + Drawable last_drawable; + unsigned int last_depth; }; @@ -351,6 +356,37 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, } /** + * Return the depth of a drawable. + * + * Unlike other drawable functions, the drawable needs not be a DRI2 drawable. + */ +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable) +{ + unsigned int depth; + + if (drawable != xscr->last_drawable) { + Window root; + int x, y; + unsigned int w, h, border; + Status ok; + + ok = XGetGeometry(xscr->dpy, drawable, &root, + &x, &y, &w, &h, &border, &depth); + if (!ok) + depth = 0; + + xscr->last_drawable = drawable; + xscr->last_depth = depth; + } + else { + depth = xscr->last_depth; + } + + return depth; +} + +/** * Create a mode list of the given size. */ __GLcontextModes * diff --git a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h index 86e8e0501a3..bf482189053 100644 --- a/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h +++ b/src/gallium/state_trackers/egl_g3d/x11/x11_screen.h @@ -96,4 +96,7 @@ x11_drawable_get_buffers(struct x11_screen *xscr, Drawable drawable, int *width, int *height, unsigned int *attachments, boolean with_format, int num_ins, int *num_outs); +uint +x11_drawable_get_depth(struct x11_screen *xscr, Drawable drawable); + #endif /* _X11_SCREEN_H_ */ diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index 84d47b12edc..bd4a85caa04 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -193,7 +193,7 @@ _glxapi_get_proc_address(const char *funcName) } -__GLXextFuncPtr +PUBLIC __GLXextFuncPtr glXGetProcAddressARB(const GLubyte *procName) { __GLXextFuncPtr f; diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 96b13c2258e..68797228459 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -76,7 +76,6 @@ %rename(BlendColor) pipe_blend_color; %rename(Blend) pipe_blend_state; %rename(Clip) pipe_clip_state; -%rename(ConstantBuffer) pipe_constant_buffer; %rename(Depth) pipe_depth_state; %rename(Stencil) pipe_stencil_state; %rename(Alpha) pipe_alpha_state; diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 84ce1a41e6d..ce893dad453 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -142,10 +142,7 @@ struct st_context { void set_constant_buffer(unsigned shader, unsigned index, struct pipe_buffer *buffer ) { - struct pipe_constant_buffer state; - memset(&state, 0, sizeof(state)); - state.buffer = buffer; - $self->pipe->set_constant_buffer($self->pipe, shader, index, &state); + $self->pipe->set_constant_buffer($self->pipe, shader, index, buffer); } void set_framebuffer(const struct pipe_framebuffer_state *state ) diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index a68709f5cf3..b61d47d6456 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -94,7 +94,7 @@ struct_factories = { "pipe_blend_color": gallium.BlendColor, "pipe_blend_state": gallium.Blend, #"pipe_clip_state": gallium.Clip, - #"pipe_constant_buffer": gallium.ConstantBuffer, + #"pipe_buffer": gallium.Buffer, "pipe_depth_state": gallium.Depth, "pipe_stencil_state": gallium.Stencil, "pipe_alpha_state": gallium.Alpha, @@ -462,10 +462,10 @@ class Context(Object): sys.stdout.flush() def set_constant_buffer(self, shader, index, buffer): - if buffer is not None and buffer.buffer is not None: - self.real.set_constant_buffer(shader, index, buffer.buffer) + if buffer is not None: + self.real.set_constant_buffer(shader, index, buffer) - self.dump_constant_buffer(buffer.buffer) + self.dump_constant_buffer(buffer) def set_framebuffer_state(self, state): _state = gallium.Framebuffer() diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index a3294e877a6..dfe3e465f77 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -35,225 +35,10 @@ * @author Jose Fonseca */ - -#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ -#include "pipe/p_format.h" -#include "pipe/p_context.h" -#include "pipe/p_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" - -struct st_softpipe_buffer -{ - struct pipe_buffer base; - boolean userBuffer; /** Is this a user-space buffer? */ - void *data; - void *mapped; -}; - - -/** Cast wrapper */ -static INLINE struct st_softpipe_buffer * -st_softpipe_buffer( struct pipe_buffer *buf ) -{ - return (struct st_softpipe_buffer *)buf; -} - - -static void * -st_softpipe_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = st_softpipe_buf->data; - return st_softpipe_buf->mapped; -} - - -static void -st_softpipe_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = NULL; -} - - -static void -st_softpipe_buffer_destroy(struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); - - if (oldBuf->data) { - if (!oldBuf->userBuffer) - align_free(oldBuf->data); - - oldBuf->data = NULL; - } - - FREE(oldBuf); -} - - -static void -st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ -} - - - -static const char * -st_softpipe_get_name(struct pipe_winsys *winsys) -{ - return "softpipe"; -} - - -static struct pipe_buffer * -st_softpipe_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - - buffer->data = align_malloc(size, alignment); - - return &buffer->base; -} - - -/** - * Create buffer which wraps user-space data. - */ -static struct pipe_buffer * -st_softpipe_user_buffer_create(struct pipe_winsys *winsys, - void *ptr, - unsigned bytes) -{ - struct st_softpipe_buffer *buffer; - - buffer = CALLOC_STRUCT(st_softpipe_buffer); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.size = bytes; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_buffer * -st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - const unsigned alignment = 64; - unsigned nblocksy; - - nblocksy = util_format_get_nblocksy(format, height); - *stride = align(util_format_get_stride(format, width), alignment); - - return winsys->buffer_create(winsys, alignment, - usage, - *stride * nblocksy); -} - - -static void -st_softpipe_fence_reference(struct pipe_winsys *winsys, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -st_softpipe_fence_signalled(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -st_softpipe_fence_finish(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static void -st_softpipe_destroy(struct pipe_winsys *winsys) -{ - FREE(winsys); -} - - -static struct pipe_screen * -st_softpipe_screen_create(void) -{ - static struct pipe_winsys *winsys; - struct pipe_screen *screen; - - winsys = CALLOC_STRUCT(pipe_winsys); - if(!winsys) - return NULL; - - winsys->destroy = st_softpipe_destroy; - - winsys->buffer_create = st_softpipe_buffer_create; - winsys->user_buffer_create = st_softpipe_user_buffer_create; - winsys->buffer_map = st_softpipe_buffer_map; - winsys->buffer_unmap = st_softpipe_buffer_unmap; - winsys->buffer_destroy = st_softpipe_buffer_destroy; - - winsys->surface_buffer_create = st_softpipe_surface_buffer_create; - - winsys->fence_reference = st_softpipe_fence_reference; - winsys->fence_signalled = st_softpipe_fence_signalled; - winsys->fence_finish = st_softpipe_fence_finish; - - winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; - winsys->get_name = st_softpipe_get_name; - - screen = softpipe_create_screen(winsys); - if(!screen) - st_softpipe_destroy(winsys); - - return screen; -} - - -static struct pipe_context * -st_softpipe_context_create(struct pipe_screen *screen) -{ - return softpipe_create(screen); -} - - const struct st_winsys st_softpipe_winsys = { - &st_softpipe_screen_create, - &st_softpipe_context_create, + &softpipe_create_screen_malloc, + &softpipe_create, }; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore new file mode 100644 index 00000000000..e33609d251c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 2f984fb7b9a..8f69ee01099 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -147,22 +147,22 @@ static void setup_constant_buffer(struct vg_context *ctx, const void *buffer, VGint param_bytes) { struct pipe_context *pipe = ctx->pipe; - struct pipe_constant_buffer *cbuf = &ctx->filter.buffer; + struct pipe_buffer **cbuf = &ctx->filter.buffer; /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + *cbuf = pipe_buffer_create(pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, buffer); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static void setup_samplers(struct vg_context *ctx, struct filter_info *info) diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c index 4f9f3dae173..97cbe69205e 100644 --- a/src/gallium/state_trackers/vega/api_masks.c +++ b/src/gallium/state_trackers/vega/api_masks.c @@ -32,7 +32,6 @@ #include "vg_context.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" /* for winsys->update_buffer */ #include "util/u_pack_color.h" #include "util/u_draw_quad.h" @@ -116,8 +115,8 @@ clear_with_quad(struct vg_context *st, float x0, float y0, x1, y1); */ - if (st->pipe->winsys && st->pipe->winsys->update_buffer) - st->pipe->winsys->update_buffer( st->pipe->winsys, + if (st->pipe->screen && st->pipe->screen->update_buffer) + st->pipe->screen->update_buffer( st->pipe->screen, st->pipe->priv ); cso_save_blend(st->cso_context); diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 42300bb6d57..3e260e70736 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -217,7 +217,7 @@ static void setup_mask_framebuffer(struct pipe_surface *surf, static void setup_mask_operation(VGMaskOperation operation) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); const VGfloat ones[4] = {1.f, 1.f, 1.f, 1.f}; void *shader = 0; @@ -225,17 +225,17 @@ static void setup_mask_operation(VGMaskOperation operation) /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, ones); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); switch (operation) { case VG_UNION_MASK: { if (!ctx->mask.union_fs) { @@ -320,22 +320,22 @@ static void setup_mask_samplers(struct pipe_texture *umask) static void setup_mask_fill(const VGfloat color[4]) { struct vg_context *ctx = vg_current_context(); - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; const VGint param_bytes = 4 * sizeof(VGfloat); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 1, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, 0, param_bytes, color); + *cbuf = pipe_buffer_create(ctx->pipe->screen, 1, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, color); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); cso_set_fragment_shader_handle(ctx->cso_context, shaders_cache_fill(ctx->sc, VEGA_SOLID_FILL_SHADER)); diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index cc73771d358..d8f6299b2db 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -77,7 +77,8 @@ struct vg_paint { struct pipe_sampler_state sampler; } pattern; - struct pipe_constant_buffer cbuf; + /* XXX next 3 all unneded? */ + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index b6d282d803b..d385ee567fa 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -293,6 +293,7 @@ static void draw_polygon(struct vg_context *ctx, /* tell pipe about the vertex attributes */ velement.src_offset = 0; + velement.instance_divisor = 0; velement.vertex_buffer_index = 0; velement.src_format = PIPE_FORMAT_R32G32_FLOAT; velement.nr_components = COMPONENTS; diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index d9074a377b3..bd5ae79e551 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -51,7 +51,7 @@ struct shader { VGImageMode image_mode; float constants[MAX_CONSTANTS]; - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_shader_state fs_state; void *fs; }; @@ -96,25 +96,25 @@ static void setup_constant_buffer(struct shader *shader) { struct vg_context *ctx = shader->context; struct pipe_context *pipe = shader->context->pipe; - struct pipe_constant_buffer *cbuf = &shader->cbuf; + struct pipe_buffer **cbuf = &shader->cbuf; VGint param_bytes = paint_constant_buffer_size(shader->paint); float temp_buf[MAX_CONSTANTS]; assert(param_bytes <= sizeof(temp_buf)); paint_fill_constant_buffer(shader->paint, temp_buf); - if (cbuf->buffer == NULL || + if (*cbuf == NULL || memcmp(temp_buf, shader->constants, param_bytes) != 0) { - pipe_buffer_reference(&cbuf->buffer, NULL); + pipe_buffer_reference(cbuf, NULL); memcpy(shader->constants, temp_buf, param_bytes); - cbuf->buffer = pipe_user_buffer_create(pipe->screen, - &shader->constants, - sizeof(shader->constants)); + *cbuf = pipe_user_buffer_create(pipe->screen, + &shader->constants, + sizeof(shader->constants)); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, *cbuf); } static VGint blend_bind_samplers(struct vg_context *ctx, diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index 00d23f5c227..c16ac036e3b 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -122,8 +122,8 @@ struct vg_context * vg_create_context(struct pipe_context *pipe, void vg_destroy_context(struct vg_context *ctx) { - struct pipe_constant_buffer *cbuf = &ctx->mask.cbuf; - struct pipe_constant_buffer *vsbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->mask.cbuf; + struct pipe_buffer **vsbuf = &ctx->vs_const_buffer; util_destroy_blit(ctx->blit); renderer_destroy(ctx->renderer); @@ -131,11 +131,11 @@ void vg_destroy_context(struct vg_context *ctx) shader_destroy(ctx->shader); paint_destroy(ctx->default_paint); - if (cbuf && cbuf->buffer) - pipe_buffer_reference(&cbuf->buffer, NULL); + if (*cbuf) + pipe_buffer_reference(cbuf, NULL); - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); if (ctx->clear.fs) { cso_delete_fragment_shader(ctx->cso_context, ctx->clear.fs); @@ -371,20 +371,20 @@ void vg_validate_state(struct vg_context *ctx) 2.f/fb->width, 2.f/fb->height, 1, 1, -1, -1, 0, 0 }; - struct pipe_constant_buffer *cbuf = &ctx->vs_const_buffer; + struct pipe_buffer **cbuf = &ctx->vs_const_buffer; vg_set_viewport(ctx, VEGA_Y0_BOTTOM); - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(ctx->pipe->screen, 16, + pipe_buffer_reference(cbuf, NULL); + *cbuf = pipe_buffer_create(ctx->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); - if (cbuf->buffer) { - st_no_flush_pipe_buffer_write(ctx, cbuf->buffer, + if (*cbuf) { + st_no_flush_pipe_buffer_write(ctx, *cbuf, 0, param_bytes, vs_consts); } - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, cbuf); + ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf); } if ((ctx->state.dirty & VS_DIRTY)) { cso_set_vertex_shader_handle(ctx->cso_context, diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index ccc8889c8c5..bc88c8d139d 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -50,7 +50,7 @@ struct st_renderbuffer { }; struct st_framebuffer { - VGint init_width, init_height; + VGint width, height; struct st_renderbuffer *strb; struct st_renderbuffer *dsrb; @@ -113,7 +113,7 @@ struct vg_context } clear; struct { - struct pipe_constant_buffer cbuf; + struct pipe_buffer *cbuf; struct pipe_sampler_state sampler; struct vg_shader *union_fs; @@ -135,7 +135,7 @@ struct vg_context struct pipe_sampler_state blend_sampler; struct { - struct pipe_constant_buffer buffer; + struct pipe_buffer *buffer; void *color_matrix_fs; } filter; struct vg_paint *default_paint; @@ -145,7 +145,7 @@ struct vg_context struct vg_shader *plain_vs; struct vg_shader *clear_vs; struct vg_shader *texture_vs; - struct pipe_constant_buffer vs_const_buffer; + struct pipe_buffer *vs_const_buffer; }; struct vg_object { diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index ff80aab03a3..617c174eb6a 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -193,8 +193,8 @@ struct st_framebuffer * st_create_framebuffer(const void *visual, */ stfb->alpha_mask = 0; - stfb->init_width = width; - stfb->init_height = height; + stfb->width = width; + stfb->height = height; stfb->privateData = privateData; } @@ -282,11 +282,14 @@ void st_resize_framebuffer(struct st_framebuffer *stfb, /* If this is a noop, exit early and don't do the clear, etc below. */ - if (strb->width == width && - strb->height == height && + if (stfb->width == width && + stfb->height == height && state->zsbuf) return; + stfb->width = width; + stfb->height = height; + if (strb->width != width || strb->height != height) st_renderbuffer_alloc_storage(ctx, strb, width, height); diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index d80f341e6c2..8f729b565b1 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -379,14 +379,14 @@ struct xorg_renderer * renderer_create(struct pipe_context *pipe) void renderer_destroy(struct xorg_renderer *r) { - struct pipe_constant_buffer *vsbuf = &r->vs_const_buffer; - struct pipe_constant_buffer *fsbuf = &r->fs_const_buffer; + struct pipe_buffer **vsbuf = &r->vs_const_buffer; + struct pipe_buffer **fsbuf = &r->fs_const_buffer; - if (vsbuf && vsbuf->buffer) - pipe_buffer_reference(&vsbuf->buffer, NULL); + if (*vsbuf) + pipe_buffer_reference(vsbuf, NULL); - if (fsbuf && fsbuf->buffer) - pipe_buffer_reference(&fsbuf->buffer, NULL); + if (*fsbuf) + pipe_buffer_reference(fsbuf, NULL); if (r->shaders) { xorg_shaders_destroy(r->shaders); @@ -409,20 +409,20 @@ void renderer_set_constants(struct xorg_renderer *r, const float *params, int param_bytes) { - struct pipe_constant_buffer *cbuf = + struct pipe_buffer **cbuf = (shader_type == PIPE_SHADER_VERTEX) ? &r->vs_const_buffer : &r->fs_const_buffer; - pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(r->pipe->screen, 16, - PIPE_BUFFER_USAGE_CONSTANT, - param_bytes); + pipe_buffer_reference(cbuf, NULL); + *cbuf = pipe_buffer_create(r->pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, + param_bytes); - if (cbuf->buffer) { - pipe_buffer_write(r->pipe->screen, cbuf->buffer, + if (*cbuf) { + pipe_buffer_write(r->pipe->screen, *cbuf, 0, param_bytes, params); } - r->pipe->set_constant_buffer(r->pipe, shader_type, 0, cbuf); + r->pipe->set_constant_buffer(r->pipe, shader_type, 0, *cbuf); } diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.h b/src/gallium/state_trackers/xorg/xorg_renderer.h index 5272cde2b3f..af6aa0567d6 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.h +++ b/src/gallium/state_trackers/xorg/xorg_renderer.h @@ -23,8 +23,8 @@ struct xorg_renderer { int fb_width; int fb_height; - struct pipe_constant_buffer vs_const_buffer; - struct pipe_constant_buffer fs_const_buffer; + struct pipe_buffer *vs_const_buffer; + struct pipe_buffer *fs_const_buffer; float buffer[BUF_SIZE]; int buffer_size; diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 6b5a41a3727..5bf0e94b627 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -486,8 +486,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, int dxo, dyo; Bool hdtv; int x, y, w, h; - struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); + struct exa_pixmap_priv *dst; + struct pipe_surface *dst_surf = NULL; + + exaMoveInPixmap(pPixmap); + dst = exaGetPixmapDriverPrivate(pPixmap); if (dst && !dst->tex) { xorg_exa_set_shared_usage(pPixmap); @@ -497,6 +500,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, if (!dst || !dst->tex) XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex"); + dst_surf = xorg_gpu_surface(pPriv->r->pipe->screen, dst); hdtv = ((src_w >= RES_720P_X) && (src_h >= RES_720P_Y)); REGION_TRANSLATE(pScrn->pScreen, dstRegion, -pPixmap->screen_x, @@ -516,7 +520,6 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id, bind_samplers(pPriv); setup_fs_video_constants(pPriv->r, hdtv); - exaMoveInPixmap(pPixmap); DamageDamageRegion(&pPixmap->drawable, dstRegion); while (nbox--) { diff --git a/src/gallium/winsys/drm/i965/xorg/Makefile b/src/gallium/winsys/drm/i965/xorg/Makefile index d91d0006efd..c25726b0bb1 100644 --- a/src/gallium/winsys/drm/i965/xorg/Makefile +++ b/src/gallium/winsys/drm/i965/xorg/Makefile @@ -1,19 +1,25 @@ -TARGET = modesetting_drv.so -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) TOP = ../../../../../.. -include $(TOP)/configs/current -INCLUDES = \ - $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ - -I../gem \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include \ - -I$(TOP)/src/egl/main +GALLIUMDIR = $(TOP)/src/gallium + +TARGET = i965g_drv.so + +CFILES = $(wildcard ./*.c) + +include ${TOP}/configs/current + +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) + +CFLAGS = -DHAVE_CONFIG_H \ + -g -Wall -Wimplicit-function-declaration -fPIC \ + $(shell pkg-config --cflags pixman-1 xorg-server libdrm xproto) \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ + -I${TOP}/src/mesa \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ @@ -23,20 +29,21 @@ LIBS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) -DRIVER_DEFINES = \ - -DHAVE_CONFIG_H - - +TARGET_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(TARGET) ############################################# +all default: $(TARGET) $(TARGET_STAGING) - -all default: $(TARGET) - -$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS) +$(TARGET): $(OBJECTS) Makefile $(GALLIUMDIR)/state_trackers/xorg/libxorgtracker.a $(LIBS) $(TOP)/bin/mklib -noprefix -o $@ \ $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ + +$(TARGET_STAGING): $(TARGET) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(TARGET) $(TOP)/$(LIB_DIR)/gallium + clean: rm -rf $(OBJECTS) $(TARGET) @@ -44,14 +51,4 @@ install: $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) - -############################################## - - -.c.o: - $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@ - - -############################################## - .PHONY = all clean install diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 385fa857b56..b020ff38fa0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -58,6 +58,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, { struct radeon_winsys *radeon_ws = (struct radeon_winsys *)ws; struct radeon_pipe_buffer *radeon_buffer; + struct pb_desc desc; uint32_t domain; radeon_buffer = CALLOC_STRUCT(radeon_pipe_buffer); @@ -70,6 +71,14 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws, radeon_buffer->base.usage = usage; radeon_buffer->base.size = size; + if (usage == PIPE_BUFFER_USAGE_CONSTANT && is_r3xx(radeon_ws->pci_id)) { + /* Don't bother allocating a BO, as it'll never get to the card. */ + desc.alignment = alignment; + desc.usage = usage; + radeon_buffer->pb = pb_malloc_buffer_create(size, &desc); + return &radeon_buffer->base; + } + domain = 0; if (usage & PIPE_BUFFER_USAGE_PIXEL) { @@ -133,8 +142,16 @@ static void radeon_buffer_del(struct pipe_buffer *buffer) struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unref(radeon_buffer->bo); - free(radeon_buffer); + if (radeon_buffer->pb) { + pipe_reference_init(&radeon_buffer->pb->base.reference, 0); + pb_destroy(radeon_buffer->pb); + } + + if (radeon_buffer->bo) { + radeon_bo_unref(radeon_buffer->bo); + } + + FREE(radeon_buffer); } static void *radeon_buffer_map(struct pipe_winsys *ws, @@ -146,6 +163,10 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; + if (radeon_buffer->pb) { + return pb_map(radeon_buffer->pb, flags); + } + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; @@ -174,7 +195,11 @@ static void radeon_buffer_unmap(struct pipe_winsys *ws, struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; - radeon_bo_unmap(radeon_buffer->bo); + if (radeon_buffer->pb) { + pb_unmap(radeon_buffer->pb); + } else { + radeon_bo_unmap(radeon_buffer->bo); + } } static void radeon_fence_reference(struct pipe_winsys *ws, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index d7f17564a9f..de71cb2f42d 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -//#include "state_tracker/st_public.h" +#include "pipebuffer/pb_buffer.h" #include "util/u_memory.h" @@ -49,7 +49,10 @@ struct radeon_pipe_buffer { struct pipe_buffer base; + /* Pointer to GPU-backed BO. */ struct radeon_bo *bo; + /* Pointer to fallback PB buffer. */ + struct pb_buffer *pb; boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 851c2236979..9552f0ad6a9 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -40,12 +40,16 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) struct drm_radeon_info info = {0}; int target = 0; int retval; + drmVersionPtr version; info.value = (unsigned long)⌖ /* We do things in a specific order here. * - * First, the PCI ID. This is essential and should return usable numbers + * DRM version first. We need to be sure we're running on a KMS chipset. + * This is also for some features. + * + * Then, the PCI ID. This is essential and should return usable numbers * for all Radeons. If this fails, we probably got handed an FD for some * non-Radeon card. * @@ -55,8 +59,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) * * The GEM info is actually bogus on the kernel side, as well as our side * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because - * we don't actually use the info for anything yet. - * XXX update the above when we can safely use vram_size instead of vram_visible */ + * we don't actually use the info for anything yet. */ + + version = drmGetVersion(fd); + if (version->version_major != 2) { + fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " + "only compatible with 2.x.x\n", __FUNCTION__, + version->version_major, version->version_minor, + version->version_patchlevel); + drmFreeVersion(version); + exit(1); + } + info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { @@ -92,16 +106,18 @@ static void do_ioctls(int fd, struct radeon_winsys* winsys) exit(1); } winsys->gart_size = gem_info.gart_size; - /* XXX */ - winsys->vram_size = gem_info.vram_visible; -} - -/* Guess at whether this chipset should use r300g. - * - * I believe that this check is valid, but I haven't been exhaustive. */ -static boolean is_r3xx(int pciid) -{ - return (pciid > 0x3150) && (pciid < 0x796f); + winsys->vram_size = gem_info.vram_size; + + debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" + "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" + "radeon: GART size: %d MB VRAM size: %d MB\n", + version->version_major, version->version_minor, + version->version_patchlevel, winsys->pci_id, + winsys->gb_pipes, winsys->z_pipes, + winsys->gart_size / 1024 / 1024, + winsys->vram_size / 1024 / 1024); + + drmFreeVersion(version); } /* Create a pipe_screen. */ @@ -136,12 +152,13 @@ struct pipe_context* radeon_create_context(struct drm_api* api, } boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride) { /* XXX fix this */ - return r300_get_texture_buffer(texture, buffer, stride); + return r300_get_texture_buffer(screen, texture, buffer, stride); } /* Create a buffer from a handle. */ @@ -208,7 +225,7 @@ static boolean radeon_shared_handle_from_texture(struct drm_api *api, struct radeon_pipe_buffer* radeon_buffer; struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } @@ -240,7 +257,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api, unsigned *handle) { struct pipe_buffer *buffer = NULL; - if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { + if (!radeon_buffer_from_texture(api, screen, texture, &buffer, stride)) { return FALSE; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index bf0e78138d7..ddd7983824a 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -56,6 +56,7 @@ struct pipe_context* radeon_create_context(struct drm_api* api, struct pipe_screen* screen); boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, struct pipe_texture* texture, struct pipe_buffer** buffer, unsigned* stride); @@ -76,4 +77,13 @@ boolean radeon_global_handle_from_buffer(struct drm_api* api, unsigned* handle); void radeon_destroy_drm_api(struct drm_api* api); + +/* Guess at whether this chipset should use r300g. + * + * I believe that this check is valid, but I haven't been exhaustive. */ +static boolean is_r3xx(int pciid) +{ + return (pciid > 0x3150) && (pciid < 0x796f); +} + #endif diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index a9889444de8..eaa34180321 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -2,7 +2,7 @@ TOP = ../../../../../.. include $(TOP)/configs/current -LIBNAME = radeon_dri.so +LIBNAME = radeong_dri.so MINIGLX_SOURCES = diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 420dccc92c9..1d9bac3871c 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -274,7 +274,7 @@ xlib_eglTerminate(_EGLDriver *drv, _EGLDisplay *dpy) static _EGLProc -xlib_eglGetProcAddress(const char *procname) +xlib_eglGetProcAddress(_EGLDriver *drv, const char *procname) { return (_EGLProc) st_get_proc_address(procname); } |